-- Module:Ortografeyes
--
-- I gn a nén co di dzo-pådje /doc pol notule d’ esplikêyes. Clitchîz cial po l’ ahiver.
-- This module makes a list of different 
-- orthographies for a word based on a list of well known references.
-- Only use with Modele:Orto as :
--[[ 

{{Orto
|bondjoû = C8, C9, E34, O0, O3, O4
|bondjou = E1, O2
|bôdjoû = E34
}}

]]--
 
local p = {}
local base = require("Module:Base")
local sourdant = require("Module:Sourdant");
local category = require("Module:Categoreyes");
local lingaedje = require("Module:Lingaedje");
local riscr = require("Module:Riscrijhaedje");

-- Module-level state shared by every function below.

-- language code of the entry; p.init() overrides it with the `lang` template argument
local lang = "wa"
-- value of the `croejh` template argument; passed as-is to riscr.get_article_link()
local croejh = ""
-- language object used for ucfirst(); built from the default "wa" and not
-- rebuilt when p.init() changes `lang`
local langObj = mw.language.new(lang)
-- category prefix ("mots", "spots" or "dijhêyes"), chosen by p.init() from the `cat` argument
local cat = "mots"
-- text of the current page title, set by p.init()
local pagename = ""
-- cache filled by p.load_reference(): reference id -> { group, alias, wikisource, external, wikipedia }
local references = {}
-- ordered list of group definitions ({ id = ..., title = ... }), filled by p.load_groups()
local groups = {}
-- same group definitions, indexed by group id
local groupsID = {}
-- spelling -> list of reference ids quoted for that spelling, filled by p.sort_ref()
local words = {}
-- group id -> spelling -> rendered, comma-separated references, filled by p.sort_ref()
local output_lst = {}
-- number of references sorted into a group other than "not", "unknown" and "all"
local total_known = 0

--- Left-pad a string.
-- Prepends `len - #str` copies of `char` to `str`; lengths are counted in bytes.
-- @param str  string to pad
-- @param len  target length
-- @param char padding string, a single space when omitted
-- @return the padded string (`str` itself when it is already long enough)
function string.lpad(str, len, char)
    local filler = char
    if filler == nil then
        filler = ' '
    end
    local missing = len - #str
    return string.rep(filler, missing) .. str
end
--- Percent-encode a string for use in a URL.
-- Every byte that is not matched by the `%w` class (letters and digits) is
-- replaced by `%XX`, where XX is the byte value as two upper-case hex digits.
-- The two-digit width matters: bytes below 0x10 (tab, newline, ...) must be
-- written `%09`, `%0A`, ... to be valid percent-encoding.
-- @param str string to encode
-- @return the encoded string (single return value)
function string.url_encode(str)
	local encoded = string.gsub(str, "[^%w]", function(c)
		return string.format("%%%02X", string.byte(c))
	end)
	return encoded
end
--- Neutralise the characters that are magic in Lua patterns.
-- Each of ( ) . % + - * ? [ ^ $ ] is prefixed with `%`. Callers in this module
-- use the result both as a literal pattern and as a gsub replacement string
-- (where doubling `%` is what matters).
-- @param str text to escape
-- @return the escaped text (single return value)
function string.escape_pattern(str)
    local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
    local escaped = string.gsub(str, magic_chars, "%%%1")
    return escaped
end

--- Store the categories that relate the page's own spelling to the 'rif' group.
-- Nothing happens unless all of the following hold:
--   * `word` is the current page name and the language is Walloon ("wa");
--   * the group being rendered is not 'rif' itself;
--   * the same spelling is also listed in the 'rif' group.
-- For every reference id collected for that spelling, a
-- "<Cat> ki l' rifondowe est dins <ref>" category is stored when the reference
-- is known and not itself in the 'rif' group, plus a fixed category when the
-- rendered group is "d1900" or "vscr".
-- @param word  spelling being rendered
-- @param grpID id of the group the spelling is rendered in
function p.build_cat_grp(word, grpID)
	if word ~= pagename or lang ~= "wa" or grpID == 'rif' then
		return
	end
	if output_lst['rif'] == nil or output_lst['rif'][word] == nil then
		return
	end

	-- local on purpose: the original assignment leaked `catPrefix` as a global
	local catPrefix = langObj:ucfirst(cat)

	-- `words[word]` holds the reference ids quoted for this spelling
	for _, ref in pairs(words[word] or {}) do
		if references[ref] ~= nil and references[ref].group ~= 'rif' then
			category.store(catPrefix.." ki l' rifondowe est dins "..ref, true, 0, true)
		end
		-- NOTE(review): these two categories are stored once per reference, as in
		-- the original; presumably category.store() ignores duplicates — confirm.
		if grpID == "d1900" then
			-- for words before 1900
			category.store("Mots ki l' rifondowe egzistéve dedja dins ene ortografeye di dvant Feller", true)
		elseif grpID == "vscr" then
			-- for words in scripta
			category.store("Mots ki l' rifondowe egzistéve dedja dins li scrîta", true)
		end
	end
end

-- Split a rendered link on its " u " / " / " alternatives, when it has any.
local function split_alternatives(link)
	if string.match(link, " [u/] ") then
		return (riscr.split_article_link(link))
	end
	return link
end

-- Turn one plain spelling (no "[[" in it) into wikitext links.
-- Handles, in this order: "form (variant)", "form, form", "form u form" /
-- "form / form", and finally a single form.
local function linkify_spelling(word, ling)
	local lw

	-- handle things like: "r(i)gon (rigon / r'gon)"
	-- the two [^)] after "(" avoid matching things like "s(i)tucler / (e)stucler"
	-- ustring instead of string because of things like " (è)spitant"
	local pos = mw.ustring.find(word, " %([^%)][^%)]")
	if pos then
		-- `pos` is a character offset, so the cuts must use mw.ustring.sub:
		-- byte-based string.sub would split at the wrong place as soon as an
		-- accented letter precedes the parenthesis.
		local head = mw.ustring.sub(word, 1, pos - 1)
		head = riscr.get_article_link(head, croejh, ling)
		local tail = mw.ustring.sub(word, pos + 2)
		tail = string.gsub(tail, "%)$", "")
		-- "/text/" is a phonetic transcription and is kept unchanged,
		-- anything else is linkified
		if not string.match(tail, "/[^ ][^/]+[^ ]/") then
			tail = riscr.get_article_link(tail, croejh, ling)
			tail = split_alternatives(tail)
		end
		return head .. " ( " .. tail .. " )", true
	end

	pos = string.find(word, ", ")
	if pos then
		local head = riscr.get_article_link(string.sub(word, 1, pos - 1), croejh, ling)
		-- the second part keeps its leading space, as in the original;
		-- presumably get_article_link() tolerates it — TODO confirm
		local tail = riscr.get_article_link(string.sub(word, pos + 1), croejh, ling)
		lw = split_alternatives(head) .. ", " .. split_alternatives(tail)
		return lw, true
	end

	pos = string.find(word, " [u/] ")
	if pos then
		local head = riscr.get_article_link(string.sub(word, 1, pos - 1), croejh, ling)
		-- same remark: the second part starts with the separator's trailing space
		local tail = riscr.get_article_link(string.sub(word, pos + 2), croejh, ling)
		local sep = string.sub(word, pos, pos + 2)
		lw = split_alternatives(head) .. sep .. split_alternatives(tail)
		return lw, true
	end

	lw = riscr.get_article_link(word, croejh, ling)
	return lw, true
end

--- Render every non-empty group of `output_lst` as HTML.
-- Groups are emitted in the order of the `groups` list. The "not" group is a
-- single sentence listing the references that lack the word; every other
-- group is a titled <ul> with one item per spelling, laid out in two columns
-- from 4 items and three columns from 9 items. Categories are stored on the
-- way through p.build_cat_grp().
-- @return the HTML for all groups ("" when nothing was collected)
function p.build_groups()
	local html = ""
	for _, grp in ipairs(groups) do
		local grpID = grp.id
		if output_lst[grpID] ~= nil then
			html = html .. '<div class="TBox og og-'..grpID..'">'
			if grpID == "not" then
				local refs = output_lst[grpID]["-"]
				html = html .. '<i>Li mot n’ est nén dins :</i> '..refs
			else
				local items = {}

				-- language variant used for the article links of this group
				local ling = lang
				if ling == "wa" then
					if grpID == "vscr" or grpID == "d1900" then
						ling = ling .. "-vsis"
					elseif grpID == "s20es" then
						ling = ling .. "-fel"
					end
				end

				for word, refs in pairs(output_lst[grpID]) do
					local lw = word
					-- spellings that already contain a wikilink are kept verbatim
					if not string.match(word, "%[%[") then
						lw = linkify_spelling(word, ling)

						-- bold to itself is lost if there is an anchor, so a link
						-- to the current page is replaced by bold text;
						-- pagename is escaped because it may hold magic chars
						lw = string.gsub(lw,
							"%[%[" .. string.escape_pattern(pagename) .. "#([^%]]*)|([^%]]*)%]%]",
							"'''" .. "%2" .. "'''" )
					end

					-- change anchor for old spellings of rifondou
					if ling == "wa" and string.match(refs, "rifondaedje ricandjî") then
						lw = string.gsub(lw, "#Walon%|", "#Walon (viye rifondowe)|")
					end

					items[#items + 1] = '<li>'..lw..' : '..refs..'</li>'

					-- make cat if needed
					p.build_cat_grp(word, grpID)
				end

				-- display list by columns for numerous items
				-- (no quote inside the class name: the attribute is closed below)
				local style_columns = ''
				if #items >= 9 then
					style_columns = ' c-3'
				elseif #items >= 4 then
					style_columns = ' c-2'
				end

				html = html .. '<div class="TBox-title ogt">'..grp.title..' :</div>'
				html = html .. '<div class="TBox-content'..style_columns..'"><ul>'..table.concat(items)..'</ul></div>'
			end
			html = html .. '</div>'
		end
	end

	return html
end

--- Adapt a word to what an external resource expects in its URL or title.
-- Supported values of `transfrom`:
--   "ucfirst"   first character upper-cased (Unicode-aware);
--   "uc"        whole word upper-cased, "/" replaced by ",";
--   "wiki"      spaces and no-break spaces replaced by "_", ’ replaced by ';
--   "apostrofe" no-break spaces replaced by plain spaces, ’ replaced by '.
-- Any other value leaves the word unchanged.
-- @param word      word to transform
-- @param transfrom name of the transformation
-- @return the transformed word
function p.word_transform (word, transfrom)
	-- U+00A0 written as explicit UTF-8 bytes: a literal no-break space is
	-- indistinguishable from a plain space in the source and easily lost
	local NBSP = "\194\160"

	if transfrom == "ucfirst" then
		word = mw.ustring.upper(mw.ustring.sub(word, 1, 1)) .. mw.ustring.sub(word, 2)
	elseif transfrom == "uc" then
		word = mw.ustring.upper(word)
		word = string.gsub(word, '/', ',')
	-- space to underscore and ’ to '
	elseif transfrom == "wiki" then
		word = string.gsub(word, ' ', '_')
		word = string.gsub(word, NBSP, '_')
		word = string.gsub(word, '’', '\'')
	-- ’ to '
	elseif transfrom == "apostrofe" then
		word = string.gsub(word, NBSP, ' ')
		word = string.gsub(word, '’', '\'')
	end

	return word
end

--- Render one reference as wikitext.
-- The first occurrence of the reference id in `ref` becomes a link to its
-- "Sourdant:" page. Unless the word is "-", a "lére" link is appended: to
-- oldwikisource when the reference has `wikisource` data, otherwise to the
-- external site when it has `external` data.
-- @param ref   reference text as typed in the template
-- @param refID reference id found in `ref` (may be nil)
-- @param word  spelling the reference is quoted for ("-" for "not in")
-- @param refQ  qualifier typed after "ID:" (other word, year, ...), or nil
-- @return the wikitext; `ref` unchanged when the reference is unknown
function p.build_ref (ref, refID, word, refQ)
	if refID == nil or references[refID] == nil then
		return ref
	end

	-- `%` is magic in a gsub replacement string: double it so that the text is
	-- inserted literally (parentheses keep only gsub's first result)
	local function literal_repl(text)
		return (string.gsub(text, "%%", "%%%%"))
	end

	-- link data of an alias lives on the reference it points to
	local r = references[refID]
	if r.alias ~= nil then
		r = r.alias
	end

	-- ref link
	-- only replace the FIRST occurrence
	local link = '[[Sourdant:'..refID..'|'..refID..']]'
	if ref == refID then
		ref = link
	elseif refQ ~= nil then
		-- the qualifier is swallowed together with the id
		ref = string.gsub(ref, string.escape_pattern(refID..":"..refQ), literal_repl(link), 1)
	else
		ref = string.gsub(ref, string.escape_pattern(refID), literal_repl(link), 1)
	end

	-- "not in this reference": nothing to read
	if word == '-' then
		return ref
	end

	-- add link to wikisource
	if r.wikisource ~= nil then
		local w = r.wikisource
		if w.title ~= nil then
			-- for E89:other_word
			if refQ ~= nil then
				word = mw.text.trim(refQ)
			end
			if word ~= "" then
				if w.transform ~= nil then
					word = p.word_transform(word, w.transform)
				end

				local t = string.gsub(w.title, '__WORD__', literal_repl(word))
				-- language prefix only for a wikisource that is not the Walloon one
				local l = ""
				if w.lang ~= nil and w.lang ~= "wa" then
					l = ":"..w.lang
				end
				ref = ref.. ' <sup>([[:oldwikisource'..l..':'..t..'|lére]])</sup>'
			end
		end

	-- add external link
	elseif r.external ~= nil then
		local ext = r.external
		local query = nil

		-- for R10:1841, Ref1:OtherWord, ...
		if ext.queryID ~= nil and refQ ~= nil then
			query = ext.queryID
			word = refQ
		elseif ext.query ~= nil then
			query = ext.query
			if refQ ~= nil and refQ == '' then
				word = ''
			elseif refQ ~= nil then
				-- compatibility with old R11 template
				if refID == "R11" and string.match(refQ, '^[1-9]') then
					word = word .. '_' .. refQ
				else
					word = refQ
				end
			end
		end

		if query ~= nil and word ~= '' then
			if ext.transform ~= nil then
				word = p.word_transform(word, ext.transform)
			elseif refQ == nil and string.match(word, "%(")
				and (lang == "wa" or lang == "wa-fel" or lang == "wa-vsis")
			then
				word = riscr.betchete_cogne_motli(word)
			end

			query = string.gsub(query, '__WORD__', literal_repl(string.url_encode(word)))
			ref = ref.. ' <sup class="plainlinks">(['..query..' lére])</sup>'
		end
	end

	return ref
end

--- File one reference of one spelling under the right group of `output_lst`.
-- Steps: extract the reference id and its optional ":qualifier", load the
-- reference if needed, store the "... ki sont dins <ID>" category for the
-- page's own spelling, honour a forced group written "[!group]", record the
-- id in `words`, resolve the final group (with the "not", "unknown" and
-- per-script fallbacks) and append the rendered reference to the group.
-- @param word spelling the reference is quoted for ("-" or "nén dins" = not in)
-- @param ref  reference text as typed in the template
-- @param lang language code handed to the script detection
function p.sort_ref (word, ref, lang)
	local g = "unknown"
	local refID = nil
	local refQ = nil

	-- escape spaces in syntax ID:<some words with spaces>
	-- alternative of ID:some_words_with_spaces
	ref = string.gsub(ref, "%:<[^>]+>", function(w)
		local underscored = string.gsub(w, ' ', '_')
		-- drop the leading ":<" and the trailing ">"
		return ":"..string.sub(underscored, 3, -2)
	end)

	-- get ID ref (E89, E212b, PiB2, R10:1842, ...)
	local refPattern = "([A-Z]+[A-Za-z0-9-]+)(%:?[^%s]*)"
	refID, refQ = string.match(ref, refPattern)
	if refQ ~= nil and refQ ~= "" then
		-- drop the first character (the ":")
		refQ = string.sub(refQ, 2)
	else
		refQ = nil -- string.match gives empty string
	end

	if refID ~= nil then
		if references[refID] ~= nil or p.load_reference(refID) then
			g = references[refID].group
			if pagename == word then
				-- not cat for author
				if not references[refID].isauthor then
					-- local on purpose: the original leaked `catPrefix` as a global
					local catPrefix = langObj:ucfirst(cat)
					category.store(catPrefix.." ki sont dins "..refID, true, 0, true)
				end
			end
		end
	else
		category.store("Årtikes avou des sourdants nén rkinoxhous dins l' modele ortografeyes")
	end

	-- forced group ([!d1900], ...)
	local fg = string.match(ref, "%[![A-Za-z0-9]+%]")
	if fg ~= nil then
		-- strip "[!" and "]" (local: the original leaked `fgid` as a global)
		local fgid = string.sub(fg, 3, -2)
		if groupsID[fgid] ~= nil then
			g = fgid
			ref = string.gsub(ref, "%[!"..fgid.."%]", '')
			category.store("Årtikes avou des adjinçnaedjes di troke dins l' modele ortografeyes")
		end
	end

	-- collecte refs by word
	if words[word] == nil then
		words[word] = {refID}
	else
		table.insert(words[word], refID)
	end

	-- for 'not' group
	if word == "-" or word == "nén dins" then
		g = "not"
		word = "-" -- FIXME we should delete this
	end

	-- check that the group does exist for the loaded language
	if g == nil or groupsID[g] == nil then
		g = 'unknown'
	end
	if g == 'unknown' and groupsID['byscript'] then
		g = 'byscript'
	end

	if g ~= "not" and g ~= "unknown" and g ~= "all" then -- and g ~= "byscript" then
		total_known = total_known + 1
	end

	-- group by writting script
	if (g == "byscript"
		or g == "tif" or g == "ara" or g == "lat") -- legacy
	then
		-- legacy
		if (g == "tif" or g == "ara" or g == "lat") then
			category.store("Årtikes avou des adjinçnaedjes di troke dins l' modele ortografeyes")
		end

		-- loaded only when a per-script grouping is actually needed
		local s = require("Module:Scripe")
		local script = s.findBestScript(word, lang)
		if script ~= "None" then
			g = script
		else
			g = 'unknown'
		end
	end

	-- add ref in group
	local li = p.build_ref(ref, refID, word, refQ)
	if g ~= nil then
		if output_lst[g] == nil then
			output_lst[g] = {}
		end
		if output_lst[g][word] ~= nil then
			output_lst[g][word] = output_lst[g][word] .. ', '..li
		else
			output_lst[g][word] = li
		end
	end
end

--- Load one reference into the `references` cache.
-- Only local data is requested (second argument `false`, "not Wikidata" per
-- the original note): the group plus one of wikisource / external / wikipedia.
-- Each alias of the reference gets its own cache entry pointing back to the
-- main entry through `.alias`.
-- The group of the main entry and of every alias goes through the same rule:
-- 'all' when the language has no group list, otherwise the declared group,
-- otherwise 'unknown'. (Aliases used to bypass that rule and could end up
-- with a nil group or with a group that does not exist for the language.)
-- @param refID reference id to load
-- @return true when the reference exists, false otherwise
function p.load_reference(refID)
	local ref = sourdant.get_reference(refID, false)
	if not ref then
		return false
	end

	local function resolve_group(declared)
		if groupsID["all"] ~= nil then
			return 'all'
		elseif declared ~= nil then
			return declared
		end
		return 'unknown'
	end

	-- kept in a local so that aliases and link data still reach the main
	-- entry even if an alias carries the same id as `refID`
	local entry = { group = resolve_group(ref.group) }
	references[refID] = entry

	-- aliases
	if ref.aliases ~= nil then
		for _, a in pairs(ref.aliases) do
			local declared = a.group
			if declared == nil then
				declared = ref.group
			end
			references[a.id] = {
				alias = entry,
				group = resolve_group(declared),
			}
		end
	end

	-- wikisource, or external, or wikipedia (first one available)
	if ref.wikisource then
		entry.wikisource = ref.wikisource
	elseif ref.external then
		entry.external = ref.external
	elseif ref.wikipedia then
		entry.wikipedia = ref.wikipedia
	end

	return true
end

--- Fill `groups` (ordered list) and `groupsID` (lookup by id) for the language.
-- The definitions come from the data page "Ortografeyes/groups-<lang>". When
-- that page yields nothing, a single catch-all group "all" is used instead.
-- The groups "unknown" and "not" (word is not in this reference) are always
-- appended at the end.
-- @return true, unconditionally
function p.load_groups()
	-- append one group definition and index it by its id
	local function register(entry)
		table.insert(groups, entry)
		groupsID[entry.id] = groups[#groups]
	end

	local loaded = base.load_data("Ortografeyes/groups-"..lang)

	if not loaded then
		-- add groups 'all' when there is no group
		register({id="all", title = "Totes les ortografeyes"})
	else
		-- per the original note, base.load_data() returns a read-only table,
		-- so its entries are re-registered in the module's own tables
		for idx, grp in ipairs(loaded) do
			table.insert(groups, idx, grp)
			groupsID[grp.id] = groups[idx]
		end
	end

	register({id="unknown", title = "Ôtès ortografeyes <small>(avou des sourdants nén rkinoxhous)</small>"})
	register({id="not", title = "Nén dins"})

	return true -- always ?
end

--- Build the whole output of the template from its arguments.
-- Every argument except `lang`, `cat` and `croejh` is read as
-- "spelling = comma-separated references"; a comma inside a reference is
-- written "\,". Each reference is filed by p.sort_ref(), then the groups are
-- rendered. For Walloon entries without any reference in a known group, an
-- invitation to add spellings is appended (or shown alone when no group was
-- rendered at all). The stored categories are appended last.
-- @param orto argument table of the template
-- @return the HTML / wikitext to display
function p.render(orto)
	-- arguments that configure the module instead of naming a spelling
	-- FIXME see p.init()
	local reserved = { lang = true, cat = true, croejh = true }

	-- for each word
	for key, value in pairs(orto) do
		if not reserved[key] then
			local spelling = mw.text.trim(key)
			-- protect escaped commas while splitting, restore them afterwards
			local protected = string.gsub(value, "\\,", "\\x2C")
			local pieces = mw.text.split(protected, ",", true)
			for _, piece in ipairs(pieces) do
				local restored = string.gsub(piece, "\\x2C", ",")
				local one_ref = mw.text.trim(restored)
				p.sort_ref(spelling, one_ref, lang)
			end
		end
	end

	local listing = p.build_groups()
	local out = '<div class="g-or">'..listing..'</div>'

	if total_known == 0 and lang == "wa" then
		local has_listing = (listing ~= "")

		local filler
		if has_listing then
			filler = "cåzu"
		else
			filler = "co"
		end

		local linktxt = lingaedje.grammar({args={"Djivêye des motîs",lang,"",vowel='1'}});
		local langtxt = langObj:ucfirst(lingaedje.get_name(lang))
		category.store(lingaedje.grammar({args={"Mots",lang,"avou des ortografeyes a radjouter",vowel='1'}}))

		local notice = '<p class="info">I gn a '..filler..' rén dins ci hagnon ci, mins si vos irîz rnaxhî dins des ôtes motîs, et trover l’ mot « <b>'..pagename..'</b> », riherez ciddé li scrijha do mot ki vos î avoz trové, et l’ rahouca do motî el [[Wiccionaire:'..langtxt..'#Sourdants|'..linktxt..']].</p>'

		-- the notice follows the groups, or replaces the empty wrapper
		if has_listing then
			out = out..notice
		else
			out = notice
		end
	end

	out = out .. category.get_all()
	return out
end

--- Entry point of the module, meant to be called through {{#invoke:}} from
-- Modele:Orto.
-- Reads the configuration arguments (`lang`, `croejh`, `cat`), records the
-- page name, loads the groups of the language and renders the list.
-- @param frame frame object handed over by Scribunto (not used directly)
-- @return the HTML / wikitext produced by p.render()
function p.init(frame)

	-- get args from parent modele; when the module is invoked directly there
	-- is no parent frame (getParent() returns nil), so fall back to the
	-- arguments of the #invoke itself instead of failing on a nil value
	local current = mw.getCurrentFrame()
	local modele = current:getParent() or current
	local args = modele.args

	-- set lang
	if args.lang ~= nil then
		lang = args.lang
	end

	-- set croejh (forwarded to the article-link builder)
	if args.croejh ~= nil then
		croejh = args.croejh
	end

	-- set category (dijhêye, spots and mots ???)
	-- any other value keeps the default
	if args.cat ~= nil then
		if args.cat == "spot" or args.cat == "spots" then
			cat = "spots"
		elseif args.cat == "dijhêye" or args.cat == "dijhêyes" then
			cat = "dijhêyes"
		end
	end

	-- set page name
	pagename = mw.title.getCurrentTitle().text

	p.load_groups()

	return p.render(args)

end

return p