Modul:Transliteration

Documentation for this module may be created at Modul:Transliteration/doc

local p = {}
local getArgs = require('Modul:Arguments').getArgs
local transliterationMaps = mw.loadData('Modul:Transliteration/langdata')

--- Report whether transliteration data exists for a language.
-- @param langCode key looked up in the loaded transliteration data
-- @return true when the entry for langCode is truthy, false otherwise
local function isTransliterationSupported(langCode)
	-- Coerce the lookup result to a real boolean.
	return transliterationMaps[langCode] and true or false
end
p.isTransliterationSupported = isTransliterationSupported

--- Apply the editing markers contained in a string and strip them.
-- Two marker characters are interpreted:
--   U+2326 (bytes \226\140\166): removed together with the character that follows it;
--   U+232B (bytes \226\140\171): removed together with the last character kept so far.
-- A marker with nothing to delete (U+2326 at the very end, U+232B before any
-- kept character) is simply dropped.
-- @param str string to process; split into characters with mw.ustring
-- @return the string with all markers applied
local function postProcess(str)
	local DELETE_NEXT = '\226\140\166'
	local DELETE_PREV = '\226\140\171'

	local chars = {}
	-- Length is loop-invariant; compute it once instead of on every iteration.
	local len = mw.ustring.len(str)
	local idx = 1
	while idx <= len do
		local crtChar = mw.ustring.sub(str, idx, idx)
		if crtChar == DELETE_NEXT then
			-- Extra increment skips the character after the marker.
			idx = idx + 1
		elseif crtChar == DELETE_PREV then
			-- The check must look at the output buffer; the length of the
			-- `table` library itself is always 0.
			if #chars > 0 then
				table.remove(chars)
			end
		else
			chars[#chars + 1] = crtChar
		end
		idx = idx + 1
	end
	return table.concat(chars)
end

--- Resolve a conversion rule to the replacement for one character.
-- A rule is either a string (returned as is) or a table with optional parts:
--   bh  : table indexed by the previous character, holding nested rules
--   ah  : table indexed by the next character, holding nested rules
--   def : value returned when neither context lookup produces a result
-- Any other kind of rule (including nil) resolves to nil.
-- @param conversionRule rule to resolve
-- @param crtChar current character; only forwarded to nested rules
-- @param prevChar character preceding the current one
-- @param nextChar character following the current one
-- @return the resolved replacement, or nil when the rule yields nothing
local function applyConversionRule(conversionRule, crtChar, prevChar, nextChar)
	local ruleType = type(conversionRule)
	if ruleType == 'string' then
		return conversionRule
	end
	if ruleType ~= 'table' then
		return nil
	end

	-- Context rules are tried in order: previous character first, then next
	-- character; the first truthy result wins.
	if prevChar then
		local byPrev = conversionRule['bh']
		if byPrev then
			local resolved = applyConversionRule(byPrev[prevChar], crtChar, prevChar, nextChar)
			if resolved then
				return resolved
			end
		end
	end

	if nextChar then
		local byNext = conversionRule['ah']
		if byNext then
			local resolved = applyConversionRule(byNext[nextChar], crtChar, prevChar, nextChar)
			if resolved then
				return resolved
			end
		end
	end

	return conversionRule['def']
end

--- Transliterate a text using the character map of the given language.
-- Every character is resolved through applyConversionRule together with its
-- lower-cased neighbours (the empty string stands in at either end of the
-- text). When the character itself has no rule, its lower-case form is tried
-- and the result is passed through lang:ucfirst. Characters without any rule
-- are copied unchanged. The assembled string is finally run through
-- postProcess.
-- @param text string to transliterate; nil is passed through as nil
-- @param langCode key into the transliteration data, also used to create the
--   mw.language object that does the case conversions
-- @return the transliterated string, or nil when text is nil
-- Raises an error (level 2, i.e. attributed to the caller) when no map exists
-- for langCode.
local function transliterate(text, langCode)
	if text == nil then return nil end
	local map = transliterationMaps[langCode]
	if not map then
		-- tostring keeps the message intact even when langCode is nil.
		error('Transliteration from language ' .. tostring(langCode) .. ' not supported', 2)
	end
	-- Created only after validation, so an unsupported code always gets the
	-- message above.
	local lang = mw.language.new(langCode)

	-- Split once up front instead of re-extracting neighbours at every step.
	local textLen = mw.ustring.len(text)
	local chars = {}
	for strIdx = 1, textLen do
		chars[strIdx] = mw.ustring.sub(text, strIdx, strIdx)
	end

	-- Pieces are collected in a table and joined once, avoiding repeated
	-- string concatenation inside the loop.
	local pieces = {}
	for strIdx = 1, textLen do
		local crtChar = chars[strIdx]
		local prevChar = strIdx > 1 and lang:lc(chars[strIdx - 1]) or ''
		local nextChar = strIdx < textLen and lang:lc(chars[strIdx + 1]) or ''

		local convertedChar = applyConversionRule(map[crtChar], crtChar, prevChar, nextChar)
		if not convertedChar then
			-- No rule for the character as written: fall back to the rule of
			-- its lower-case form and capitalise the result.
			local lcCrtChar = lang:lc(crtChar)
			local lcConvertedChar = applyConversionRule(map[lcCrtChar], lcCrtChar, prevChar, nextChar)
			convertedChar = lcConvertedChar and lang:ucfirst(lcConvertedChar)
		end
		pieces[strIdx] = convertedChar or crtChar
	end
	return postProcess(table.concat(pieces))
end
p.transliterate = transliterate

--- Frame-based wrapper around transliterate.
-- The text is taken from the `text` argument or, failing that, the first
-- positional argument; the language from `lang` or the second positional
-- argument.
-- @param frame object handed to getArgs to collect the arguments
-- @return whatever transliterate returns for the collected arguments
local function transliterateFromFrame(frame)
	local args = getArgs(frame)
	return transliterate(args.text or args[1], args.lang or args[2])
end
p.transliterateFromFrame = transliterateFromFrame

return p