Module:Lang

From TEPwiki, Urth's Encyclopedia
Revision as of 11:17, 3 November 2017 by w>Trappist the monk
Jump to navigation Jump to search

Documentation for this module may be created at Module:Lang/doc

--[=[
This is an experiment to see what is required to consolidate all of the myriad {{lang-xx}} templates
and their subtemplates into a single module with a data table

{{lang-es}} has parameters:
{{{1}}} text - required
{{{link}}} or {{{links}}} defaults to 'yes'
{{{lit}}} literal translation

{{language with name}} has parameters:
{{{1}}} iso language code - required
{{{2}}} language name in English - superfluous?
{{{3}}} text - required
{{{4}}} or {{{lit}}} literal translation
{{{link}}} or {{{links}}} set to 'no' disables '{{{2}}} language' wikilinks
{{{rtl}}} passed through to {{lang}}
{{{nocat}}} passed through to {{lang}}

{{lang}} has parameters:
{{{1}}} iso language code - required
{{{2}}} text - required
{{{rtl}}} if set to any value, set dir="rtl" attribute and inserts &lrm; after the </span> tag
{{{nocat}}} if set to any value, disables categorization

How it works now:
	1. {{lang-es}} receives text as {{{1}}} to which it adds italic markup, sets |links to {{{link}}} or {{{links}}} or yes, sets |lit to {{{lit}}}, and calls {{language with name}}.  {{{rtl}}} ignored for this example
	2. {{language with name}} renders [[Spanish language|Spanish]] ({{{links}}} not set) or Spanish ({{{links}}} set) and calls {{lang}}
	3. {{lang}} wraps the text in the <span lang="es">text</span> and adds categorization
]=]

require('Module:No globals');													-- presumably makes accidental use of global variables an error; confirm against that module
local p = {};																	-- table of exported functions; returned at the end of this file

local getArgs = require ('Module:Arguments').getArgs;							-- argument fetcher; the exported functions call it with their frame
local lang_name_table = require ('Module:Language/name/data');					-- name data; indexed in this file as .lang, .script, and .region
local lang_data =  mw.loadData ('Module:Lang/data');							-- language name override and transliteration tool-tip tables

local namespace = mw.title.getCurrentTitle().namespace;							-- namespace of the current page; compared against 0 when deciding whether to categorize


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Reports whether an argument carries a usable value.  An argument is 'set' when it is neither nil nor the empty
string; every other value counts as set.

Returns true when var is set; false otherwise.

]]

local function is_set( var )
	if nil == var then															-- missing argument
		return false;
	end
	return '' ~= var;															-- present; set unless it is the empty string
end


--[[--------------------------< G E T _ I E T F _ P A R T S >--------------------------------------------------

extracts and returns IETF language tag parts:
	primary language subtag (required) - 2 or 3 character lower case IANA language code [ll]
	script subtag - four character title-case IANA script code [Ssss]
	region subtag - two-character upper-case IANA region code [RR]

in any one of these forms
	ll (or lll)
	ll-Ssss
	ll-RR
	ll-Ssss-RR
each of ll, Ssss, and RR when used must be valid

returns three values: code, script, region
	source missing, malformed, or language code unknown:	nil, nil, nil
	language code ok but script unknown:					code, nil, nil
	language code and script ok but region unknown:			code, script, nil
	otherwise:												code, script, region
A part that was omitted from source is returned as an empty string.

The language code is looked up in lang_data.override and lang_name_table.lang; script and region are looked up
in lang_name_table.script and lang_name_table.region.

]]

local function get_ietf_parts (source)
	local code;
	local script;
	local region;
	
	if not is_set (source) then
		return nil, nil, nil;
	end
	
	-- first decide which general shape we have, then apply the strict (case and length sensitive) pattern for that shape
	if source:match ('^%a+%-%a+%-%a+$') then									-- ll-Ssss-RR
		code, script, region = source:match ('^(%l%l%l?)%-(%u%l%l%l)%-(%u%u)$');
	elseif source:match ('^%a+%-%a%a%a%a$') then								-- ll-Ssss
		code, script = source:match ('^(%l%l%l?)%-(%u%l%l%l)$');
		region = '';
	elseif source:match ('^%a+%-%a%a$') then									-- ll-RR
		code, region = source:match ('^(%l%l%l?)%-(%u%u)$');
		script = '';
	elseif source:match ('^%a+$') then											-- ll (or lll)
		code = source:match ('^(%l%l%l?)$');
		script = '';
		region = '';
	else
		return nil, nil, nil;													-- don't know what we got but it is malformed
	end
	
	if not code then															-- right general shape but wrong letter case or subtag length
		return nil, nil, nil;													-- bail out here rather than index the data tables with nil
	end
	
	if not (lang_data.override[code] or lang_name_table.lang[code]) then
		return nil, nil, nil;													-- invalid language code, don't know about the others (don't care?)
	end
	
	if is_set (script) then
		if not lang_name_table.script[script] then
			return code, nil, nil;												-- language code ok, invalid script, don't know about region (don't care?)
		end
	end
	
	if is_set (region) then
		if not lang_name_table.region[region] then
			return code, script, nil;											-- language code and script ok (script may be empty), invalid region
		end
	end
	
	return code, script, region;												-- return the good bits
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Builds wikilink markup from a link target and optional display text:
	link and display both set:	[[L|D]]
	only link set:				[[L]]
	link not set:				empty string (display, if any, is ignored)

]=]

local function make_wikilink (link, display)
	if not is_set (link) then													-- nothing to link to
		return '';
	end
	
	if not is_set (display) then												-- plain link
		return '[[' .. link .. ']]';
	end
	
	return '[[' .. link .. '|' .. display .. ']]';								-- piped link
end


--[[--------------------------< M A K E _ T E X T _ S P A N >--------------------------------------------------

Wraps text in an html <span> that carries the language attribute.

	code - value written into the lang="..." attribute
	text - the text to wrap; when not set (nil or empty string) an empty span is produced
	rtl - when 'yes', adds dir="rtl" to the span and appends &lrm; after the closing tag
	italic - when 'yes', text is wrapped in wiki italic markup
	size - when set, written into a style="font-size:...;" attribute

Returns the assembled markup as a single string.

]]

local function make_text_span (code, text, rtl, italic, size)
	local span = {};
	local is_rtl = 'yes' == rtl;

	table.insert (span, '<span lang="');										-- open <span> tag
	table.insert (span,  code);													-- language attribute
	table.insert (span, '"');
	if is_rtl then
		table.insert (span, ' dir="rtl"');										-- for right to left languages
	end
	if is_set (size) then														-- {{lang}} only
		table.insert (span, table.concat ({' style="font-size:', size, ';"'}))
	end
	table.insert (span, '>');													-- close the opening span tag
	
	if is_set (text) then														-- a nil text inside the italic table.concat() would leave a hole in the table; skip missing text
		if 'yes' == italic then
			table.insert (span, table.concat ({"''", text, "''"}));				-- text with italic markup
		else
			table.insert (span, text);											-- DEFAULT: text is not italicized
		end
	end
	
	table.insert (span, '</span>');												-- close the span
	if is_rtl then
		table.insert (span, '&lrm;');											-- make sure the browser knows that we're at the end of the rtl
	end
	
	return table.concat (span);													-- put it all together and done
end


--[[--------------------------< M A K E _ C A T E G O R Y >----------------------------------------------------

Builds the 'Articles containing ...-language text' category link for a page.

	code - language code; 'en' and 'eng' select 'explicitly cited English', 'art' selects 'constructed'
	language_name - used in the category name for every other code
	nocat - any truthy value suppresses the category

Returns the category link, or an empty string (safe for concatenation) when nocat is truthy or the module-level
namespace value is not 0.

]]

local function make_category (code, language_name, nocat)
	if nocat or (0 ~= namespace) then											-- only categorize in article space, and only when not inhibited
		return '';
	end
	
	local label;																-- the part of the category name that varies
	if 'art' == code then
		label = 'constructed';
	elseif ('en' == code) or ('eng' == code) then
		label = 'explicitly cited English';
	else
		label = language_name;
	end
	
	local pieces = {'[[Category:Articles containing '};
	table.insert (pieces, label);
	table.insert (pieces, '-language text]]');
	
	return table.concat (pieces);
end


--[[--------------------------< M A K E _ T R A N S L I T >----------------------------------------------------

return translit <span>...</span> else return empty string

The value |script= is not used in {{transl}} for this purpose; instead it uses |code.  Because language scripts
are listed in the {{transl}} switches they are included in the data tables.  The script parameter is introduced
at {{Language with name and transliteration}}.  If |script= is set, this function uses it in preference to code.

	code - language code; key into the tool-tip tables when script is not set or has no entry
	language_name - used for the generic '<language_name> transliteration' tool tip
	translit - the transliterated text placed inside the span
	std - transliteration standard; selects a sub-table of lang_data.translit_title_table
	script - script key; preferred over code when set

Tool tip selection:
	std and script both set:	the std table's entry for script; empty string returned when std or script is unknown
	only std set:				the std table's entry for code, else its 'default' entry; empty string returned when std is unknown
	only script set:			the 'NO_STD' table's entry for script, else for code, else the generic tool tip
	neither set:				the generic tool tip

An empty-string return tells the caller to emit an error message.

]]

local function make_translit (code, language_name, translit, std, script)
	local tout = {};
	local title_table = lang_data.translit_title_table;						-- table of transliteration standards and the language codes and scripts that apply to those standards
	
	table.insert (tout, "''<span title=\"");
	
	if is_set (std) then
		local std_table = title_table[std];										-- look the standard up once
		if not std_table then
			return '';															-- invalid standard (with or without script), set up for error message
		end
		
		if is_set (script) then													-- when both are specified
			if not std_table[script] then
				return '';														-- script not known to this standard, set up for error message
			end
			table.insert (tout, std_table[script]);								-- add the appropriate text to the tool tip
		else																	-- script not set, use language code
			table.insert (tout, std_table[code] or std_table['default']);		-- code doesn't match so use the standard's default
		end
	elseif is_set (script) then													-- here if script set but std not set
		local no_std = title_table['NO_STD'] or {};
		local tip = no_std[script] or no_std[code];								-- use script if listed, else language code
		
		if tip then
			table.insert (tout, tip);
		else
			table.insert (tout, language_name);									-- write a generic tool tip
			table.insert (tout, ' transliteration');
		end
	else																		-- when neither standard nor script specified
		table.insert (tout, language_name);										-- write a generic tool tip
		table.insert (tout, ' transliteration');
	end

	table.insert (tout, '" class="Unicode" style="white-space:normal; text-decoration:none">');
	table.insert (tout, translit);
	table.insert (tout, "</span>''");
	return table.concat (tout);
end


--[[--------------------------< L A N G >----------------------------------------------------------------------

{{#invoke:lang|lang|code=<code>|text={{{1}}}|rtl={{{rtl|}}}|size={{{size|}}}|nocat={{{nocat|}}}}}

|code = the BCP47 language code
|text = the displayed text in language specified by code
|rtl = boolean true identifies the language specified by code as a right-to-left language
|size = css keyword appropriate for use with css font-size:<size>
|nocat = boolean true inhibits normal categorization; error categories are not affected

Returns the language-tagged <span> followed by the normal category link.  When the language code is missing or
not recognized, returns an error message instead, followed (in article space only) by the unknown-language-code
error category.

]]

function p.lang (frame)
	local args = getArgs(frame);
	
	local out = {};
	local language_name;
	local code = get_ietf_parts (args.code);									-- only the language subtag is needed here; script and region are discarded
	
	if is_set (code) then
		if lang_data.override[code] then
			language_name = lang_data.override[code][1]
		elseif lang_name_table.lang[code] then
			language_name = lang_name_table.lang[code][1];						-- table entries sometimes have multiple names, always take the first one
		end
	end
	
	if not is_set (language_name) then
		table.insert (out, '<span style="font-size:100%" class="error">error: unknown language code: ');
		table.insert (out, args.code or 'missing');
		table.insert (out, '</span>');
		
		if 0 == namespace then													-- only categorize in article space (same rule that make_category() applies)
			table.insert (out, '[[Category:Articles containing unknown language template codes');
			if is_set (args.code) then
				table.insert (out, '|' .. args.code);							-- add a sort key
			end
			table.insert (out, ']]');
		end
		return table.concat (out);												-- emit an error message and category
	end
	
	table.insert (out, make_text_span (args.code, args.text, args.rtl, nil, args.size));		-- italics set to nil here because not supported by {{lang}}
	table.insert (out, make_category (args.code, language_name, args.nocat));
	return table.concat (out);													-- put it all together and done
end


--[[--------------------------< L A N G _ X X >----------------------------------------------------------------

{{#invoke:lang|lang_xx|code=<code>|text={{{1}}}|link={{{links|{{{link}}}}}}|rtl={{{rtl|}}}|nocat={{{nocat|}}}|italic={{{italic|}}}|lit={{{lit|}}}|translit={{{translit|}}}|script={{{script|}}}|std={{{std|}}}}}

|code = the BCP47 language code
|text = the displayed text in language specified by code
|link = boolean true (default) links language specified by code to associated language article
|rtl = boolean true identifies the language specified by code as a right-to-left language
|nocat = boolean true inhibits normal categorization; error categories are not affected
|italic = boolean true (default) renders displayed text in italic font
|lit = text that is a literal translation of text

for those {{lang-xx}} templates that support transliteration:
|translit = text that is a transliteration of text
|std = the standard that applies to the transliteration
|script = ISO 15924 script name; falls back to code

Returns, in order: the language name (wikilinked unless |link=no), a ': ' separator, the language-tagged <span>,
the optional transliteration, the optional literal translation, and the normal category link.  When the language
code is missing or not recognized, returns an error message instead, followed (in article space only) by the
unknown-language-code error category.

The 'translit.' label links to the page 'Romanization of <language name>' when that page exists; otherwise the
label is rendered as an <abbr>.

]]

function p.lang_xx (frame)
	local args = getArgs(frame);
	
	if not is_set (args.italic) then
		args.italic = 'yes';													-- DEFAULT for {{lang-xx}} templates is to italicize
	end
	
	args.size = nil;															-- size not supported in {{lang-xx}}
	
	local out = {};
	local language_name;
	local code = get_ietf_parts (args.code);									-- only the language subtag is needed here; script and region are discarded
	
	if is_set (code) then
		if lang_data.override[code] then
			language_name = lang_data.override[code][1]
		elseif lang_name_table.lang[code] then
			language_name = lang_name_table.lang[code][1];						-- table entries sometimes have multiple names, always take the first one
		end
	end
	
	if not is_set (language_name) then
		table.insert (out, '<span style="font-size:100%" class="error">error: unknown language code: ');
		table.insert (out, args.code or 'missing');
		table.insert (out, '</span>');
		
		if 0 == namespace then													-- only categorize in article space (same rule that make_category() applies)
			table.insert (out, '[[Category:Articles containing unknown language template codes');
			if is_set (args.code) then
				table.insert (out, '|' .. args.code);							-- add a sort key
			end
			table.insert (out, ']]');
		end
		return table.concat (out);												-- emit an error message and category
	end
	
	if 'no' == args.link then
		table.insert (out, language_name);										-- language name without wikilink
	else
		table.insert (out, make_wikilink (language_name .. ' language', language_name));	-- language name with wikilink
	end
	table.insert (out, ': ');													-- separator

	table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
	
	if is_set (args.translit) then												-- transliteration (not supported in {{lang}}); not supported in all {{lang-xx}}
		local romanization_page = 'Romanization of ' .. language_name;			-- the one page that is both tested for existence and linked
		local translit_title = mw.title.makeTitle (0, romanization_page);		-- may be nil when the title is not valid
		
		table.insert (out, ', <small>');
		if translit_title and translit_title.exists then
			table.insert (out, make_wikilink (romanization_page, 'translit.'));
		else
			table.insert (out, '<abbr title="transliteration">translit.</abbr>');
		end
		table.insert (out, '&nbsp;</small>');
		
		local translit = make_translit (args.code, language_name, args.translit, args.std, args.script)
		if is_set (translit) then
			table.insert (out, translit);
		else																	-- make_translit() returns empty string for an invalid std or script
			table.insert (out, '<span style="font-size:100%" class="error">error: invalid translit std or script</span>');
		end
	end
	
	if is_set (args.lit) then													-- translation (not supported in {{lang}})
		table.insert (out, ', <small>');
		table.insert (out, make_wikilink ('Literal translation', 'lit.'));
		table.insert (out, "&nbsp;</small>'");
		table.insert (out, args.lit);
		table.insert (out, "'");
	end
	
	table.insert (out, make_category (args.code, language_name, args.nocat));
	return table.concat (out);													-- put it all together and done
end

return p;																		-- export table holding p.lang and p.lang_xx