Module:Lang: Difference between revisions

synch from sandbox;
(+|italics= alias;)
(synch from sandbox;)
Line 1:
--[=[
 
Lua support for the {{lang}}, {{lang-xx}}, and {{transl}} templates and replacement of various supporting templates.
 
]=]
Line 77:
'\234\172\176-\234\173\175', -- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF)
'\239\172\128-\239\172\134', -- Alphabetic Presentaion Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86)
'\239\188\129-\239\188\188', -- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC)
']', -- close the set
});
Line 88:
--[[--------------------------< V A L I D A T E _ I T A L I C >------------------------------------------------
 
validates |italic= or |italics= assigned values.
 
When |italic= is set and has an acceptable assigned value, return the matching css font-style property value or,
for the special case 'default', return nil.
 
When |italic= is not set, or has an unacceptable assigned value, return nil and a nil error message.
 
When both |italic= and |italics= are set, returns nil and a 'conflicting' error message.

The return value nil causes the calling lang, lang_xx, or transl function to set args.italic according to the template's
defined default ('inherit' for {{lang}}, 'inherit' or 'italic' for {{lang-xx}} depending on
the individual template's requirements, 'italic' for {{transl}}) or to the value appropriate to |script=, if set ({{lang}}
and {{lang-xx}} only).
 
Accepted values and the values that this function returns are:
nil - when |italic= absent or not set; returns nil
default - for completeness, should rarely if ever be used; returns nil
yes - force args.text to be rendered in italic font; returns 'italic'
no - force args.text to be rendered in normal font; returns 'normal'
unset - disables font control so that font-style applied to text is dictated by surrounding markup inside or outside the template; returns 'inherit'
 
]]
 
local function validate_italic (italic, italics)
	-- Maps each accepted parameter value to its css font-style property value.
	-- 'default' is listed for completeness; assigning nil means it is simply absent from the table, so looking it up
	-- yields nil just like any unrecognized value.
	local properties = {['yes'] = 'italic', ['no'] = 'normal', ['unset'] = 'inherit', ['default'] = nil};

	-- |italic= and |italics= are aliases; supplying both is ambiguous, so report the conflict rather than
	-- silently preferring one of them.
	if italic and italics then
		return nil, 'conflicting: &#124;italic= and &#124;italics=';
	end

	-- At most one alias is set here; an absent or unrecognized value looks up as nil.
	-- The second return value (error message) is explicitly nil on this path.
	return properties[italic or italics], nil;
end
 
Line 273 ⟶ 280:
if is_set (script) then
if is_set (args_script) then
return code, nil, nil, nil, nil, 'redundant script tag'; -- both code with script and |script= not allowed
end
else
Line 308 ⟶ 315:
if is_set (variant) then
if is_set (args_variant) then
return code, nil, nil, nil, nil, 'redundant variant tag'; -- both code with variant and |variant= not allowed
end
else
Line 342 ⟶ 349:
--[[--------------------------< M A K E _ E R R O R _ M S G >--------------------------------------------------
 
assembles an error message from template name, message text, then adds a help link, and error category.
 
]]
 
local function make_error_msg (msg, args, template)
local out = {};
local category;
if 'transl' == template then
category = 'transl';
else
category = 'lang and lang-xx'
end
table.insert (out, table.concat ({'&#x5B;', args.text or 'undefined', '&#x5D; '})); -- for error messages output args.text if available
table.insert (out, table.concat ({'<span style=\"font-size:100%; font-style:normal;\" class=\"error\">error: {{', template, '}}: '}));
table.insert (out, msg);
table.insert (out, table.concat ({' ([[:Category:lang', andcategory, lang-xx' template errors|help]])'}));
table.insert (out, '</span>');
if (0 == namespace) and not is_set (args.nocat) then -- only categorize in article space
table.insert (out, table.concat ({'[[Category:lang', andcategory, lang-xx' template errors]]'}));
end
 
Line 402 ⟶ 417:
local span = {};
local style_added = '';
 
if text:match ('^%*') then
table.insert (span, '*'); -- move proto language text prefix outside of italic markup if any
text = text:gsub ('^%*', ''); -- remove the spat from the text
end
 
if 'italic' == style then
Line 444 ⟶ 464:
 
 
--[=[--------------------------< M A K E _ C A T E G O R Y >----------------------------------------------------
 
For individual language, <language>, returns:
TODO: figure out how to correctly support collective language codes: sem, Semitic languages (collective names
[[Category:Articles containing <language>-language text]]
appear to always include the word 'languages'). May need new categories so that the category names are sensible.
 
for English:
]]
[[Category:Articles containing explicitly cited English-language text]]
for artificial languages (code: art)
[[Category:Articles containing constructed-language text]]
 
for ISO 639-2 collective languages (and for 639-1 bh):
[[Category:Articles with text from the <language> languages collective]]
 
]=]
 
local function make_category (code, language_name, nocat)
Line 458 ⟶ 487:
end
if language_name:find ('languages') then
return table.concat ({'[[Category:Articles with text from the ', language_name, ' collective]]'});
end
table.insert (cat, '[[Category:Articles containing ');
if ('en' == code) or ('eng' == code) then
table.insert (cat, 'explicitly cited English');
elseif 'art' == code then
Line 484 ⟶ 517:
To avoid confusion, in this module and the templates that use it, the transliteration script parameter is renamed
to be |translit-script= (in this function, tscript)
 
This function is used by both lang_xx() and transl()
lang_xx() always provides code, language_name, and translit; may provide tscript; never provides style
transl() always provides language_name, translit, and one of code or tscript, never both; always provides style
 
For {{transl}}, style only applies when a language code is provided
]]
 
local function make_translit (code, language_name, translit, std, tscript, style)
local title;
local tout = {};
local title_table = lang_data.translit_title_table; -- table of transliteration standards and the language codes and scripts that apply to those standards
if is_set (code) then -- when a language code is provided (always with {{lang-xx}} templates, not always with {{transl}})
if is_set (code) then
if not style then -- nil for is the default italic style
table.insert (tout, "<i lang=\"");
table.insert (tout, code"<i lang=\""); -- so use <i> tag
else
table.insert (tout, "-Latn\" title=\""); -- transliterations are always Latin script
table.insert (tout, table.concat ({'<span style=\"font-style:', style, '\" lang=\"'})); -- non-standard style, construct a span tag for it
else
end
table.insert (tout, "<i title=\""); -- no language code, no lang= attribute ({{transl}} only)
table.insert (tout, code);
end
table.insert (tout, "-Latn\" title=\""); -- transliterations are always Latin script
else
table.insert (tout, "<span title=\""); -- when no language code: no lang= attribute, not italic ({{transl}} only)
end
if not is_set (std) and not is_set (tscript) then -- when neither standard nor script specified
table.insert (tout, language_name); -- write a generic tool tip
table.insert (tout, '-language transliteration');
elseif is_set (std) and is_set (tscript) then -- when both are specified
if title_table[std][tscript] then -- and if standard is legitimate
table.insert (tout,if title_table[std][tscript]); then -- addand theif appropriatescript textfor tothat thestandard toolis tiplegitimate
table.insert (tout, table.concat ({title_table[std][tscript:lower()], ' (', lang_name_table.script[tscript][1], ' script) transliteration'})); -- add the appropriate text to the tool tip
else
table.insert (tout, title_table[std]['default']); -- use the default if script not in std table; TODO: maint cat? error message because script not found for this standard?
end
else
return ''; -- invalid standard, setup for error message
table.insert (tout, title_table[std]['default']); -- use the default if script not in std table
end
 
elseif is_set (std) then -- translit-script not set, use language code
if not title_table[std] then return ''; end -- invalid standard, setup for error message
if title_table[std][code] then -- if language code is in the table (transl may not provide a language code)
table.insert (tout, table.concat ({title_table[std][code:lower()], ' (', lang_name_table.lang[code][1], ' language) transliteration'})); -- add the appropriate text to the tool tip
else -- code doesn't match
table.insert (tout, title_table[std]['default']); -- so use the standard's default
Line 522 ⟶ 570:
table.insert (tout, title_table['NO_STD'][code]); -- use language code
else
if is_set (tscript) then
table.insert (tout, language_name); -- write a generic tool tip
table.insert (tout, table.concat ({language_name, '-script transliteration'})); -- write a script tool tip
elseif is_set (code) then
table.insert (tout, table.concat ({language_name, '-language transliteration'})); -- write a language tool tip
else
table.insert (tout, ' transliteration'); -- generic tool tip (can we ever get here?)
end
end
end
Line 529 ⟶ 582:
table.insert (tout, '">');
table.insert (tout, translit);
if is_set (code) and not style then -- when a language code is provided (always with {{lang-xx}} templates, not always with {{transl}})
table.insert (tout, "</i>");
table.insert (tout, "</i>"); -- close the italic tag
else
table.insert (tout, "</span>"); -- no language code so close the span tag
end
return table.concat (tout);
end
Line 546 ⟶ 603:
local function validate_text (template, args)
if not is_set (args.text) then
return make_error_msg (table.concat ({'{{', template, '}}: no text'}), args, template);
end
 
if args.text:find ("%f[\']\'\'\'\'%f[^\']") or args.text:find ("\'\'\'\'\'[\']+") then -- because we're looking, look for 4 appostrophes or 6+ appostrophes
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args, template);
end
 
if 'lang-xx' == template then -- for the time being, this error checking does not apply to {{lang}}
if ('unset' ~= args.italic) and ('unset' ~= args.italics) then -- allow italic markup when |italic=unset or |italics=unset
if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then -- italic but not bold, or bold italic
return make_error_msg (table.concat ({'{{', template, '}}: text has italic markup'}), args, template);
end
end
Line 592 ⟶ 649:
--[[--------------------------< P R O T O _ P R E F I X >------------------------------------------------------
 
for proto languages, text is prefixed with a splat. We do that here as a flag for make_text_span() so that a splat
will be rendered outside of italic markup (if used). If the first character in text here is already a splat, we
do nothing
 
]]
 
local function proto_prefix (text, language_name)
if language_name:find ('^Proto%-') and not text:find ('^*') then -- language is a proto and text does not already have leading splat
return table.concat ({'*', text}); -- prefix proto language text with a splat
end
Line 624 ⟶ 684:
 
if args[1] and args.code then
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |&#124;code=', args, 'lang');
else
args.code = args[1] or args.code; -- prefer args.code
Line 630 ⟶ 690:
 
if args[2] and args.text then
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |&#124;text=', args, 'lang');
else
args.text = args[2] or args.text; -- prefer args.text
Line 645 ⟶ 705:
 
if msg then
return make_error_msg (table.concat ({'{{lang}}: 'msg, msg})args, args'lang');
end
 
if args.italic, andmsg = validate_italic (args.italicsitalic, thenargs.italics);
if msg then
return make_error_msg ('{{lang}}: conflicting: |italic= and |italics=', args);
return make_error_msg (msg, args, 'lang');
else
args.italic = validate_italic (args.italic or args.italics); -- nil or font-style property value
end
 
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls
if ('latn' == subtags.script) or (is_latn (args.text) and 'en' ~= code) then -- script set to latn or text is wholly latn script but not rendering English
Line 680 ⟶ 739:
end
 
args.text = proto_prefix (args.text, language_name); -- prefix proto-language text with a splat
 
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size));
Line 736 ⟶ 795:
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}
local out = {};
local language_name; -- used to make display text, article links, and category names
local category_name; -- same as language_name except that it retains any parenthetical disambiguators (if any) from the data set
local subtags = {}; -- IETF subtags script, region, and variant
local code; -- the language code
Line 746 ⟶ 806:
 
if args[1] and args.text then
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |&#124;text=', args, 'lang-xx');
else
args.text = args[1] or args.text; -- prefer args.text
Line 757 ⟶ 817:
 
if args[2] and args.translit then
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |&#124;translit=', args, 'lang-xx');
else
args.translit = args[2] or args.translit -- prefer args.translit
Line 763 ⟶ 823:
if args[3] and (args.translation or args.lit) then
return make_error_msg ('{{lang-xx}}: conflicting: {{{3}}} and |&#124;lit= or |&#124;translation=', args, 'lang-xx');
elseif args.translation and args.lit then
return make_error_msg ('{{lang-xx}}: conflicting: |&#124;lit= and |&#124;translation=', args, 'lang-xx');
else
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation
Line 771 ⟶ 831:
 
if args.links and args.link then
return make_error_msg ('{{lang-xx}}: conflicting: |&#124;links= and |&#124;link=', args, 'lang-xx');
else
args.link = args.link or args.links; -- prefer args.link
Line 781 ⟶ 841:
 
if msg then -- if an error detected then there is an error message
return make_error_msg (table.concatmsg, args, ({'{{lang-xx}}: ', msg}), args);
end
if args.italic, andmsg = validate_italic (args.italicsitalic, thenargs.italics);
if msg then
return make_error_msg ('{{lang-xx}}: conflicting: |italic= and |italics=', args);
return make_error_msg (msg, args, 'lang-xx');
else
args.italic = validate_italic (args.italic or args.italics); -- nil or font-style property value
end
 
Line 827 ⟶ 886:
end
end
 
category_name = language_name; -- category names retain IANA parenthetical diambiguators (if any)
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them
 
if args.label then
Line 834 ⟶ 896:
else
if 'no' == args.link then
table.insert (out, language_name); -- language name without wikilink
else
if language_name:find ('languages') then
table.insert (out, make_wikilink (language_name .. ' language', language_name)); -- language name with wikilink
table.insert (out, make_wikilink (language_name)); -- collective language name uses simple wikilink
else
table.insert (out, make_wikilink (language_name .. ' language', language_name)); -- language name with wikilink
end
end
table.insert (out, ': '); -- separator
end
 
args.text = proto_prefix (args.text, language_name); -- prefix proto-language text with a splat
 
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
 
if is_set (args.translit) and not is_latn (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test)
table.insert (out, ', '); -- comma to separate text from translit
if 'none' ~= args.label then
table.insert (out, '<small>');
translit_script_name =if lang_name_table.script[args['translit-script']] or language_name;then -- ifwhen |transtranslit-script= validis ISOset, 15924try codeto preferuse associatedthe name over languagescript's name
translit_script_name = lang_name_table.script[args['translit-script'][1]];
translit_title = mw.title.makeTitle (0, table.concat ({'Romanization of ', translit_script_name}));
else
translit_script_name = language_name; -- fall back on language name
end
translit_title = mw.title.makeTitle (0, table.concat ({'Romanization of ', translit_script_name})); -- make a title object
if translit_title.exists and ('no' ~= args.link) then
table.insert (out, make_wikilink ('Romanization of ' .. translit_script_name or language_name, 'translit.')); -- make a wikilink if there is an article to link to
Line 856 ⟶ 926:
table.insert (out, '<abbr title="transliteration">translit.</abbr>'); -- else define the abbreviation
end
table.insert (out, '&nbsp;</small>'); -- close the small tag
end
Line 863 ⟶ 933:
table.insert (out, translit);
else
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or '[missing]'}), '\' or transli-script: \''args, args['translitlang-scriptxx'] or 'missing', '\''}), args);
end
end
Line 881 ⟶ 951:
end
table.insert (out, make_category (code, language_namecategory_name, args.nocat));
table.insert (out, render_maint(args.nocat)); -- maintenance messages and categories
 
Line 890 ⟶ 960:
--[[--------------------------< L A N G _ X X _ I T A L I C >--------------------------------------------------
 
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_italic(). Sets the initial
style state to italic.
 
]]
Line 903 ⟶ 972:
--[[--------------------------< L A N G _ X X _ I N H E R I T >------------------------------------------------
 
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_inherit(). Sets the initial
style state to inherit.
 
]]
Line 947 ⟶ 1,015:
end
end
 
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them
 
return language_name;
Line 952 ⟶ 1,022:
end
 
 
--[[--------------------------< T R A N S L >------------------------------------------------------------------

Prospective replacement for the template {{transl}}

Positional parameter forms handled here:
	{{transl|<code>|<text>}}
	{{transl|<code>|<standard>|<text>}}
where <code> is looked up first as a language code (override table, then standard language table) and then as
an ISO 15924 script code.

Returns the string produced by make_translit() or, when a parameter is missing, conflicting, or unrecognized,
the string produced by make_error_msg().

]]

function p.transl (frame)
	local args = getArgs(frame);									-- no {{#invoke:}} parameters
	local title_table = lang_data.translit_title_table;				-- table of transliteration standards and the language codes and scripts that apply to those standards
	local language_name;											-- language name that matches language code; used for tool tip
	local script;													-- IANA script
	local msg;														-- for when called functions return an error message

	if is_set (args[3]) then										-- [3] set when {{transl|code|standard|text}}
		args.text = args[3];										-- get the transliterated text
		args.translit_std = args[2];								-- get the standard

		if not title_table[args.translit_std] then
			return make_error_msg (table.concat ({'unrecognized transliteration standard: ', args.translit_std}), args, 'transl');
		end
	else
		if is_set (args[2]) then									-- [2] set when {{transl|code|text}}
			args.text = args[2];									-- get the transliterated text
		else
			-- args[2] missing; is args[1] a code or is it the transliterated text?
			-- args[1] may itself be absent (template called without positional parameters), so test it before indexing
			if args[1] and args[1]:match ('^%a%a%a?%a?$') then
				return make_error_msg ('no text', args, 'transl');	-- args[1] is a code so we're missing text
			else
				args.text = args[1];								-- args[1] is not a code so we're missing that; assign args.text for error message
				return make_error_msg ('missing language / script code', args, 'transl');
			end
		end
	end

	if not is_set (args[1]) then									-- IANA language code used for html lang= attribute; or ISO 15924 script code
		return make_error_msg ('missing language / script code', args, 'transl');	-- missing language / script code so quit
	end

	-- use the language/script code portion only (2, 3, or 4 alpha characters)
	local code_prefix = args[1]:match ('^%a%a%a?%a?');
	if not code_prefix then											-- match() returns nil when args[1] does not begin with two letters; report instead of raising a script error
		return make_error_msg (table.concat ({'unrecognized language / script code: ', args[1]}), args, 'transl');
	end
	args.code = code_prefix:lower();								-- lower case because table indexes are lower case

	args.italic, msg = validate_italic (args.italic, args.italics);
	if msg then
		return make_error_msg (msg, args, 'transl');
	end

	if 'italic' == args.italic then									-- 'italic' when |italic=yes; because that is same as absent or not set and |italic=default
		args.italic = nil;											-- set to nil;
	end

	if lang_data.override[args.code] then							-- is code a language code defined in the override table?
		language_name = lang_data.override[args.code][1];
	elseif lang_name_table.lang[args.code] then						-- is code a language code defined in the standard language code tables?
		language_name = lang_name_table.lang[args.code][1];
	elseif lang_name_table.script[args.code] then					-- if here, code is not a language code; is it a script code?
		language_name = lang_name_table.script[args.code][1];
		script = args.code;											-- code was an ISO 15924 script so use that instead
		args.code = '';												-- unset because not a language code
	else
		return make_error_msg (table.concat ({'unrecognized language / script code: ', args.code}), args, 'transl');	-- invalid language / script code
	end
																	-- here only when all parameters passed to make_translit() are valid
	return make_translit (args.code, language_name, args.text, args.translit_std, script, args.italic);
end
 
 
Anonymous user