Module:Lang: Difference between revisions

Jump to navigation Jump to search
Content added Content deleted
(proto language support;)
(synch from sandbox;)
Line 31:
-- Returns true when var holds a usable value, i.e. it is neither nil nor the empty string; false otherwise.
local function is_set( var )
	if var == nil or var == '' then
		return false;															-- nothing usable was supplied
	end
	return true;
end
 
 
--[[--------------------------< I S _ L A T N >----------------------------------------------------------------
 
Returns true if all of text argument is written using Latn script letters, numbers and punctuation; false otherwise.
 
For the purposes of this function, Latn script is characters less control characters from these Unicode 10.0 Character Code Charts:
[http://www.unicode.org/charts/PDF/U0000.pdf C0 Controls and Basic Latin] U+0020–U+007E (20 - 7E)
[http://www.unicode.org/charts/PDF/U0080.pdf C1 Controls and Latin-1 Supplement] U+00A0-U+00AC, U+00C0–U+00FF (C2 A0 - C2 AC, C3 80 - C3 BF: \194\160-\194\172)
[http://www.unicode.org/charts/PDF/U0100.pdf Latin Extended-A] U+0100–U+017F (C4 80 - C5 BF)
[http://www.unicode.org/charts/PDF/U0180.pdf Latin Extended-B] U+0180–U+024F (C6 80 - C9 8F)
[http://www.unicode.org/charts/PDF/U1E00.pdf Latin Extended Additional] U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
[http://www.unicode.org/charts/PDF/U2C60.pdf Latin Extended-C] U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
[http://www.unicode.org/charts/PDF/UA720.pdf Latin Extended-D] U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
[http://www.unicode.org/charts/PDF/UAB30.pdf Latin Extended-E] U+AB30-U+AB6F (EA AC B0 - EA AD AF)
[http://www.unicode.org/charts/PDF/UFB00.pdf Alphabetic Presentation Forms] U+FB00-U+FB06 (EF AC 80 - EF AC 86)
[http://www.unicode.org/charts/PDF/UFF00.pdf Halfwidth and Fullwidth Forms] U+FF01-U+FF3C (EF BC 81 - EF BC BC)
 
does not include:
[http://www.unicode.org/charts/PDF/U1D00.pdf Phonetic Extensions] U+1D00-U+1D7F (E1 B4 80 - E1 B5 BF)
[http://www.unicode.org/charts/PDF/U0250.pdf IPA Extensions] U+0250-U+02AF (C9 90 - CA AF)
[http://www.unicode.org/charts/PDF/U1D80.pdf Phonetic Extensions Supplement] U+1D80-U+1DBF (E1 B6 80 - E1 B6 BF)
 
]]
 
-- Reports whether text consists solely of characters from the Latn ranges listed below.
-- Piped wikilinks ([[target|label]]) are first reduced to their label so that the link target does not
-- take part in the test.  Returns true when nothing is left after every listed character has been
-- removed (an empty text therefore yields true); false otherwise.
local function is_latn (text)
	local latn_ranges = {
		'\32-\126',																-- C0 Controls and Basic Latin U+0020–U+007E (20 - 7E)
		'\194\160-\194\172',													-- C1 Controls and Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC)
		'\195\128-\195\191',													-- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF)
		'\196\128-\197\191',													-- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF)
		'\198\128-\201\143',													-- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F)
		'\225\184\128-\225\187\191',											-- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
		'\226\177\160-\226\177\191',											-- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
		'\234\156\160-\234\159\191',											-- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
		'\234\172\176-\234\173\175',											-- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF)
		'\239\172\128-\239\172\134',											-- Alphabetic Presentation Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86)
		'\239\188\129-\239\188\188',											-- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC)
	};
	local latn_set = '[' .. table.concat (latn_ranges) .. ']';					-- wrap the ranges in brackets to form a pattern character set

	-- keep only the label of a piped wikilink; the target may be an interwiki link to a non-Latn wikipedia
	local label_text = mw.ustring.gsub (text, '%[%[[^|]+|([^%]]+)%]%]', '%1');
	-- strip every character that belongs to the set; only the first gsub result (the string) is wanted
	local leftover = mw.ustring.gsub (label_text, latn_set, '');

	return not is_set (leftover);												-- nothing left over means text is wholly latn
end
 
Line 294 ⟶ 340:
]]
 
local function make_error_msg (msg, nocatargs)
local out = {};
table.insert (out, table.concat ({'[', args.text or 'undefined', '] '})); -- for error messages output args.text if available
table.insert (out, '<span style="font-size:100%" class="error">error: ');
table.insert (out, msg);
Line 302 ⟶ 348:
table.insert (out, '</span>');
if (0 == namespace) and not is_set (args.nocat) then -- only categorize in article space
table.insert (out, '[[Category:lang and lang-xx template errors]]');
end
Line 334 ⟶ 380:
 
TODO: add support for block: div tags instead of span tags; would need some sort of proper parameter to control the switch
 
For italic style, can't do ''{{lang|xx|text}}'' without using <span/> tags when text is italic because of -Latn, |italic=yes,
or auto-italics because the wrapping wikimarkup produces this:
<i><i lang="xx">text</i></i>
which is later reduced to this:
<i>text</i>
This reduction happens in some sort of cleanup process outside the scope of this template/module.
 
Until or unless this is fixed italic text must be:
<i><span lang="xx">text</span></i>
 
]]
 
local function make_text_span (code, text, rtl, italicstyle, size)
local span = {};
local style_added = '';
 
if 'italic' == style then
table.insert (span, '<i>'); -- open italic style tag
end
table.insert (span, table.concat ({'<span lang="'})); -- open <span> tag
table.insert (span, table.concat ({code, '\"'})); -- add language attribute
 
table.insert (span, '<span lang="'); -- open <span> tag
table.insert (span, code); -- language attribute
table.insert (span, '"');
if rtl then
table.insert (span, ' dir="rtl"'); -- add direction attribute for right to left languages
end
 
if 'normal' == style then -- when |italic=no
table.insert (span, table.concat ({' style=\"font-style:', italic, ';'})); -- TODO: rename italic to style as that is more meaningful
table.insert (span, ' style=\"font-style:normal;'); -- override external markup, if any
style_added = '\"'; -- remember that style attribute added and is not yet closed
end
 
if is_set (size) then -- when |size=<something>
if style_added then
table.insert (span, table.concat ({' font-size:', size, ';'}))
table.insert (span, table.concat ({' font-size:', size, ';'})); -- add when style attribute already inserted
else
table.insert (span, table.concat ({' style=\"font-size:', size, ';'})); -- create style attribute
style_added = '\"'; -- remember that style attribute added and is not yet closed
end
end
 
table.insert (span, '\">'); -- close the style and close opening span tag
table.insert (span, table.concat ({style_added, '>'})); -- close the style attribute and close opening span tag
table.insert (span, text); -- insert the text
 
table.insert (span, '</span>'); -- close the <span> tag
if rtl'italic' == style then
table.insert (span, '</i>'); -- close italic style tag
end
if rtl then -- legacy; shouldn't be necessary because all of the rtl text is wrapped in <span dir="rtl">text</span>
table.insert (span, '&lrm;'); -- make sure the browser knows that we're at the end of the rtl
end
Line 465 ⟶ 539:
local function validate_text (template, args)
if not is_set (args.text) then
return make_error_msg (table.concat ({'{{', template, '}}: no text'}), args.nocat);
end
 
if 'lang-xx' == template then -- for the time being, this error checking does not apply to {{lang}}
if args.text:find ("\'\'\'\'\'[\']+") then -- because we're looking, look for 6+ appostrophes
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args.nocat);
end
if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then -- italic but not bold, or bold italic
return make_error_msg (table.concat ({'{{', template, '}}: text has italic markup'}), args.nocat);
end
if args.text:find ("\'\'\'\'") then -- because we're looking, look for 4 apostrophes
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args.nocat);
end
end
Line 538 ⟶ 612:
function p.lang (frame)
local args = getArgs(frame);
local out = {};
local language_name; -- used to make category names
local subtags = {}; -- IETF subtags script, region, variant, and private
local code; -- the language code
local msg; -- for error messages
 
if args[1] and args.code then
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args.nocat);
else
args.code = args[1] or args.code; -- prefer args.code
Line 546 ⟶ 625:
 
if args[2] and args.text then
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args.nocat);
else
args.text = args[2] or args.text; -- prefer args.text
end
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
if is_set (msg) then -- msg is an already-formatted error message
return msg;
end
 
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false
 
local out = {};
local language_name; -- used to make category names
local subtags = {}; -- IETF subtags script, region, variant, and private
local code; -- the language code
local msg; -- for error messages
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}})
 
if msg then
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args.nocat);
end
 
Line 568 ⟶ 646:
 
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls
if ('latn' == subtags.script) or (is_latn (args.text) and 'en' ~= code) then -- script set to latn or text is wholly latn script but not rendering English
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
else
args.italic = 'normalinherit'; -- italic not set; script not latn; setinherit forcurrent font-style:normal
end
end
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
if is_set (msg) then -- msg is an already-formatted error message
return msg;
end
 
if is_set (subtags.script) then -- if script set, override rtl setting
if in_array (subtags.script, lang_data.rtl_scripts) then
Line 651 ⟶ 724:
local function _lang_xx (frame)
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}
local out = {};
local language_name; -- used to make display text, article links, and category names
local subtags = {}; -- IETF subtags script, region, and variant
local code; -- the language code
 
local translit_script_name; -- name associated with IANA (ISO 15924) script code
local translit;
local translit_title;
local msg; -- for error messages
 
if args[1] and args.text then
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args.nocat);
else
args.text = args[1] or args.text; -- prefer args.text
end
msg = validate_text ('lang-xx', args); -- ensure that |text= is set, does not contain italic markup and is protected from improper bolding
if is_set (msg) then
return msg;
end
 
if args[2] and args.translit then
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args.nocat);
else
args.translit = args[2] or args.translit -- prefer args.translit
Line 665 ⟶ 752:
if args[3] and (args.translation or args.lit) then
return make_error_msg ('{{lang-xx}}: conflicting: {{{3}}} and |lit= or |translation=', args.nocat);
elseif args.translation and args.lit then
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args.nocat);
else
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation
Line 673 ⟶ 760:
 
if args.links and args.link then
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args.nocat);
else
args.link = args.link or args.links; -- prefer args.link
Line 679 ⟶ 766:
 
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false
 
local out = {};
local language_name; -- used to make display text, article links, and category names
local subtags = {}; -- IETF subtags script, region, and variant
local code; -- the language code
 
local translit_script;
local translit;
local translit_title;
local msg; -- for error messages
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private
 
if msg then -- if an error detected then there is an error message
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args.nocat);
end
Line 710 ⟶ 787:
end
msg = validate_text ('lang-xx', args); -- ensure that |text= is set, does not contain italic markup and is protected from improper bolding
if is_set (msg) then
return msg;
end
 
if is_set (subtags.script) then -- if script set override rtl setting
if in_array (subtags.script, lang_data.rtl_scripts) then
Line 740 ⟶ 812:
end
end
 
translit_script = args['translit-script'] or language_name; -- for translit prefer |trans-script= over language
 
if 'no' == args.link then
Line 754 ⟶ 824:
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
-- TODO: make this transl code a separate function so that {{transl}} can use this module?
if is_set (args.translit) and not is_latn (args.text) then -- transliteration (not supported in {{lang}}); not supported inwhen allargs.text {{lang-xx}}is TODO:wholly figurelatn outtext how(this tois knowan whichimperfect {{tlag-xx}} support transliterationtest)
table.insert (out, ', <small>');
translit_script_name = lang_name_table.script[args['translit-script']] or language_name; -- if |trans-script= valid ISO 15924 code prefer associated name over language name
translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name)
translit_title = mw.title.makeTitle (0, table.concat ({'Romanization of ', translit_script_name}));
if translit_title.exists and ('no' ~= args.link) then
table.insert (out, make_wikilink ('Romanization of ' .. translit_scripttranslit_script_name or language_name, 'translit.')); -- make a wikilink if there is an article to link to
else
table.insert (out, '<abbr title="transliteration">translit.</abbr>'); -- else define the abbreviation
end
table.insert (out, '&nbsp;</small>');
Line 767 ⟶ 838:
table.insert (out, translit);
else
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or transli-script: \'', args['translit-script'] or 'missing', '\''}), args.nocat);
end
end
Line 790 ⟶ 861:
 
 
--[[--------------------------< L A N G _ X X _ I T A L I C >----------------------------------------------------------------
 
Entry point for those {{lang-xx}} templates that have not been converted to call either of lang_xx_normal() or
lang_xx_italic(). Sets the initial style state to italic. When all {{lang-xx}} templates that use this module
have been converted, this function can go away.
 
]]
 
--function p.lang_xxlang_xx_italic (frame)
-- initial_style_state = 'italic';
-- return _lang_xx (frame);
--end
 
 
--[[--------------------------< L A N G _ X X _ I N H E R I T >--------------------------------------------------
 
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_inherit(). Sets the initial
style state to inherit.
 
]]
 
-- Exported entry point for {{lang-xx}} templates whose text should inherit the surrounding font style.
-- Sets initial_style_state (declared outside this block) to 'inherit', then hands the frame to _lang_xx()
-- and returns whatever _lang_xx() returns.
-- NOTE(review): the name and the style value were reconstructed from fused diff text
-- ('lang_xx_italiclang_xx_inherit', 'italicinherit') -- confirm against the saved revision.
function p.lang_xx_inherit (frame)
	initial_style_state = 'inherit';
	return _lang_xx (frame);
end
Line 818 ⟶ 888:
 
--[[--------------------------< L A N G _ X X _ N O R M A L >--------------------------------------------------
 
DEPRECATED
TODO: change the {{lang-??}} templates that call lang_xx_normal() to call lang_xx_inherit()
 
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_normal(). Sets the initial
Line 825 ⟶ 898:
 
-- Exported entry point kept for {{lang-xx}} templates that still call lang_xx_normal().
-- Sets initial_style_state (declared outside this block) and returns whatever _lang_xx() returns for frame.
function p.lang_xx_normal (frame)
	initial_style_state = 'inherit';											-- inherit here instead of 'normal' because no need to ignore external wiki markup
	return _lang_xx (frame);
end
Line 844 ⟶ 917:
local msg; -- holds an error message (not used here) if IETF language tag is malformed or invalid
local language_name = '';
 
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code);