Module:Lang: Difference between revisions
Jump to navigation
Jump to search
Content added Content deleted
(proto language support;) |
(synch from sandbox;) |
||
Line 31: | Line 31: | ||
local function is_set( var ) |
local function is_set( var ) |
||
return not (var == nil or var == ''); |
return not (var == nil or var == ''); |
||
end |
|||
--[[--------------------------< I S _ L A T N >---------------------------------------------------------------- |
|||
Returns true if all of the text argument is written using Latn script letters, numbers, and punctuation; false otherwise. |
|||
For the purposes of this function, Latn script is characters less control characters from these Unicode 10.0 Character Code Charts: |
|||
[http://www.unicode.org/charts/PDF/U0000.pdf C0 Controls and Basic Latin] U+0020–U+007E (20 - 7E) |
|||
[http://www.unicode.org/charts/PDF/U0080.pdf C1 Controls and Latin-1 Supplement] U+00A0-U+00AC, U+00C0–U+00FF (C2 A0 - C2 AC, C3 80 - C3 BF: \194\160-\194\172) |
|||
[http://www.unicode.org/charts/PDF/U0100.pdf Latin Extended-A] U+0100–U+017F (C4 80 - C5 BF) |
|||
[http://www.unicode.org/charts/PDF/U0180.pdf Latin Extended-B] U+0180–U+024F (C6 80 - C9 8F) |
|||
[http://www.unicode.org/charts/PDF/U1E00.pdf Latin Extended Additional] U+1E00-U+1EFF (E1 B8 80 - E1 BB BF) |
|||
[http://www.unicode.org/charts/PDF/U2C60.pdf Latin Extended-C] U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF) |
|||
[http://www.unicode.org/charts/PDF/UA720.pdf Latin Extended-D] U+A720-U+A7FF (EA 9C A0 - EA 9F BF) |
|||
[http://www.unicode.org/charts/PDF/UAB30.pdf Latin Extended-E] U+AB30-U+AB6F (EA AC B0 - EA AD AF) |
|||
[http://www.unicode.org/charts/PDF/UFB00.pdf Alphabetic Presentation Forms] U+FB00-U+FB06 (EF AC 80 - EF AC 86) |
|||
[http://www.unicode.org/charts/PDF/UFF00.pdf Halfwidth and Fullwidth Forms] U+FF01-U+FF3C (EF BC 81 - EF BC BC) |
|||
does not include: |
|||
[http://www.unicode.org/charts/PDF/U1D00.pdf Phonetic Extensions] U+1D00-U+1D7F (E1 B4 80 - E1 B5 BF) |
|||
[http://www.unicode.org/charts/PDF/U0250.pdf IPA Extensions] U+0250-U+02AF (C9 90 - CA AF) |
|||
[http://www.unicode.org/charts/PDF/U1D80.pdf Phonetic Extensions Supplement] U+1D80-U+1DBF (E1 B6 80 - E1 B6 BF) |
|||
]] |
|||
local function is_latn (text) |
|||
local latn = table.concat ( |
|||
{ |
|||
'[', -- this is a set so include opening bracket |
|||
'\32-\126', -- C0 Controls and Basic Latin U+0020–U+007E (20 - 7E) |
|||
'\194\160-\194\172', -- C1 Controls and Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC) |
|||
'\195\128-\195\191', -- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF) |
|||
'\196\128-\197\191', -- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF) |
|||
'\198\128-\201\143', -- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F) |
|||
'\225\184\128-\225\187\191', -- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF) |
|||
'\226\177\160-\226\177\191', -- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF) |
|||
'\234\156\160-\234\159\191', -- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF) |
|||
'\234\172\176-\234\173\175', -- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF) |
|||
'\239\172\128-\239\172\134', -- Alphabetic Presentaion Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86) |
|||
'\239\188\129-\239\188\188', -- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 EF BC BC) |
|||
']', -- close the set |
|||
}); |
|||
text = mw.ustring.gsub (text, '%[%[[^|]+|([^%]]+)%]%]', '%1'); -- remove the link and markup from complex wikilink in case interwiki to non-Latn wikipedia |
|||
return not is_set (mw.ustring.gsub (text, latn, '')); -- replace all latn characters with empty space; if result is all empty space, text is latn |
|||
end |
end |
||
Line 294: | Line 340: | ||
]] |
]] |
||
local function make_error_msg (msg, |
local function make_error_msg (msg, args) |
||
local out = {}; |
local out = {}; |
||
table.insert (out, table.concat ({'[', args.text or 'undefined', '] '})); -- for error messages output args.text if available |
|||
table.insert (out, '<span style="font-size:100%" class="error">error: '); |
table.insert (out, '<span style="font-size:100%" class="error">error: '); |
||
table.insert (out, msg); |
table.insert (out, msg); |
||
Line 302: | Line 348: | ||
table.insert (out, '</span>'); |
table.insert (out, '</span>'); |
||
if (0 == namespace) and not is_set (nocat) then |
if (0 == namespace) and not is_set (args.nocat) then -- only categorize in article space |
||
table.insert (out, '[[Category:lang and lang-xx template errors]]'); |
table.insert (out, '[[Category:lang and lang-xx template errors]]'); |
||
end |
end |
||
Line 334: | Line 380: | ||
TODO: add support for block: div tags instead of span tags; would need some sort of proper parameter to control the switch |
TODO: add support for block: div tags instead of span tags; would need some sort of proper parameter to control the switch |
||
For italic style, can't do ''{{lang|xx|text}}'' without using <span/> tags when text is italic because of -Latn, |italic=yes, |
|||
or auto-italics because the wrapping wikimarkup produces this: |
|||
<i><i lang="xx">text</i></i> |
|||
which is later reduced to this: |
|||
<i>text</i> |
|||
This reduction happens in some sort of cleanup process outside the scope of this template/module. |
|||
Until or unless this is fixed italic text must be: |
|||
<i><span lang="xx">text</span></i> |
|||
]] |
]] |
||
local function make_text_span (code, text, rtl, |
local function make_text_span (code, text, rtl, style, size) |
||
local span = {}; |
local span = {}; |
||
local style_added = ''; |
|||
if 'italic' == style then |
|||
table.insert (span, '<i>'); -- open italic style tag |
|||
end |
|||
table.insert (span, table.concat ({'<span lang="'})); -- open <span> tag |
|||
table.insert (span, table.concat ({code, '\"'})); -- add language attribute |
|||
table.insert (span, '<span lang="'); -- open <span> tag |
|||
table.insert (span, code); -- language attribute |
|||
table.insert (span, '"'); |
|||
if rtl then |
if rtl then |
||
table.insert (span, ' dir="rtl"'); -- for right to left languages |
table.insert (span, ' dir="rtl"'); -- add direction attribute for right to left languages |
||
end |
end |
||
if 'normal' == style then -- when |italic=no |
|||
table.insert (span, table.concat ({' style=\"font-style:', italic, ';'})); -- TODO: rename italic to style as that is more meaningful |
|||
table.insert (span, ' style=\"font-style:normal;'); -- override external markup, if any |
|||
style_added = '\"'; -- remember that style attribute added and is not yet closed |
|||
end |
|||
if is_set (size) then |
if is_set (size) then -- when |size=<something> |
||
if style_added then |
|||
table.insert (span, table.concat ({' font-size:', size, ';'})) |
|||
table.insert (span, table.concat ({' font-size:', size, ';'})); -- add when style attribute already inserted |
|||
else |
|||
table.insert (span, table.concat ({' style=\"font-size:', size, ';'})); -- create style attribute |
|||
style_added = '\"'; -- remember that style attribute added and is not yet closed |
|||
end |
|||
end |
end |
||
table.insert (span, '\">'); -- close the style and close opening span tag |
|||
table.insert (span, table.concat ({style_added, '>'})); -- close the style attribute and close opening span tag |
|||
table.insert (span, text); -- insert the text |
table.insert (span, text); -- insert the text |
||
table.insert (span, '</span>'); -- close the span |
table.insert (span, '</span>'); -- close the <span> tag |
||
if |
if 'italic' == style then |
||
table.insert (span, '</i>'); -- close italic style tag |
|||
end |
|||
if rtl then -- legacy; shouldn't be necessary because all of the rtl text is wrapped in <span dir="rtl">text</span> |
|||
table.insert (span, '‎'); -- make sure the browser knows that we're at the end of the rtl |
table.insert (span, '‎'); -- make sure the browser knows that we're at the end of the rtl |
||
end |
end |
||
Line 465: | Line 539: | ||
local function validate_text (template, args) |
local function validate_text (template, args) |
||
if not is_set (args.text) then |
if not is_set (args.text) then |
||
return make_error_msg (table.concat ({'{{', template, '}}: no text'}), args |
return make_error_msg (table.concat ({'{{', template, '}}: no text'}), args); |
||
end |
end |
||
if 'lang-xx' == template then -- for the time being, this error checking does not apply to {{lang}} |
if 'lang-xx' == template then -- for the time being, this error checking does not apply to {{lang}} |
||
if args.text:find ("\'\'\'\'\'[\']+") then -- because we're looking, look for 6+ appostrophes |
if args.text:find ("\'\'\'\'\'[\']+") then -- because we're looking, look for 6+ appostrophes |
||
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args |
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args); |
||
end |
end |
||
if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then -- italic but not bold, or bold italic |
if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then -- italic but not bold, or bold italic |
||
return make_error_msg (table.concat ({'{{', template, '}}: text has italic markup'}), args |
return make_error_msg (table.concat ({'{{', template, '}}: text has italic markup'}), args); |
||
end |
end |
||
if args.text:find ("\'\'\'\'") then -- because we're looking, look for 4 apostrophes |
if args.text:find ("\'\'\'\'") then -- because we're looking, look for 4 apostrophes |
||
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args |
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args); |
||
end |
end |
||
end |
end |
||
Line 538: | Line 612: | ||
function p.lang (frame) |
function p.lang (frame) |
||
local args = getArgs(frame); |
local args = getArgs(frame); |
||
local out = {}; |
|||
local language_name; -- used to make category names |
|||
local subtags = {}; -- IETF subtags script, region, variant, and private |
|||
local code; -- the language code |
|||
local msg; -- for error messages |
|||
if args[1] and args.code then |
if args[1] and args.code then |
||
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args |
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args); |
||
else |
else |
||
args.code = args[1] or args.code; -- prefer args.code |
args.code = args[1] or args.code; -- prefer args.code |
||
Line 546: | Line 625: | ||
if args[2] and args.text then |
if args[2] and args.text then |
||
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args |
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args); |
||
else |
else |
||
args.text = args[2] or args.text; -- prefer args.text |
args.text = args[2] or args.text; -- prefer args.text |
||
end |
end |
||
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being) |
|||
if is_set (msg) then -- msg is an already-formatted error message |
|||
return msg; |
|||
end |
|||
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
||
local out = {}; |
|||
local language_name; -- used to make category names |
|||
local subtags = {}; -- IETF subtags script, region, variant, and private |
|||
local code; -- the language code |
|||
local msg; -- for error messages |
|||
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}}) |
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}}) |
||
if msg then |
if msg then |
||
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args |
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args); |
||
end |
end |
||
Line 568: | Line 646: | ||
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls |
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls |
||
if 'latn' == subtags.script then |
if ('latn' == subtags.script) or (is_latn (args.text) and 'en' ~= code) then -- script set to latn or text is wholly latn script but not rendering English |
||
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic |
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic |
||
else |
else |
||
args.italic = ' |
args.italic = 'inherit'; -- italic not set; script not latn; inherit current style |
||
end |
end |
||
end |
end |
||
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being) |
|||
if is_set (msg) then -- msg is an already-formatted error message |
|||
return msg; |
|||
end |
|||
if is_set (subtags.script) then -- if script set, override rtl setting |
if is_set (subtags.script) then -- if script set, override rtl setting |
||
if in_array (subtags.script, lang_data.rtl_scripts) then |
if in_array (subtags.script, lang_data.rtl_scripts) then |
||
Line 651: | Line 724: | ||
local function _lang_xx (frame) |
local function _lang_xx (frame) |
||
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}} |
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}} |
||
local out = {}; |
|||
local language_name; -- used to make display text, article links, and category names |
|||
local subtags = {}; -- IETF subtags script, region, and variant |
|||
local code; -- the language code |
|||
local translit_script_name; -- name associated with IANA (ISO 15924) script code |
|||
local translit; |
|||
local translit_title; |
|||
local msg; -- for error messages |
|||
if args[1] and args.text then |
if args[1] and args.text then |
||
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args |
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args); |
||
else |
else |
||
args.text = args[1] or args.text; -- prefer args.text |
args.text = args[1] or args.text; -- prefer args.text |
||
end |
|||
msg = validate_text ('lang-xx', args); -- ensure that |text= is set, does not contain italic markup and is protected from improper bolding |
|||
if is_set (msg) then |
|||
return msg; |
|||
end |
end |
||
if args[2] and args.translit then |
if args[2] and args.translit then |
||
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args |
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args); |
||
else |
else |
||
args.translit = args[2] or args.translit -- prefer args.translit |
args.translit = args[2] or args.translit -- prefer args.translit |
||
Line 665: | Line 752: | ||
if args[3] and (args.translation or args.lit) then |
if args[3] and (args.translation or args.lit) then |
||
return make_error_msg ('{{lang-xx}}: conflicting: {{{3}}} and |lit= or |translation=', args |
return make_error_msg ('{{lang-xx}}: conflicting: {{{3}}} and |lit= or |translation=', args); |
||
elseif args.translation and args.lit then |
elseif args.translation and args.lit then |
||
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args |
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args); |
||
else |
else |
||
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation |
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation |
||
Line 673: | Line 760: | ||
if args.links and args.link then |
if args.links and args.link then |
||
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args |
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args); |
||
else |
else |
||
args.link = args.link or args.links; -- prefer args.link |
args.link = args.link or args.links; -- prefer args.link |
||
Line 679: | Line 766: | ||
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
||
local out = {}; |
|||
local language_name; -- used to make display text, article links, and category names |
|||
local subtags = {}; -- IETF subtags script, region, and variant |
|||
local code; -- the language code |
|||
local translit_script; |
|||
local translit; |
|||
local translit_title; |
|||
local msg; -- for error messages |
|||
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private |
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private |
||
if msg then -- if an error detected then there is an error message |
if msg then -- if an error detected then there is an error message |
||
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args |
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args); |
||
end |
end |
||
Line 710: | Line 787: | ||
end |
end |
||
msg = validate_text ('lang-xx', args); -- ensure that |text= is set, does not contain italic markup and is protected from improper bolding |
|||
if is_set (msg) then |
|||
return msg; |
|||
end |
|||
if is_set (subtags.script) then -- if script set override rtl setting |
if is_set (subtags.script) then -- if script set override rtl setting |
||
if in_array (subtags.script, lang_data.rtl_scripts) then |
if in_array (subtags.script, lang_data.rtl_scripts) then |
||
Line 740: | Line 812: | ||
end |
end |
||
end |
end |
||
translit_script = args['translit-script'] or language_name; -- for translit prefer |trans-script= over language |
|||
if 'no' == args.link then |
if 'no' == args.link then |
||
Line 754: | Line 824: | ||
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size)) |
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size)) |
||
-- TODO: make this transl code a separate function so that {{transl}} can use this module? |
-- TODO: make this transl code a separate function so that {{transl}} can use this module? |
||
if is_set (args.translit) then |
if is_set (args.translit) and not is_latn (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test) |
||
table.insert (out, ', <small>'); |
table.insert (out, ', <small>'); |
||
translit_script_name = lang_name_table.script[args['translit-script']] or language_name; -- if |trans-script= valid ISO 15924 code prefer associated name over language name |
|||
translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name) |
|||
translit_title = mw.title.makeTitle (0, table.concat ({'Romanization of ', translit_script_name})); |
|||
if translit_title.exists and ('no' ~= args.link) then |
if translit_title.exists and ('no' ~= args.link) then |
||
table.insert (out, make_wikilink ('Romanization of ' .. |
table.insert (out, make_wikilink ('Romanization of ' .. translit_script_name or language_name, 'translit.')); -- make a wikilink if there is an article to link to |
||
else |
else |
||
table.insert (out, '<abbr title="transliteration">translit.</abbr>'); |
table.insert (out, '<abbr title="transliteration">translit.</abbr>'); -- else define the abbreviation |
||
end |
end |
||
table.insert (out, ' </small>'); |
table.insert (out, ' </small>'); |
||
Line 767: | Line 838: | ||
table.insert (out, translit); |
table.insert (out, translit); |
||
else |
else |
||
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or transli-script: \'', args['translit-script'] or 'missing', '\''}), args |
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or transli-script: \'', args['translit-script'] or 'missing', '\''}), args); |
||
end |
end |
||
end |
end |
||
Line 790: | Line 861: | ||
--[[--------------------------< L A N G _ X X > |
--[[--------------------------< L A N G _ X X _ I T A L I C >-------------------------------------------------- |
||
Entry point for those {{lang-xx}} templates that have |
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_italic(). Sets the initial |
||
style state to italic. |
|||
lang_xx_italic(). Sets the initial style state to italic. When all {{lang-xx}} templates that use this module |
|||
have been converted, this function can go away. |
|||
]] |
]] |
||
function p.lang_xx_italic (frame) |
|||
initial_style_state = 'italic'; |
|||
return _lang_xx (frame); |
|||
end |
|||
--[[--------------------------< L A N G _ X X _ I |
--[[--------------------------< L A N G _ X X _ I N H E R I T >------------------------------------------------ |
||
Entry point for those {{lang-xx}} templates that have been converted to call |
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_inherit(). Sets the initial |
||
style state to |
style state to inherit. |
||
]] |
]] |
||
function p. |
function p.lang_xx_inherit (frame) |
||
initial_style_state = ' |
initial_style_state = 'inherit'; |
||
return _lang_xx (frame); |
return _lang_xx (frame); |
||
end |
end |
||
Line 818: | Line 888: | ||
--[[--------------------------< L A N G _ X X _ N O R M A L >-------------------------------------------------- |
--[[--------------------------< L A N G _ X X _ N O R M A L >-------------------------------------------------- |
||
DEPRECATED |
|||
TODO: change the {{lang-??}} templates that call lang_xx_normal() to call lang_xx_inherit() |
|||
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_normal(). Sets the initial |
Entry point for those {{lang-xx}} templates that have been converted to call lang_xx_normal(). Sets the initial |
||
Line 825: | Line 898: | ||
function p.lang_xx_normal (frame) |
function p.lang_xx_normal (frame) |
||
initial_style_state = ' |
initial_style_state = 'inherit'; -- inherit here instead of 'normal' because no need to ignore external wiki markup |
||
return _lang_xx (frame); |
return _lang_xx (frame); |
||
end |
end |
||
Line 844: | Line 917: | ||
local msg; -- holds an error message (not used here) if IETF language tag is malformed or invalid |
local msg; -- holds an error message (not used here) if IETF language tag is malformed or invalid |
||
local language_name = ''; |
local language_name = ''; |
||
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code); |
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code); |