Module:Lang: Difference between revisions
Jump to navigation
Jump to search
Content added Content deleted
(fix variant length test;) |
(synch from sandbox;) |
||
Line 8: | Line 8: | ||
local p = {}; |
local p = {}; |
||
local initial_style_state; -- set by |
local initial_style_state; -- set by lang_xx_normal() and lang_xx_italic() |
||
local getArgs = require ('Module:Arguments').getArgs; |
local getArgs = require ('Module:Arguments').getArgs; |
||
Line 37: | Line 37: | ||
Returns true if all of text argument is written using Latn script for letters, numbers and punctuation; false otherwise. |
Returns true if all of text argument is written using Latn script for letters, numbers and punctuation; false otherwise. |
||
For the purposes of this function, Latn script is the set of characters, excluding control characters, from these Unicode 10.0 Character Code Charts: |
|||
[http://www.unicode.org/charts/PDF/U0000.pdf C0 Controls and Basic Latin] U+0020–U+007E (20 - 7E) + see note about <poem>...</poem> support |
|||
[http://www.unicode.org/charts/PDF/U0080.pdf C1 Controls and Latin-1 Supplement] U+00A0-U+00AC, U+00C0–U+00FF (C2 A0 - C2 AC, C3 80 - C3 BF: \194\160-\194\172) |
|||
[http://www.unicode.org/charts/PDF/U0100.pdf Latin Extended-A] U+0100–U+017F (C4 80 - C5 BF) |
|||
[http://www.unicode.org/charts/PDF/U0180.pdf Latin Extended-B] U+0180–U+024F (C6 80 - C9 8F) |
|||
[http://www.unicode.org/charts/PDF/U1E00.pdf Latin Extended Additional] U+1E00-U+1EFF (E1 B8 80 - E1 BB BF) |
|||
[http://www.unicode.org/charts/PDF/U2C60.pdf Latin Extended-C] U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF) |
|||
[http://www.unicode.org/charts/PDF/UA720.pdf Latin Extended-D] U+A720-U+A7FF (EA 9C A0 - EA 9F BF) |
|||
[http://www.unicode.org/charts/PDF/UAB30.pdf Latin Extended-E] U+AB30-U+AB6F (EA AC B0 - EA AD AF) |
|||
[http://www.unicode.org/charts/PDF/UFB00.pdf Alphabetic Presentation Forms] U+FB00-U+FB06 (EF AC 80 - EF AC 86) |
|||
[http://www.unicode.org/charts/PDF/UFF00.pdf Halfwidth and Fullwidth Forms] U+FF01-U+FF3C (EF BC 81 - EF BC BC) |
|||
does not include: |
|||
[http://www.unicode.org/charts/PDF/U1D00.pdf Phonetic Extensions] U+1D00-U+1D7F (E1 B4 80 - E1 B5 BF) |
|||
[http://www.unicode.org/charts/PDF/U0250.pdf IPA Extensions] U+0250-U+02AF (C9 90 - CA AF) |
|||
[http://www.unicode.org/charts/PDF/U1D80.pdf Phonetic Extensions Supplement] U+1D80-U+1DBF (E1 B6 80 - E1 B6 BF) |
|||
{{lang}} is used inside <poem>...</poem> tags for song lyrics, poetry, etc. <poem>...</poem> replaces newlines with |
|||
poem stripmarkers. These have the form: |
|||
?'"`UNIQ--poem-67--QINU`"'? |
|||
where the '?' character is actually the delete character (U+007F, \127). Including the '\n' (U+000A) and 'del' (U+007F) |
|||
characters in the latn character table allows {{lang}} to auto-italicize text within <poem>...</poem> tags. |
|||
]] |
]] |
||
p.is_latn = require ('Module:Unicode data').is_Latin |
|||
function p.is_latn (text) |
|||
local latn = table.concat ( |
|||
{ |
|||
'[', -- this is a set so include opening bracket |
|||
'\n\32-\127', -- C0 Controls and Basic Latin U+0020–U+007E (20 - 7E) + (U+0010 and U+007F <poem>...</poem> support) |
|||
'\194\160-\194\172', -- C1 Controls and Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC) |
|||
'\195\128-\195\191', -- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF) |
|||
'\196\128-\197\191', -- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF) |
|||
'\198\128-\201\143', -- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F) |
|||
'\225\184\128-\225\187\191', -- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF) |
|||
'\226\177\160-\226\177\191', -- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF) |
|||
'\234\156\160-\234\159\191', -- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF) |
|||
'\234\172\176-\234\173\175', -- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF) |
|||
'\239\172\128-\239\172\134', -- Alphabetic Presentaion Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86) |
|||
'\239\188\129-\239\188\188', -- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC) |
|||
'–', -- ndash |
|||
'—', -- mdash |
|||
'«', '»', -- guillemets commonly used in several 'Latn' languages |
|||
']', -- close the set |
|||
}); |
|||
text = mw.text.decode (text, true); -- replace numeric and named html entities with their unicode characters |
|||
text = mw.ustring.gsub (text, '%[%[[^|]+|([^%]]+)%]%]', '%1'); -- remove the link and markup from complex wikilink in case interwiki to non-Latn wikipedia |
|||
return not is_set (mw.ustring.gsub (text, latn, '')); -- replace all latn characters with empty space; if result is all empty space, text is latn |
|||
end |
|||
Line 162: | Line 115: | ||
]] |
]] |
||
local function validate_italic ( |
local function validate_italic (args) |
||
local properties = {['yes'] = 'italic', ['no'] = 'normal', ['unset'] = 'inherit', ['invert'] = 'invert', ['default'] = nil}; |
local properties = {['yes'] = 'italic', ['no'] = 'normal', ['unset'] = 'inherit', ['invert'] = 'invert', ['default'] = nil}; |
||
local count = 0 |
|||
for _, arg in pairs {'italic', 'italics', 'i'} do |
|||
if args[arg] then |
|||
count = count + 1 |
|||
end |
|||
end |
|||
if |
if count > 1 then -- return nil and an error message if more than one is set |
||
return nil, ' |
return nil, 'only one of |italic=, |italics=, or |i= can be specified'; |
||
end |
end |
||
return properties[italic or italics], nil; |
return properties[args.italic or args.italics or args.i], nil; -- return an appropriate value and a nil error message |
||
end |
|||
--[=[--------------------------< V A L I D A T E _ C A T _ A R G S >---------------------------------------------------------- |
|||
Default behavior of the {{lang}} and {{lang-xx}} templates is to add categorization when the templates are used in mainspace. |
|||
This default functionality may be suppressed by setting |nocat=yes or |cat=no. This function selects one of these two parameters |
|||
to control categorization. |
|||
Because having two parameters with 'opposite' names and 'opposite' values is confusing, this function accepts only affirmative |
|||
values for |nocat= and only negative values for |cat=; in both cases the 'other' sense (and non-sense) is not accepted and the |
|||
parameter is treated as if it were not set in the template. |
|||
Sets args.nocat to true if categorization is to be turned off; to nil if the default behavior should apply. |
|||
Accepted values for |nocat= are the text strings: |
|||
'yes', 'y', 'true', 't', 'on', '1' -- [[Module:Yesno]] returns logical true for all of these; false or nil else |
|||
for |cat= |
|||
'no', 'n', 'false', 'f', 'off', '0' -- [[Module:Yesno]] returns logical false for all of these; true or nil else |
|||
]=] |
|||
local function validate_cat_args (args) |
|||
if not (args.nocat or args.cat) then -- both are nil, so categorize |
|||
return; |
|||
end |
|||
local yesno = require "Module:Yesno"; |
|||
if false == yesno (args.cat) or true == yesno (args.nocat) then |
|||
args.nocat = true; -- set to true when args.nocat is affirmative; nil else (as if the parameter were not set in the template) |
|||
else -- args.nocat is the parameter actually used. |
|||
args.nocat = nil; |
|||
end |
|||
end |
end |
||
Line 248: | Line 241: | ||
because those parameters are superfluous to the IETF subtags in |code=) |
because those parameters are superfluous to the IETF subtags in |code=) |
||
returns six values. Valid parts are returned as themselves; omitted parts are returned as empty strings, invalid |
returns six values; all lower case. Valid parts are returned as themselves; omitted parts are returned as empty strings, invalid |
||
parts are returned as nil; the sixth returned item is an error message (if an error detected) or nil. |
parts are returned as nil; the sixth returned item is an error message (if an error detected) or nil. |
||
Line 270: | Line 263: | ||
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%d%d%d%d$') then -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits) |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%d%d%d%d$') then -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits) |
||
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$'); |
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%- |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%-%w%w%w%w%w+$') then -- ll-Ssss-RR-variant (where variant is 5-8 alnum characters) |
||
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-( |
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%- |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%w%w%w%w%w+$') then -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters) |
||
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-( |
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%d$') then |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%d$') then -- ll-Ssss-variant (where variant is 4 digits) |
||
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$'); |
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a%- |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%w%w%w%w%w+$') then -- ll-Ssss-variant (where variant is 5-8 alnum characters) |
||
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-( |
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%-%d%d%d%d$') then |
elseif source:match ('^%a%a%a?%-%a%a%-%d%d%d%d$') then -- ll-RR-variant (where variant is 4 digits) |
||
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$'); |
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%-%d%d%d%-%d%d%d%d$') then |
elseif source:match ('^%a%a%a?%-%d%d%d%-%d%d%d%d$') then -- ll-DDD-variant (where region is 3 digits; variant is 4 digits) |
||
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$'); |
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%- |
elseif source:match ('^%a%a%a?%-%a%a%-%w%w%w%w%w+$') then -- ll-RR-variant (where variant is 5-8 alnum characters) |
||
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-( |
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%d%d%d%- |
elseif source:match ('^%a%a%a?%-%d%d%d%-%w%w%w%w%w+$') then -- ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters) |
||
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-( |
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%d%d%d%d$') then -- ll-variant (where variant is 4 digits) |
elseif source:match ('^%a%a%a?%-%d%d%d%d$') then -- ll-variant (where variant is 4 digits) |
||
code, variant = source:match ('^(%a%a%a?)%-(%d%d%d%d)$'); |
code, variant = source:match ('^(%a%a%a?)%-(%d%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%- |
elseif source:match ('^%a%a%a?%-%w%w%w%w%w+$') then -- ll-variant (where variant is 5-8 alnum characters) |
||
code, variant = source:match ('^(%a%a%a?)%-( |
code, variant = source:match ('^(%a%a%a?)%-(%w%w%w%w%w%w?%w?%w?)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a$') then -- ll-Ssss-RR |
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a$') then -- ll-Ssss-RR |
||
Line 299: | Line 292: | ||
code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$'); |
code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a%a%a$') then |
elseif source:match ('^%a%a%a?%-%a%a%a%a$') then -- ll-Ssss |
||
code, script = source:match ('^(%a%a%a?)%-(%a%a%a%a)$'); |
code, script = source:match ('^(%a%a%a?)%-(%a%a%a%a)$'); |
||
elseif source:match ('^%a%a%a?%-%a%a$') then |
elseif source:match ('^%a%a%a?%-%a%a$') then -- ll-RR |
||
code, region = source:match ('^(%a%a%a?)%-(%a%a)$'); |
code, region = source:match ('^(%a%a%a?)%-(%a%a)$'); |
||
elseif source:match ('^%a%a%a?%-%d%d%d$') then |
elseif source:match ('^%a%a%a?%-%d%d%d$') then -- ll-DDD (region is 3 digits) |
||
code, region = source:match ('^(%a%a%a?)%-(%d%d%d)$'); |
code, region = source:match ('^(%a%a%a?)%-(%d%d%d)$'); |
||
elseif source:match ('^%a%a%a?$') then |
elseif source:match ('^%a%a%a?$') then -- ll |
||
code = source:match ('^(%a%a%a?)$'); |
code = source:match ('^(%a%a%a?)$'); |
||
elseif source:match ('^%a%a%a?%-x%- |
elseif source:match ('^%a%a%a?%-x%-%w%w?%w?%w?%w?%w?%w?%w?$') then -- ll-x-pppppppp) |
||
code, private = source:match ('^(%a%a%a?)%-x%-( |
code, private = source:match ('^(%a%a%a?)%-x%-(%w+)$'); |
||
else |
else |
||
Line 420: | Line 413: | ||
table.insert (out, '</span>'); |
table.insert (out, '</span>'); |
||
if (0 == namespace) and not |
if (0 == namespace) and not args.nocat then -- only categorize in article space |
||
table.insert (out, table.concat ({'[[Category:', category, ' template errors]]'})); |
table.insert (out, table.concat ({'[[Category:', category, ' template errors]]'})); |
||
end |
end |
||
Line 666: | Line 659: | ||
Italic rendering is controlled by the |italic= template parameter so italic markup should never appear in args.text |
Italic rendering is controlled by the |italic= template parameter so italic markup should never appear in args.text |
||
either as ''itself'' or as '''''bold italic'''''. |
either as ''itself'' or as '''''bold italic''''' unless |italic=unset or |italic=invert. |
||
]=] |
]=] |
||
Line 679: | Line 672: | ||
end |
end |
||
local style = args.italic |
local style = args.italic; |
||
-- if ('unset' ~= args.italic) and ('unset' ~= args.italics) then -- allow italic markup when |italic=unset or |italics=unset |
|||
if ('unset' ~= style) and ('invert' ~=style) then |
if ('unset' ~= style) and ('invert' ~=style) then |
||
if args.text:find ("%f[\']\'\'%f[^\']") or args.text:find ("%f[\']\'\'\'\'\'%f[^\']") then -- italic but not bold, or bold italic |
if args.text:find ("%f[\']\'\'%f[^\']") or args.text:find ("%f[\']\'\'\'\'\'%f[^\']") then -- italic but not bold, or bold italic |
||
Line 707: | Line 699: | ||
end |
end |
||
if (0 < #maint_cats) and (0 == namespace) and not |
if (0 < #maint_cats) and (0 == namespace) and not nocat then -- when there are maintenance categories; article namespace only |
||
for _, cat in ipairs (maint_cats) do |
for _, cat in ipairs (maint_cats) do |
||
table.insert (maint, table.concat ({'[[Category:', cat, ']]'})); -- format and add the categories |
table.insert (maint, table.concat ({'[[Category:', cat, ']]'})); -- format and add the categories |
||
Line 769: | Line 761: | ||
return msg; |
return msg; |
||
end |
end |
||
validate_cat_args (args); -- determine if categorization should be suppressed |
|||
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
||
Line 778: | Line 772: | ||
end |
end |
||
args.italic, msg = validate_italic (args |
args.italic, msg = validate_italic (args); |
||
if msg then |
if msg then |
||
return make_error_msg (msg, args, 'lang'); |
return make_error_msg (msg, args, 'lang'); |
||
Line 909: | Line 903: | ||
args.link = args.link or args.links; -- prefer args.link |
args.link = args.link or args.links; -- prefer args.link |
||
end |
end |
||
validate_cat_args (args); -- determine if categorization should be suppressed |
|||
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false |
||
Line 918: | Line 914: | ||
end |
end |
||
args.italic, msg = validate_italic (args |
args.italic, msg = validate_italic (args); |
||
if msg then |
if msg then |
||
return make_error_msg (msg, args, 'lang-xx'); |
return make_error_msg (msg, args, 'lang-xx'); |
||
Line 1,065: | Line 1,061: | ||
All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function. |
All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function. |
||
Set invoke's |link= parameter to yes to get wikilinked version of the language name. |
|||
]] |
]] |
||
Line 1,070: | Line 1,068: | ||
function p.name_from_code (frame) |
function p.name_from_code (frame) |
||
local subtags = {}; -- IETF subtags script, region, variant, and private |
local subtags = {}; -- IETF subtags script, region, variant, and private |
||
local raw_code = |
local raw_code = frame.args[1]; -- save a copy of the input IETF subtag |
||
local link = 'yes' == frame.args['link']; -- save a copy of the link-enable positional parameter (value can be anything) |
|||
local code; -- the language code |
local code; -- the language code |
||
local msg; -- |
local msg; -- gets an error message if IETF language tag is malformed or invalid |
||
local language_name = ''; |
local language_name = ''; |
||
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code); |
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code); |
||
if msg then |
if msg then |
||
local template = (frame.args['template'] and table.concat ({'{{', frame.args['template'], '}}: '})) or ''; -- make template name (if provided by the template) |
|||
return ''; |
|||
return table.concat ({'<span style=\"font-size:100%; font-style:normal;\" class=\"error\">error: ', template, msg, '</span>'}); |
|||
end |
end |
||
Line 1,095: | Line 1,095: | ||
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them |
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them |
||
if link then -- when |link=yes, wikilink the language name |
|||
if language_name:find ('languages') then |
|||
return make_wikilink (language_name); -- collective language name uses simple wikilink |
|||
else |
|||
return make_wikilink (language_name .. ' language', language_name); -- language name with wikilink |
|||
end |
|||
end |
|||
return language_name; |
return language_name; |
||
Line 1,103: | Line 1,111: | ||
--[[--------------------------< T R A N S L >------------------------------------------------------------------ |
--[[--------------------------< T R A N S L >------------------------------------------------------------------ |
||
Entry point for {{transl}} |
|||
]] |
]] |
||
Line 1,145: | Line 1,153: | ||
end |
end |
||
args.italic, msg = validate_italic (args |
args.italic, msg = validate_italic (args); |
||
if msg then |
if msg then |
||
return make_error_msg (msg, args, 'transl'); |
return make_error_msg (msg, args, 'transl'); |