Module:Lang: Difference between revisions

Jump to navigation Jump to search
1,683 bytes removed ,  5 years ago
synch from sandbox;
(fix variant length test;)
(synch from sandbox;)
Line 8:
local p = {};
 
local initial_style_state; -- set by lang_xx_normal() and lang_xx_italic()
 
local getArgs = require ('Module:Arguments').getArgs;
Line 37:
 
Returns true if all of text argument is written using Latn script for letters, numbers and punctuation; false otherwise.
 
For the purposes of this function, Latn script is characters less control characters from these Unicode 10.0 Character Code Charts:
[http://www.unicode.org/charts/PDF/U0000.pdf C0 Controls and Basic Latin] U+0020–U+007E (20 - 7E) + see note about <poem>...</poem> support
[http://www.unicode.org/charts/PDF/U0080.pdf C1 Controls and Latin-1 Supplement] U+00A0-U+00AC, U+00C0–U+00FF (C2 A0 - C2 AC, C3 80 - C3 BF: \194\160-\194\172)
[http://www.unicode.org/charts/PDF/U0100.pdf Latin Extended-A] U+0100–U+017F (C4 80 - C5 BF)
[http://www.unicode.org/charts/PDF/U0180.pdf Latin Extended-B] U+0180–U+024F (C6 80 - C9 8F)
[http://www.unicode.org/charts/PDF/U1E00.pdf Latin Extended Additional] U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
[http://www.unicode.org/charts/PDF/U2C60.pdf Latin Extended-C] U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
[http://www.unicode.org/charts/PDF/UA720.pdf Latin Extended-D] U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
[http://www.unicode.org/charts/PDF/UAB30.pdf Latin Extended-E] U+AB30-U+AB6F (EA AC B0 - EA AD AF)
[http://www.unicode.org/charts/PDF/UFB00.pdf Alphabetic Presentation Forms] U+FB00-U+FB06 (EF AC 80 - EF AC 86)
[http://www.unicode.org/charts/PDF/UFF00.pdf Halfwidth and Fullwidth Forms] U+FF01-U+FF3C (EF BC 81 - EF BC BC)
 
does not include:
[http://www.unicode.org/charts/PDF/U1D00.pdf Phonetic Extensions] U+1D00-U+1D7F (E1 B4 80 - E1 B5 BF)
[http://www.unicode.org/charts/PDF/U0250.pdf IPA Extensions] U+0250-U+02AF (C9 90 - CA AF)
[http://www.unicode.org/charts/PDF/U1D80.pdf Phonetic Extensions Supplement] U+1D80-U+1DBF (E1 B6 80 - E1 B6 BF)
 
{{lang}} is used inside <poem>...</poem> tags for song lyrics, poetry, etc. <poem>...</poem> replaces newlines with
poem stripmarkers. These have the form:
?'"`UNIQ--poem-67--QINU`"'?
where the '?' character is actually the delete character (U+007F, \127). Including the '\n' (U+000A) and 'del' (U+007F)
characters in the latn character table allows {{lang}} to auto-italicize text within <poem>...</poem> tags.
 
]]
 
p.is_latn = require ('Module:Unicode data').is_Latin
-- Report whether every character of text belongs to the module's hand-built Latn character set.
-- HTML entities are decoded and piped wikilinks are reduced to their label before the test.
-- Returns the negation of is_set() applied to whatever remains once all Latn characters are removed.
function p.is_latn (text)
	-- members of the ustring character set; each entry is a single character or a range written as UTF-8 byte escapes
	local latn_members = {
		'\n\32-\127',						-- newline (U+000A), Basic Latin U+0020–U+007E, and delete (U+007F); newline and delete for <poem>...</poem> support
		'\194\160-\194\172',				-- Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC)
		'\195\128-\195\191',				-- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF)
		'\196\128-\197\191',				-- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF)
		'\198\128-\201\143',				-- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F)
		'\225\184\128-\225\187\191',		-- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
		'\226\177\160-\226\177\191',		-- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
		'\234\156\160-\234\159\191',		-- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
		'\234\172\176-\234\173\175',		-- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF)
		'\239\172\128-\239\172\134',		-- Alphabetic Presentation Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86)
		'\239\188\129-\239\188\188',		-- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC)
		'–',								-- ndash
		'—',								-- mdash
		'«', '»',							-- guillemets commonly used in several 'Latn' languages
	};
	local latn = '[' .. table.concat (latn_members) .. ']';	-- wrap the members in brackets to make the set

	local decoded = mw.text.decode (text, true);				-- numeric and named html entities become their unicode characters
	local unlinked = mw.ustring.gsub (decoded, '%[%[[^|]+|([^%]]+)%]%]', '%1');	-- keep only the label of a piped wikilink; the target may be an interwiki to a non-Latn wikipedia

	return not is_set (mw.ustring.gsub (unlinked, latn, ''));	-- strip every latn character; nothing left over means text is latn
end
 
 
Line 162 ⟶ 115:
]]
 
-- Resolve the italic-control template parameters to a font-style keyword.
--
-- Preferred call form: validate_italic (args) where args is the template's argument table;
-- |italic=, |italics=, and |i= are aliases and at most one of them may be set.
-- Legacy call form: validate_italic (italic, italics) where the values of |italic= and
-- |italics= are passed as two separate values (|i= cannot be seen in this form).
--
-- Returns two values:
--	no conflict: 'italic', 'normal', 'inherit', or 'invert' (nil when no parameter is set or
--		when the value is 'default' or is not recognized) and a nil error message
--	conflict: nil and an error message string
local function validate_italic (args, italics)
	local properties = {['yes'] = 'italic', ['no'] = 'normal', ['unset'] = 'inherit', ['invert'] = 'invert', ['default'] = nil};

	if 'table' ~= type (args) then								-- legacy call form: wrap the two values so that one code path serves both forms
		args = {italic = args, italics = italics};
	end

	local count = 0;
	for _, name in ipairs ({'italic', 'italics', 'i'}) do		-- count how many of the aliases are set
		if args[name] then
			count = count + 1;
		end
	end

	if count > 1 then											-- return nil and an error message if more than one is set
		return nil, 'only one of &#124;italic=, &#124;italics=, or &#124;i= can be specified';
	end
	return properties[args.italic or args.italics or args.i], nil;	-- return an appropriate value and a nil error message
end
 
 
--[=[--------------------------< V A L I D A T E _ C A T _ A R G S >----------------------------------------------------------
 
Default behavior of the {{lang}} and {{lang-xx}} templates is to add categorization when the templates are used in mainspace.
This default functionality may be suppressed by setting |nocat=yes or |cat=no. This function selects one of these two parameters
to control categorization.
 
Because having two parameters with 'opposite' names and 'opposite' values is confusing, this function accepts only affirmative
values for |nocat= and only negative values for |cat=; in both cases the 'other' sense (and non-sense) is not accepted and the
parameter is treated as if it were not set in the template.
 
Sets args.nocat to true if categorization is to be turned off; to nil if the default behavior should apply.
 
Accepted values for |nocat= are the text strings:
'yes', 'y', 'true', 't', 'on', '1' -- [[Module:Yesno]] returns logical true for all of these; false or nil else
for |cat=
'no', 'n', 'false', 'f', 'off', '0' -- [[Module:Yesno]] returns logical false for all of these; true or nil else
 
]=]
 
-- Normalize the categorization switches in args, in place.
-- When neither |nocat= nor |cat= is set, args is left untouched.  Otherwise args.nocat becomes
-- true when |cat= is a negative value or |nocat= is an affirmative value (as judged by
-- Module:Yesno), and nil in every other case.  Returns nothing.
local function validate_cat_args (args)
	if not args.nocat and not args.cat then					-- neither parameter set: default categorization applies
		return;
	end

	local yesno = require "Module:Yesno";						-- loaded only when one of the parameters is present
	local suppress = (false == yesno (args.cat)) or (true == yesno (args.nocat));

	args.nocat = suppress and true or nil;						-- true turns categorization off; nil is as if the parameter were not set
end
 
Line 248 ⟶ 241:
because those parameters are superfluous to the IETF subtags in |code=)
 
returns six values; all lower case. Valid parts are returned as themselves; omitted parts are returned as empty strings, invalid
parts are returned as nil; the sixth returned item is an error message (if an error detected) or nil.
 
Line 270 ⟶ 263:
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%d%d%d%d$') then -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$');
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d]w+$') then -- ll-Ssss-RR-variant (where variant is 5-8 alnum characters)
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d]w+$') then -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
 
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%d$') then -- ll-Ssss-variant (where variant is 4 digits)
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$');
elseif source:match ('^%a%a%a?%-%a%a%a%a%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d]w+$') then -- ll-Ssss-variant (where variant is 5-8 alnum characters)
code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
 
elseif source:match ('^%a%a%a?%-%a%a%-%d%d%d%d$') then -- ll-RR-variant (where variant is 4 digits)
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$');
elseif source:match ('^%a%a%a?%-%d%d%d%-%d%d%d%d$') then -- ll-DDD-variant (where region is 3 digits; variant is 4 digits)
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$');
elseif source:match ('^%a%a%a?%-%a%a%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d]w+$') then -- ll-RR-variant (where variant is 5-8 alnum characters)
code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
elseif source:match ('^%a%a%a?%-%d%d%d%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d]w+$') then -- ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
 
elseif source:match ('^%a%a%a?%-%d%d%d%d$') then -- ll-variant (where variant is 4 digits)
code, variant = source:match ('^(%a%a%a?)%-(%d%d%d%d)$');
elseif source:match ('^%a%a%a?%-[%aw%d][w%aw%d][w%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?w+$') then -- ll-variant (where variant is 5-8 alnum characters)
code, variant = source:match ('^(%a%a%a?)%-([%aw%d][w%aw%d][w%aw%d][%a%d][%a%d][%a%d]w?[%a%d]w?[%a%d]w?)$');
 
elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a$') then -- ll-Ssss-RR
Line 299 ⟶ 292:
code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$');
 
elseif source:match ('^%a%a%a?%-%a%a%a%a$') then -- ll-Ssss
code, script = source:match ('^(%a%a%a?)%-(%a%a%a%a)$');
 
elseif source:match ('^%a%a%a?%-%a%a$') then -- ll-RR
code, region = source:match ('^(%a%a%a?)%-(%a%a)$');
elseif source:match ('^%a%a%a?%-%d%d%d$') then -- ll-DDD (region is 3 digits)
code, region = source:match ('^(%a%a%a?)%-(%d%d%d)$');
 
elseif source:match ('^%a%a%a?$') then -- ll
code = source:match ('^(%a%a%a?)$');
 
elseif source:match ('^%a%a%a?%-x%-[%aw%d][%a%d]w?[%a%d]w?[%a%d]w?[%a%d]w?[%a%d]w?[%a%d]w?[%a%d]w?$') then -- ll-x-pppppppp)
code, private = source:match ('^(%a%a%a?)%-x%-([%a%d]w+)$');
 
else
Line 420 ⟶ 413:
table.insert (out, '</span>');
if (0 == namespace) and not is_set (args.nocat) then -- only categorize in article space
table.insert (out, table.concat ({'[[Category:', category, ' template errors]]'}));
end
Line 666 ⟶ 659:
 
Italic rendering is controlled by the |italic= template parameter so italic markup should never appear in args.text
either as ''itself'' or as '''''bold italic''''' unless |italic=unset or |italic=invert.
 
]=]
Line 679 ⟶ 672:
end
 
local style = args.italic or args.italics;
 
-- if ('unset' ~= args.italic) and ('unset' ~= args.italics) then -- allow italic markup when |italic=unset or |italics=unset
if ('unset' ~= style) and ('invert' ~=style) then
if args.text:find ("%f[\']\'\'%f[^\']") or args.text:find ("%f[\']\'\'\'\'\'%f[^\']") then -- italic but not bold, or bold italic
Line 707 ⟶ 699:
end
if (0 < #maint_cats) and (0 == namespace) and not is_set (nocat) then -- when there are maintenance categories; article namespace only
for _, cat in ipairs (maint_cats) do
table.insert (maint, table.concat ({'[[Category:', cat, ']]'})); -- format and add the categories
Line 769 ⟶ 761:
return msg;
end
 
validate_cat_args (args); -- determine if categorization should be suppressed
 
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false
Line 778 ⟶ 772:
end
 
args.italic, msg = validate_italic (args.italic, args.italics);
if msg then
return make_error_msg (msg, args, 'lang');
Line 909 ⟶ 903:
args.link = args.link or args.links; -- prefer args.link
end
 
validate_cat_args (args); -- determine if categorization should be suppressed
 
args.rtl = args.rtl == 'yes'; -- convert to boolean: 'yes' -> true, other values -> false
Line 918 ⟶ 914:
end
args.italic, msg = validate_italic (args.italic, args.italics);
if msg then
return make_error_msg (msg, args, 'lang-xx');
Line 1,065 ⟶ 1,061:
 
All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function.
 
Set invoke's |link= parameter to yes to get wikilinked version of the language name.
 
]]
Line 1,070 ⟶ 1,068:
function p.name_from_code (frame)
local subtags = {}; -- IETF subtags script, region, variant, and private
local raw_code = (frame.args and frame.args[1]) or frame; -- save a copy of the input IETF subtag
local link = 'yes' == frame.args['link']; -- save a copy of the link-enable positional parameter (value can be anything)
local code; -- the language code
local msg; -- gets an error message if IETF language tag is malformed or invalid
local language_name = '';
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code);
if msg then
local template = (frame.args['template'] and table.concat ({'{{', frame.args['template'], '}}: '})) or ''; -- make template name (if provided by the template)
return '';
return table.concat ({'<span style=\"font-size:100%; font-style:normal;\" class=\"error\">error: ', template, msg, '</span>'});
end
 
Line 1,095:
 
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them
 
if link then -- when |link=yes, wikilink the language name
if language_name:find ('languages') then
return make_wikilink (language_name); -- collective language name uses simple wikilink
else
return make_wikilink (language_name .. ' language', language_name); -- language name with wikilink
end
end
 
return language_name;
Line 1,103 ⟶ 1,111:
--[[--------------------------< T R A N S L >------------------------------------------------------------------
 
Entry point for the template {{transl}}
 
]]
Line 1,145 ⟶ 1,153:
end
 
args.italic, msg = validate_italic (args.italic, args.italics);
if msg then
return make_error_msg (msg, args, 'transl');
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.

Navigation menu