Module:Lang: Difference between revisions

+support for private use tags;
No edit summary
(+support for private use tags;)
Line 108:
region subtag - two-letter or three digit IANA region code
variant subtag - four digit or 5-8 alnum variant code
private subtag - x- followed by 1-8 alnum private code; only supported with the primary language tag
 
in any one of these forms
Line 114 ⟶ 115:
lang-region lang-region-variant
lang-script-region lang-script-region-variant
lang-x-private
each of lang, script, region, variant, and private, when used, must be valid
 
Languages with both two- and three-character code synonyms are promoted to the two-character synonym because
Line 124 ⟶ 126:
because those parameters are superfluous to the IETF subtags in |code=)
 
returns six values. Valid parts are returned as themselves; omitted parts are returned as empty strings, invalid
parts are returned as nil; the sixth returned item is an error message (if an error is detected) or nil.
 
see http://www.rfc-editor.org/rfc/bcp/bcp47.txt section 2.1
Line 136 ⟶ 138:
local region = '';
local variant = '';
local private = '';
local c;
if not is_set (source) then
return nil, nil, nil, nil, nil, 'missing language tag';
end
 
Line 186 ⟶ 189:
code = source:match ('^(%a%a%a?)$');
 
elseif source:match ('^%a%a%a?%-x%-[%a%d][%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?$') then -- ll-x-pppppppp)
elseif source:match ('^1%a+$') then -- special case for two Linguist list identifiers in Module:Lang/data override table
code, private = source:match ('^(1%a%a%a?)%-x%-([%a%d]+)$'); -- these may need tweaks in future if other alphanumeric Linguist list identifiers are added
 
else
return nil, nil, nil, nil, nil, table.concat ({'unrecognized language tag: ', source}); -- don't know what we got but it is malformed
end
 
Line 196 ⟶ 199:
if not (lang_data.override[code] or lang_name_table.lang[code]) then
return nil, nil, nil, nil, nil, table.concat ({'unrecognized language code: ', code}); -- invalid language code, don't know about the others (don't care?)
end
Line 207 ⟶ 210:
if is_set (script) then
if is_set (args_script) then
return code, nil, nil, nil, nil, 'redundant script tag'; -- both code with script and |script= not allowed
end
else
Line 216 ⟶ 219:
script = script:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.script[script] then
return code, nil, nil, nil, nil, table.concat ({'unrecognized script: ', script, ' for code: ', code}); -- language code ok, invalid script, don't know about the others (don't care?)
end
end
-- if lang_name_table.suppressed[script] then -- ensure that code-script does not use a suppressed script
-- if in_array (code, lang_name_table.suppressed[script]) then
-- return code, nil, nil, nil, table.concat ({'script: ', script, ' not supported for code: ', code}); -- language code ok, script is suppressed for this code
-- end
-- end
 
if is_set (region) then
if is_set (args_region) then
return code, nil, nil, nil, nil, 'redundant region tag'; -- both code with region and |region= not allowed
end
else
Line 231 ⟶ 239:
region = region:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.region[region] then
return code, script, nil, nil, nil, table.concat ({'unrecognized region: ', region, ' for code: ', code});
end
end
Line 237 ⟶ 245:
if is_set (variant) then
if is_set (args_variant) then
return code, nil, nil, nil, nil, 'redundant variant tag'; -- both code with variant and |variant= not allowed
end
else
Line 246 ⟶ 254:
variant = variant:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.variant[variant] then -- make sure variant is valid
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant});
end -- does this duplicate/replace tests in lang() and lang_xx()?
if is_set (script) then -- if script set it must be part of the 'prefix'
if not in_array (table.concat ({code, '-', script}), lang_name_table.variant[variant]['prefixes']) then
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code-script pair: ', code, '-', script});
end
else
if not in_array (code, lang_name_table.variant[variant]['prefixes']) then
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code: ', code});
end
end
end
 
if is_set (private) then
return code, script, region, variant; -- return the good bits
private = private:lower(); -- ensure that we use and return lower case version of this
if not lang_data.override[table.concat ({code, '-x-', private})] then -- make sure private tag is valid; note that index
return code, script, region, nil, nil, table.concat ({'unrecognized private tag: ', private});
end
end
return code, script, region, variant, private, nil; -- return the good bits; make sure that msg is nil
end
 
Line 564 ⟶ 578:
local msg;
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}})
 
if not (code and subtags.script and subtags.region and subtags.variant) then
Line 580 ⟶ 594:
end
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
if is_set (msg) then
return msg;
Line 593 ⟶ 607:
end
 
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles; private omitted because private
 
if is_set (subtags.private) and lang_data.override[table.concat ({code, 'x-', subtags.private})] then -- look for private use tags
language_name = lang_data.override[table.concat ({code, 'x-', subtags.private})][1];
elseif lang_data.override[code] then
language_name = lang_data.override[code][1]
elseif lang_name_table.lang[code] then
Line 688 ⟶ 704:
local msg;
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private
 
if not (code and subtags.script and subtags.region and subtags.variant) then
Line 719 ⟶ 735:
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles
 
if is_set (subtags.private) and lang_data.override[table.concat ({code, 'x-', subtags.private})] then -- look for private use tags
language_name = lang_data.override[table.concat ({code, 'x-', subtags.private})][1];
elseif lang_data.override[args.code:lower()] then -- first look for whole IETF tag in override table
language_name = lang_data.override[args.code:lower()][1];
elseif lang_data.override[code] then -- not there so try basic language code
language_name = lang_data.override[code][1];
elseif not is_set (subtags.variant) then
if lang_name_table.lang[code] then
Anonymous user