Module:Lang: Difference between revisions

synch from sandbox;
(+name_from_code() for documentation and perhaps other purposes;)
(synch from sandbox;)
Line 60:
 
return properties[italic];
end
 
 
--[[--------------------------< T O _ B O O L E A N >----------------------------------------------------------
 
Returns true for 'yes', false for 'no', and nil for any other value.
 
]]
 
local function to_boolean (param)
	-- Map the two recognized keywords onto their boolean values.  Anything
	-- that is not a key of this table (other strings, nil, non-string values)
	-- indexes to nil, which is the 'neither yes nor no' result.
	local keyword_map = {
		['yes'] = true,
		['no'] = false,
	};

	return keyword_map[param];
end
 
Line 111 ⟶ 94:
local function format_ietf_tag (code, script, region, variant)
local out = {};
 
local c;
table.insert (out, code:lower());
if is_set (script) then
cscript = script:matchlower():gsub ('^%a'):, string.upper(); -- make script sentence case
script = script:lower():gsub ('^%a', c, 1);
table.insert (out, script);
end
Line 308 ⟶ 289:
 
 
--[=[--------------------------< M A K E _ E R R O R _ M S G >--------------------------------------------------
 
assembles an error message from message text, then adds a help link and error category.
]=]
 
]]
 
local function make_error_msg (msg, nocat)
Line 373 ⟶ 356:
table.insert (span, text); -- insert the text
 
-- if is_set (size) then -- {{lang}} only
-- table.insert (span, table.concat ({' style="font-size:', size, ';"'}))
-- end
-- table.insert (span, '>'); -- close the opening span tag
-- if italic then
-- table.insert (span, table.concat ({"''", text, "''"})); -- text with italic markup
-- else
-- table.insert (span, text); -- DEFAULT: text is not italicized
-- end
table.insert (span, '</span>'); -- close the span
if rtl then
Line 394 ⟶ 368:
 
TODO: figure out how to correctly support collective language codes: sem, Semitic languages (collective names
appear to always include the word 'languages'). May need new categories so that the category names are sensible.
 
]]
Line 507 ⟶ 481:
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args.nocat);
end
end
if args.italic then -- protect single quote marks from being converted to bold markup
args.text = args.text:gsub ("^\'[^\']+", "<span></span>%1"); -- leading single quote mark
args.text = args.text:gsub ("[^\']+\'$", "%1<span></span>"); -- trailing single quote mark
end
end
 
 
--[[--------------------------< S U B T A G _ C H E C K >------------------------------------------------------
 
checks the subtags: script, region, and variant to be valid and in agreement with any tags that are included in
the main language code: if |script=Cyrl and language code is 'abq-Cyrl' the two scripts must agree (case insensitive)
 
returns the selected subtag and nil error message;
returns empty string and nil error message when both language code subtag and matching subtag parameter are not set
returns nil and error message else
 
TODO: is this no longer required? Parameter subtags |script=, |region=, and |variant= are all consolidated
with code subtags, and validated, in get_ietf_parts().
 
]]
 
local function subtag_check (name, args_code, code_subtag, args_subtag, nocat)
	-- name: subtag kind; used as the key into lang_name_table and in error text
	-- args_code: the full language code; only used (lowercased) in the mismatch message
	-- code_subtag: subtag taken from the language code
	-- args_subtag: subtag taken from the matching |<name>= parameter
	-- nocat: passed through unchanged to make_error_msg()
	--
	-- returns the chosen subtag on success, '' when there is nothing to check,
	-- or nil followed by whatever make_error_msg() returns on failure

	local code_has_subtag = is_set (code_subtag);

	if not (code_has_subtag or is_set (args_subtag)) then					-- neither source supplies a subtag
		return '';															-- empty string so the caller can concatenate
	end

	args_subtag = args_subtag and args_subtag:lower();						-- normalize once; comparisons and lookups below use lowercase

	if code_has_subtag then													-- the language code carries its own subtag
		if is_set (args_subtag) and (code_subtag ~= args_subtag) then		-- a parameter was given too, so it has to agree
			return nil, make_error_msg (table.concat ({'{{lang-xx}}: code / ', name, ' mismatch: ', args_code:lower(), ' / ', args_subtag}), nocat);
		end
		return code_subtag;
	end

	if not is_set (args_subtag) then										-- nothing from the parameter either; hand back what we were given
		return code_subtag;
	end

	if not lang_name_table[name][args_subtag] then							-- parameter value is not a known subtag of this kind
		return nil, make_error_msg (table.concat ({'{{lang-xx}}: invalid ', name, ': ', args_subtag}), nocat);
	end

	return args_subtag;														-- valid parameter value stands in for the missing code subtag
end
 
Line 558 ⟶ 488:
 
render maintenance messages and categories
 
]]
 
Line 563 ⟶ 494:
local maint = {};
if 0 < #maint_msgs then -- when there are maintenance messages
table.insert (maint, table.concat ({'<span class="lang-comment" style="font-style:normal; display:none; color:#33aa33; margin-left:0.3em">'})); -- opening <span> tag
for _, msg in ipairs (maint_msgs) do
table.insert (maint, table.concat ({msg, ' '})); -- add message strings
end
table.insert (maint, '</span>'); -- close the span
end
if (0 < #maint_cats) and (0 == namespace) and not is_set (nocat) then -- when there are mainenance categories; article namespace only
for _, cat in ipairs (maint_cats) do
table.insert (maint, table.concat ({'[[Category:', cat, ']]'})); -- format and add the categories
end
end
Line 583 ⟶ 514:
--[[--------------------------< L A N G >----------------------------------------------------------------------
 
entry point for {{lang}}
<includeonly>{{#invoke:lang|lang|code={{{1|}}}|text={{{2|}}}|rtl={{{rtl|}}}|italic={{{italic|}}}|size={{{size|}}}|nocat={{{nocat|}}}}}</includeonly>
 
there should be no reason to set parameters in the {{lang}} {{#invoke:}}
|code = the BCP47 language code
<includeonly>{{#invoke:lang|lang}}</includeonly>
|text = the displayed text in language specified by code
 
|rtl = boolean true identifies the language specified by code as a right-to-left language
parameters are received from the template's frame (parent frame)
|size = css keyword appropriate for use with css font-size:<size>
|nocat = boolean true inhibits normal categorization; error categories are not affected
 
]]
Line 599 ⟶ 529:
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args.nocat);
else
args.code = args[1] or args.code; -- prefer args.code
end
 
Line 605 ⟶ 535:
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args.nocat);
else
args.text = args[2] or args.text; -- prefer args.text
end
Line 611 ⟶ 541:
 
local out = {};
local language_name; -- used to make category names
local subtags = {}; -- IETF subtags script, region, variant, and private
local code; -- the language code
local msg; -- for error messages
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}})
 
if msg then
if not (code and subtags.script and subtags.region and subtags.variant) then
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args.nocat);
end
 
args.italic = to_boolean(args.italic); -- convert to boolean or nil: 'yes' -> true, 'no' -> false; else nil
args.italic = validate_italic (args.italic); -- nil or font-style property value
 
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls
if 'latn' == subtags.script then -- script set to latn
-- args.italic = true; -- DEFAULT for {{lang}} templates is upright; but if latn script
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
else
-- args.italic = false; -- italic not set; script not latn
args.italic = 'normal'; -- italic not set; script not latn; set for font-style:normal
end
end
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
if is_set (msg) then -- msg is an already-formatted error message
return msg;
end
 
if is_set (subtags.script) then -- if script set, override rtl setting
if in_array (subtags.script, lang_data.rtl_scripts) then
args.rtl = true; -- script is an rtl script
Line 651 ⟶ 577:
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles; private omitted because private
 
if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags; done this way because ...
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag
elseif lang_data.override[code] then -- get the language name for categorization
language_name = lang_data.override[code][1] -- prefer language names taken from the override table
elseif lang_name_table.lang[code] then
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one
Line 669 ⟶ 595:
--[[--------------------------< L A N G _ X X >----------------------------------------------------------------
 
For the {{lang-xx}} templates, the only parameter required to be set in the template is the language code. All
<includeonly>{{#invoke:lang|lang_xx
other parameters can, usually should, be written in the template call. For {{lang-xx}} templates for languages
|code=<code>
that can have multiple writing systems, it may be appropriate to set |script= as well.
|script={{{script|}}}
|region={{{region|}}}
|variant={{{variant|}}}
|text={{{1|}}}
|link={{{links|{{{link}}}}}}
|rtl={{{rtl|}}}
|nocat={{{nocat|}}}
|italic={{{italic|}}}
|translation={{{lit|}}}
|translit={{{translit|}}}
|translit-script={{{translit-script|}}}
|translit-std={{{translit-std|}}}}}</includeonly>
 
For each {{lang-xx}} template choose the appropriate entry-point function so that this function knows the default
|code = (required) the BCP47 language code
styling that should be applied to text.
|script = BCP47 script name; especially for use with languages that use multiple writing systems; yields to the script subtag in |code= if present [not yet implemented]
|text = (required) the displayed text in language specified by code
|link = boolean true (default) links language specified by code to associated language article
|rtl = boolean true identifies the language specified by code as a right-to-left language
|nocat = boolean true inhibits normal categorization; error categories are not affected
|italic = boolean true (default) renders displayed text in italic font; when |italic= not set and |script= set to something other than Latn then args.italic='no' [not yet implemented]
|lit = text that is a literal translation of text
 
For normal, upright style:
for those {{lang-xx}} templates that support transliteration:
<includeonly>{{#invoke:lang|lang_xx_normal|code=xx}}</includeonly>
|translit = text that is a transliteration of text
For italic style:
|translit-std = the standard that applies to the transliteration
<includeonly>{{#invoke:lang|lang_xx_italic|code=xx}}</includeonly>
|translit-script = ISO 15924 script name; falls back to code
 
All other parameters should be received from the template's frame (parent frame)
 
Supported parameters are:
|code = (required) the IANA language code
|script = IANA script code; especially for use with languages that use multiple writing systems; yields to the script subtag in |code= if present [not yet implemented]
|region = IANA region code
|variant = IANA variant code
|text = (required) the displayed text in language specified by code
|link = boolean false ('no') unlinks language specified by code to associated language article
|rtl = boolean true ('yes') identifies the language specified by code as a right-to-left language
|nocat = boolean true ('yes') inhibits normal categorization; error categories are not affected
|italic = boolean true ('yes') renders displayed text in italic font; boolean false ('no') renders displayed text in normal font; not set renders according to initial_style_state
|lit = text that is a literal translation of text
for those {{lang-xx}} templates that support transliteration: -- TODO: figure out how to determine if transliteration is allowed/appropriate
|translit = text that is a transliteration of text
|translit-std = the standard that applies to the transliteration
|translit-script = ISO 15924 script name; falls back to code
 
For {{lang-xx}}, the positional parameters are:
Line 702 ⟶ 630:
{{{2}}} transliterated text
{{{3}}} literal translation text
no other positional parameters are allowed
 
]]
 
local function _lang_xx (frame)
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}; is that the right thing to do?
 
if args[1] and args.text then
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args.nocat);
else
args.text = args[1] or args.text; -- prefer args.text
end
 
Line 717 ⟶ 646:
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args.nocat);
else
args.translit = args[2] or args.translit -- prefer args.translit
end
Line 725 ⟶ 654:
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args.nocat);
else
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation
end
 
Line 731 ⟶ 660:
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args.nocat);
else
args.link = args.link or args.links; -- prefer args.link
end
 
Line 737 ⟶ 666:
 
local out = {};
local language_name; -- used to make display text, article links, and category names
local subtags = {}; -- IETF subtags script, region, and variant
local code; -- the language code
 
local translit_script;
local translit;
local translit_title;
local msg; -- for error messages
 
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private
 
if msg then -- if an error detected then there is an error message
if not (code and subtags.script and subtags.region and subtags.variant) then
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args.nocat);
end
-- args.italic = to_boolean (args.italic); -- convert to boolean or nil: 'yes' -> true, 'no' -> false; else nil
args.italic = validate_italic (args.italic); -- nil or font-style property value
 
-- if args.italic == nil then -- args.italic controls
-- if not is_set (subtags.script) or ('latn' == subtags.script) then -- script not set then default; script set to latn same
-- args.italic = true; -- DEFAULT for {{lang-xx}} templates is to italicize
-- else
-- args.italic = false; -- italic not set; script not latn
-- end
-- end
if nil == args.italic then -- args.italic controls
if is_set (subtags.script) then
Line 789 ⟶ 709:
 
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles
 
if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags; done this way because ...
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag
elseif lang_data.override[args.code:lower()] then -- first look for whole IETF tag in override table
language_name = lang_data.override[args.code:lower()][1]; -- args.code:lower() because format_ietf_tag() returns mixed case
elseif lang_data.override[code] then -- not there so try basic language code
language_name = lang_data.override[code][1];
Line 816 ⟶ 736:
 
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
-- TODO: make this transl code a separate function so that {{transl}} can use this module?
if is_set (args.translit) then -- transliteration (not supported in {{lang}}); not supported in all {{lang-xx}} TODO: figure out how to know which {{tlag-xx}} support transliteration
table.insert (out, ', <small>');
translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name)
Line 830 ⟶ 750:
table.insert (out, translit);
else
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or translittransli-script: \'', args['translit-script'] or 'missing', '\''}), args.nocat);
end
end
Line 847 ⟶ 767:
table.insert (out, make_category (code, language_name, args.nocat));
table.insert (out, render_maint(args.nocat)); -- maintenance messages and categories
 
return table.concat (out); -- put it all together and done
Line 861 ⟶ 781:
]]
 
--function p.lang_xx (frame)
-- initial_style_state = 'italic';
-- return _lang_xx (frame);
--end
 
 
Anonymous user