Module:Lang: Difference between revisions

Jump to navigation Jump to search
Content added Content deleted
(+name_from_code() for documentation and perhaps other purposes;)
(synch from sandbox;)
Line 60: Line 60:


return properties[italic];
return properties[italic];
end


--[[--------------------------< T O _ B O O L E A N >----------------------------------------------------------

Returns true for 'yes', false for 'no', and nil for any other value.

]]

local function to_boolean (param)
	-- only the exact lower-case keywords 'yes' and 'no' have entries; every
	-- other value (nil, '', 'YES', non-strings, ...) has no entry and yields nil
	local keyword_map = {yes = true, no = false};
	return keyword_map[param];
end
end


Line 111: Line 94:
local function format_ietf_tag (code, script, region, variant)
local function format_ietf_tag (code, script, region, variant)
local out = {};
local out = {};

local c;
table.insert (out, code:lower());
table.insert (out, code:lower());
if is_set (script) then
if is_set (script) then
c = script:match ('^%a'):upper(); -- make script sentence case
script = script:lower():gsub ('^%a', string.upper);
script = script:lower():gsub ('^%a', c, 1);
table.insert (out, script);
table.insert (out, script);
end
end
Line 308: Line 289:




--[=[-------------------------< M A K E _ E R R O R _ M S G >--------------------------------------------------
--[[--------------------------< M A K E _ E R R O R _ M S G >--------------------------------------------------


assembles an error message from message text, then adds a help link and error category.
]=]

]]


local function make_error_msg (msg, nocat)
local function make_error_msg (msg, nocat)
Line 373: Line 356:
table.insert (span, text); -- insert the text
table.insert (span, text); -- insert the text


-- if is_set (size) then -- {{lang}} only
-- table.insert (span, table.concat ({' style="font-size:', size, ';"'}))
-- end
-- table.insert (span, '>'); -- close the opening span tag
-- if italic then
-- table.insert (span, table.concat ({"''", text, "''"})); -- text with italic markup
-- else
-- table.insert (span, text); -- DEFAULT: text is not italicized
-- end
table.insert (span, '</span>'); -- close the span
table.insert (span, '</span>'); -- close the span
if rtl then
if rtl then
Line 394: Line 368:


TODO: figure out how to correctly support collective language codes: sem, Semitic languages (collective names
TODO: figure out how to correctly support collective language codes: sem, Semitic languages (collective names
appear to always include the word 'languages').. May need new categories so that the category names are sensible.
appear to always include the word 'languages'). May need new categories so that the category names are sensible.


]]
]]
Line 507: Line 481:
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args.nocat);
return make_error_msg (table.concat ({'{{', template, '}}: text has malformed markup'}), args.nocat);
end
end
end
if args.italic then -- protect single quote marks from being converted to bold markup
args.text = args.text:gsub ("^\'[^\']+", "<span></span>%1"); -- leading single quote mark
args.text = args.text:gsub ("[^\']+\'$", "%1<span></span>"); -- trailing single quote mark
end
end
end


--[[--------------------------< S U B T A G _ C H E C K >------------------------------------------------------

checks the subtags: script, region, and variant to be valid and in agreement with any tags that are included in
the main language code: if |script=Cyrl and language code is 'abq-Cyrl' the two scripts must agree (case insensitive)

returns the selected subtag and nil error message;
returns empty string and nil error message when both language code subtag and matching subtag parameter are not set
returns nil and error message else

TODO: is this function still required? Parameter subtags |script=, |region=, and |variant= are all consolidated
with code subtags, and validated, in get_ietf_parts().

]]

local function subtag_check (name, args_code, code_subtag, args_subtag, nocat)
	-- name:        subtag kind; used as the key into lang_name_table and in error message text
	-- args_code:   the full language code as supplied; only used (lower-cased) in the mismatch message
	-- code_subtag: subtag extracted from the language code, if any
	-- args_subtag: subtag supplied through the matching |<name>= parameter, if any
	-- nocat:       passed through unchanged to make_error_msg()
	--
	-- returns: the selected subtag; or '' when neither source supplies one;
	--          or nil plus an error message when validation fails

	local have_code_subtag = is_set (code_subtag);

	if not have_code_subtag and not is_set (args_subtag) then	-- no subtags, then bail
		return '';												-- empty string for concatenation
	end

	args_subtag = args_subtag and args_subtag:lower();			-- so we only need do this once; prettify later
	local have_args_subtag = is_set (args_subtag);

	if have_code_subtag then									-- here when language code has a subtag
		-- the two subtags must agree regardless of letter case, so normalize
		-- the code subtag for the comparison too; the value returned to the
		-- caller is left exactly as it was received
		if have_args_subtag and (code_subtag:lower() ~= args_subtag) then
			return nil, make_error_msg (table.concat ({'{{lang-xx}}: code / ' .. name .. ' mismatch: ', args_code:lower(), ' / ', args_subtag}), nocat);
		end
		return code_subtag;
	end

	if have_args_subtag then									-- no subtag in the code but |<name>= has a value
		if not lang_name_table[name][args_subtag] then			-- and that value is not a known subtag
			return nil, make_error_msg (table.concat ({'{{lang-xx}}: invalid ' .. name .. ': ', args_subtag}), nocat);
		end
		return args_subtag;										-- legitimate subtag: use |<name>=
	end

	return code_subtag;											-- nothing usable from either source
end
end


Line 558: Line 488:


render maintenance messages and categories
render maintenance messages and categories

]]
]]


Line 563: Line 494:
local maint = {};
local maint = {};
if 0 < #maint_msgs then
if 0 < #maint_msgs then -- when there are maintenance messages
table.insert (maint, table.concat ({'<span class="lang-comment" style="font-style:normal; display:none; color:#33aa33; margin-left:0.3em">'}));
table.insert (maint, table.concat ({'<span class="lang-comment" style="font-style:normal; display:none; color:#33aa33; margin-left:0.3em">'})); -- opening <span> tag
for _, msg in ipairs (maint_msgs) do
for _, msg in ipairs (maint_msgs) do
table.insert (maint, table.concat ({msg, ' '}));
table.insert (maint, table.concat ({msg, ' '})); -- add message strings
end
end
table.insert (maint, '</span>');
table.insert (maint, '</span>'); -- close the span
end
end
if (0 < #maint_cats) and (0 == namespace) and not is_set (nocat) then
if (0 < #maint_cats) and (0 == namespace) and not is_set (nocat) then -- when there are mainenance categories; article namespace only
for _, cat in ipairs (maint_cats) do
for _, cat in ipairs (maint_cats) do
table.insert (maint, table.concat ({'[[Category:', cat, ']]'}));
table.insert (maint, table.concat ({'[[Category:', cat, ']]'})); -- format and add the categories
end
end
end
end
Line 583: Line 514:
--[[--------------------------< L A N G >----------------------------------------------------------------------
--[[--------------------------< L A N G >----------------------------------------------------------------------


entry point for {{lang}}
<includeonly>{{#invoke:lang|lang|code={{{1|}}}|text={{{2|}}}|rtl={{{rtl|}}}|italic={{{italic|}}}|size={{{size|}}}|nocat={{{nocat|}}}}}</includeonly>


there should be no reason to set parameters in the {{lang}} {{#invoke:}}
|code = the BCP47 language code
<includeonly>{{#invoke:lang|lang}}</includeonly>
|text = the displayed text in language specified by code

|rtl = boolean true identifies the language specified by code as a right-to-left language
parameters are received from the template's frame (parent frame)
|size = css keyword appropriate for use with css font-size:<size>
|nocat = boolean true inhibits normal categorization; error categories are not affected


]]
]]
Line 599: Line 529:
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args.nocat);
return make_error_msg ('{{lang}}: conflicting: {{{1}}} and |code=', args.nocat);
else
else
args.code = args[1] or args.code;
args.code = args[1] or args.code; -- prefer args.code
end
end


Line 605: Line 535:
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args.nocat);
return make_error_msg ('{{lang}}: conflicting: {{{2}}} and |text=', args.nocat);
else
else
args.text = args[2] or args.text;
args.text = args[2] or args.text; -- prefer args.text
end
end
Line 611: Line 541:


local out = {};
local out = {};
local language_name;
local language_name; -- used to make category names
local subtags = {};
local subtags = {}; -- IETF subtags script, region, variant, and private
local code;
local code; -- the language code
local msg;
local msg; -- for error messages


code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}})
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code); -- |script=, |region=, |variant= not supported because they should be part of args.code ({{{1}}} in {{lang}})


if msg then
if not (code and subtags.script and subtags.region and subtags.variant) then
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args.nocat);
return make_error_msg (table.concat ({'{{lang}}: ', msg}), args.nocat);
end
end


args.italic = to_boolean(args.italic); -- convert to boolean or nil: 'yes' -> true, 'no' -> false; else nil
args.italic = validate_italic (args.italic); -- nil or font-style property value
args.italic = validate_italic (args.italic); -- nil or font-style property value


if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls
if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls
if 'latn' == subtags.script then -- script set to latn
if 'latn' == subtags.script then -- script set to latn
-- args.italic = true; -- DEFAULT for {{lang}} templates is upright; but if latn script
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
else
else
-- args.italic = false; -- italic not set; script not latn
args.italic = 'normal'; -- italic not set; script not latn; set for font-style:normal
args.italic = 'normal'; -- italic not set; script not latn; set for font-style:normal
end
end
end
end
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
msg = validate_text ('lang', args); -- ensure that |text= is set (italic test disabled for the time being)
if is_set (msg) then
if is_set (msg) then -- msg is an already-formatted error message
return msg;
return msg;
end
end


if is_set (subtags.script) then -- if script set override rtl setting
if is_set (subtags.script) then -- if script set, override rtl setting
if in_array (subtags.script, lang_data.rtl_scripts) then
if in_array (subtags.script, lang_data.rtl_scripts) then
args.rtl = true; -- script is an rtl script
args.rtl = true; -- script is an rtl script
Line 651: Line 577:
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles; private omitted because private
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles; private omitted because private


if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags
if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags; done this way because ...
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1];
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag
elseif lang_data.override[code] then
elseif lang_data.override[code] then -- get the language name for categorization
language_name = lang_data.override[code][1]
language_name = lang_data.override[code][1] -- prefer language names taken from the override table
elseif lang_name_table.lang[code] then
elseif lang_name_table.lang[code] then
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one
Line 669: Line 595:
--[[--------------------------< L A N G _ X X >----------------------------------------------------------------
--[[--------------------------< L A N G _ X X >----------------------------------------------------------------


For the {{lang-xx}} templates, the only parameter required to be set in the template is the language code. All
<includeonly>{{#invoke:lang|lang_xx
other parameters can, usually should, be written in the template call. For {{lang-xx}} templates for languages
|code=<code>
that can have multiple writing systems, it may be appropriate to set |script= as well.
|script={{{script|}}}
|region={{{region|}}}
|variant={{{variant|}}}
|text={{{1|}}}
|link={{{links|{{{link}}}}}}
|rtl={{{rtl|}}}
|nocat={{{nocat|}}}
|italic={{{italic|}}}
|translation={{{lit|}}}
|translit={{{translit|}}}
|translit-script={{{translit-script|}}}
|translit-std={{{translit-std|}}}}}</includeonly>


For each {{lang-xx}} template choose the appropriate entry-point function so that the function knows the default
|code = (required) the BCP47 language code
styling that should be applied to text.
|script = BCP47 script name; especially for use with languages that use multiple writing systems; yields to the script subtag in |code= if present [not yet implemented]
|text = (required) the displayed text in language specified by code
|link = boolean true (default) links language specified by code to associated language article
|rtl = boolean true identifies the language specified by code as a right-to-left language
|nocat = boolean true inhibits normal categorization; error categories are not affected
|italic = boolean true (default) renders displayed text in italic font; when |italic= not set and |script= set to something other than Latn then args.italic='no' [not yet implemented]
|lit = text that is a literal translation of text


For normal, upright style:
for those {{lang-xx}} templates that support transliteration:
<includeonly>{{#invoke:lang|lang_xx_normal|code=xx}}</includeonly>
|translit = text that is a transliteration of text
For italic style:
|translit-std = the standard that applies to the transliteration
<includeonly>{{#invoke:lang|lang_xx_italic|code=xx}}</includeonly>
|translit-script = ISO 15924 script name; falls back to code

All other parameters should be received from the template's frame (parent frame)

Supported parameters are:
|code = (required) the IANA language code
|script = IANA script code; especially for use with languages that use multiple writing systems; yields to the script subtag in |code= if present [not yet implemented]
|region = IANA region code
|variant = IANA variant code
|text = (required) the displayed text in language specified by code
|link = boolean false ('no') unlinks language specified by code to associated language article
|rtl = boolean true ('yes') identifies the language specified by code as a right-to-left language
|nocat = boolean true ('yes') inhibits normal categorization; error categories are not affected
|italic = boolean true ('yes') renders displayed text in italic font; boolean false ('no') renders displayed text in normal font; not set renders according to initial_style_state
|lit = text that is a literal translation of text
for those {{lang-xx}} templates that support transliteration: -- TODO: figure out how to determine if transliteration is allowed/appropriate
|translit = text that is a transliteration of text
|translit-std = the standard that applies to the transliteration
|translit-script = ISO 15924 script name; falls back to code


For {{lang-xx}}, the positional parameters are:
For {{lang-xx}}, the positional parameters are:
Line 702: Line 630:
{{{2}}} transliterated text
{{{2}}} transliterated text
{{{3}}} literal translation text
{{{3}}} literal translation text
no other positional parameters are allowed


]]
]]


local function _lang_xx (frame)
local function _lang_xx (frame)
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}; is that the right thing to do?
local args = getArgs(frame, {parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}


if args[1] and args.text then
if args[1] and args.text then
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args.nocat);
return make_error_msg ('{{lang-xx}}: conflicting: {{{1}}} and |text=', args.nocat);
else
else
args.text = args[1] or args.text;
args.text = args[1] or args.text; -- prefer args.text
end
end


Line 717: Line 646:
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args.nocat);
return make_error_msg ('{{lang-xx}}: conflicting: {{{2}}} and |translit=', args.nocat);
else
else
args.translit = args[2] or args.translit
args.translit = args[2] or args.translit -- prefer args.translit
end
end
Line 725: Line 654:
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args.nocat);
return make_error_msg ('{{lang-xx}}: conflicting: |lit= and |translation=', args.nocat);
else
else
args.translation = args[3] or args.translation or args.lit;
args.translation = args[3] or args.translation or args.lit; -- prefer args.translation
end
end


Line 731: Line 660:
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args.nocat);
return make_error_msg ('{{lang-xx}}: conflicting: |links= and |link=', args.nocat);
else
else
args.link = args.link or args.links;
args.link = args.link or args.links; -- prefer args.link
end
end


Line 737: Line 666:


local out = {};
local out = {};
local language_name;
local language_name; -- used to make display text, article links, and category names
local subtags = {};
local subtags = {}; -- IETF subtags script, region, and variant
local code;
local code; -- the language code


local translit_script;
local translit_script;
local translit;
local translit;
local translit_title;
local translit_title;
local msg;
local msg; -- for error messages


code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (args.code, args.script, args.region, args.variant); -- private omitted because private


if msg then -- if an error detected then there is an error message
if not (code and subtags.script and subtags.region and subtags.variant) then
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args.nocat);
return make_error_msg (table.concat ({'{{lang-xx}}: ', msg}), args.nocat);
end
end
-- args.italic = to_boolean (args.italic); -- convert to boolean or nil: 'yes' -> true, 'no' -> false; else nil
args.italic = validate_italic (args.italic); -- nil or font-style property value
args.italic = validate_italic (args.italic); -- nil or font-style property value


-- if args.italic == nil then -- args.italic controls
-- if not is_set (subtags.script) or ('latn' == subtags.script) then -- script not set then default; script set to latn same
-- args.italic = true; -- DEFAULT for {{lang-xx}} templates is to italicize
-- else
-- args.italic = false; -- italic not set; script not latn
-- end
-- end
if nil == args.italic then -- args.italic controls
if nil == args.italic then -- args.italic controls
if is_set (subtags.script) then
if is_set (subtags.script) then
Line 789: Line 709:


args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles

if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags
if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then -- look for private use tags; done this way because ...
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1];
language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag
elseif lang_data.override[args.code:lower()] then -- first look for whole IETF tag in override table
elseif lang_data.override[args.code:lower()] then -- look for whole IETF tag in override table
language_name = lang_data.override[args.code:lower()][1];
language_name = lang_data.override[args.code:lower()][1]; -- args.code:lower() because format_ietf_tag() returns mixed case
elseif lang_data.override[code] then -- not there so try basic language code
elseif lang_data.override[code] then -- not there so try basic language code
language_name = lang_data.override[code][1];
language_name = lang_data.override[code][1];
Line 816: Line 736:


table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
-- TODO: make this transl code a separate function so that {{transl}} can use this module?
if is_set (args.translit) then -- transliteration (not supported in {{lang}}); not supported in all {{lang-xx}}
if is_set (args.translit) then -- transliteration (not supported in {{lang}}); not supported in all {{lang-xx}} TODO: figure out how to know which {{tlag-xx}} support transliteration
table.insert (out, ', <small>');
table.insert (out, ', <small>');
translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name)
translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name)
Line 830: Line 750:
table.insert (out, translit);
table.insert (out, translit);
else
else
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or translit-script: \'', args['translit-script'] or 'missing', '\''}), args.nocat);
return make_error_msg (table.concat ({'{{lang-xx}}: invalid translit-std: \'', args['translit-std'] or 'missing', '\' or transli-script: \'', args['translit-script'] or 'missing', '\''}), args.nocat);
end
end
end
end
Line 847: Line 767:
table.insert (out, make_category (code, language_name, args.nocat));
table.insert (out, make_category (code, language_name, args.nocat));
table.insert (out, render_maint(args.nocat)); -- maintenance messages and categories
table.insert (out, render_maint(args.nocat)); -- maintenance messages and categories


return table.concat (out); -- put it all together and done
return table.concat (out); -- put it all together and done
Line 861: Line 781:
]]
]]


-- Exported entry point p.lang_xx: sets initial_style_state to 'italic' and then
-- returns whatever _lang_xx (frame) returns.
-- frame: passed through unchanged to _lang_xx().
-- The '--'-prefixed lines below are a disabled copy of the same function.
-- NOTE(review): initial_style_state is assigned here without `local`; presumably
-- it is declared at module level outside this view -- confirm, otherwise this
-- assignment creates a global.
--function p.lang_xx (frame)
function p.lang_xx (frame)
-- initial_style_state = 'italic';
initial_style_state = 'italic';		-- must be set before the call to _lang_xx()
-- return _lang_xx (frame);
return _lang_xx (frame);
--end
end