Module:Lang: Difference between revisions

Line 6:

require('Module:No globals');

local p = {};

local initial_style_state; -- set by lang_xx_normal() and lang_xx_italic()

local getArgs = require ('Module:Arguments').getArgs;

local unicode = require ("Module:Unicode data"); -- for is_latin() and is_rtl()

local yesno = require ('Module:Yesno');

local lang_name_table = mw.loadData ('Module:Language/name/data');

Line 32:

Line 34:

return not (var == nil or var == '');

end

--[[------------------------------------------------------------------------------------------

Returns true if all of text argument is written using Latn script for letters, numbers and punctuationset; false else.

]]

p.is_latn = require ('Module:Unicode data').is_Latin

Line 156:

Line 149:

return;

end

local yesno = require "Module:Yesno";

if false == yesno (args.cat) or true == yesno (args.nocat) then

Line 172:

Line 164:

]]

local function in_array( needle, haystack )

local function in_array ( needle, haystack )

if needle == nil then

return false;

Line 249:

Line 241:

local function get_ietf_parts (source, args_script, args_region, args_variant)

local code, script, region, variant, private; -- ietf tag parts

local code;

local script = '';

local region = '';

local variant = '';

local private = '';

if not is_set (source) then

Line 259:

Line 247:

end

local pattern = { -- table of tables holding acceptibe ietf tag patterns and short names of the ietf part captured by the pattern

if source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%-%d%d%d%d$') then -- ll-Ssss-RR-variant (where variant is 4 digits)

~~code, script, region, variant = source:match (~~'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$');

{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$', 's', 'r', 'v'}, -- 1 - ll-Ssss-RR-variant (where variant is 4 digits)

~~elseif source:match (~~'^%a%a%a?%-%a%a%a%a%-%d%d%d%-%d%d%d%d$') ~~then~~ -- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)

{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$', 's', 'r', 'v'}, -- 2 - ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)

~~code, script, region, variant = source:match (~~'^(%a%a%a?)%-(%a%a%a%a)%-(%d%~~d%d~~)%-(%d%d%d%d)$');

{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'r', 'v'}, -- 3 - ll-Ssss-RR-variant (where variant is 5-8 alnum characters)

~~elseif source:match (~~'^%a%a%a?%-%a%a%a%a%-%a%a%-%w%w%w%w%w+$') ~~then~~ -- ll-Ssss-RR-variant (where variant is 5-8 alnum characters)

{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'r', 'v'}, -- 4 - ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)

code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');

~~elseif source:match (~~'^%a%a%a?%-%a%a%a%a%-%d%d%d%~~-%w%w%w%w%w+~~$') ~~then~~ -- ll-Ssss~~-DDD~~-variant (where ~~region~~ is 3 digits~~; variant is 5-8 alnum characters~~)

{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$', 's', 'v'}, -- 5 - ll-Ssss-variant (where variant is 4 digits)

~~code, script, region, variant = source:match (~~'^(%a%a%a?)%-(%a%a%a%a~~)%-(%d%d%d~~)%-(%w%w%w%w%w%w?%w?%w?)$');

{'^(%a%a%a?)%-(%a%a%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'v'}, -- 6 - ll-Ssss-variant (where variant is 5-8 alnum characters)

{'^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$', 'r', 'v'}, -- 7 - ll-RR-variant (where variant is 4 digits)

{'^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$', 'r', 'v'}, -- 8 - ll-DDD-variant (where region is 3 digits; variant is 4 digits)

{'^(%a%a%a?)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 'r', 'v'}, -- 9 - ll-RR-variant (where variant is 5-8 alnum characters)

{'^(%a%a%a?)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$', 'r', 'v'}, -- 10 - ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)

{'^(%a%a%a?)%-(%d%d%d%d)$', 'v'}, -- 11 - ll-variant (where variant is 4 digits)

{'^(%a%a%a?)%-(%w%w%w%w%w%w?%w?%w?)$', 'v'}, -- 12 - ll-variant (where variant is 5-8 alnum characters)

{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)$', 's', 'r'}, -- 13 - ll-Ssss-RR

{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$', 's', 'r'}, -- 14 - ll-Ssss-DDD (region is 3 digits)

{'^(%a%a%a?)%-(%a%a%a%a)$', 's'}, -- 15 - ll-Ssss

{'^(%a%a%a?)%-(%a%a)$', 'r'}, -- 16 - ll-RR

{'^(%a%a%a?)%-(%d%d%d)$', 'r'}, -- 17 - ll-DDD (region is 3 digits)

{'^(%a%a%a?)$'}, -- 18 - ll

{'^(%a%a%a?)%-x%-(%w%w?%w?%w?%w?%w?%w?%w?)$', 'p'}, -- 19 - ll-x-pppppppp (private is 1-8 alnum characters)

}

local t = {}; -- table of captures; serves as a translator between captured ietf tag parts and named variables

elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%d$') then -- ll-Ssss-variant (where variant is 4 digits)

code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$');

elseif source:match ('^%a%a%a?%-%a%a%a%a%-%w%w%w%w%w+$') then -- ll-Ssss-variant (where variant is 5-8 alnum characters)

code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');

for i, v in ipairs (pattern) do -- spin through the pattern table looking for a match

elseif source:match ('^%a%a%a?%-%a%a%-%d%d%d%d$') then -- ll-RR-variant (where variant is 4 digits)

local c1, c2, c3, c4; -- captures in the 'pattern' from the pattern table go here

code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$');

elseif source:match ('^%a%a%a?%-%d%d%d%-%d%d%d%d$') then -- ll-DDD-variant (where region is 3 digits; variant is 4 digits)

~~code~~, ~~region~~, ~~variant~~ = source:match (~~'^(%a%a%a?~~)%-~~(%d%d%d)%~~-~~(%d%d%d%d~~)~~$');~~

c1, c2, c3, c4 = source:match (pattern[i][1]); -- one or more captures set if source matches pattern[i])

if c1 then -- c1 always set on match

elseif source:match ('^%a%a%a?%-%a%a%-%w%w%w%w%w+$') then -- ll-RR-variant (where variant is 5-8 alnum characters)

code = c1; -- first capture is always code

code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');

t = {

elseif source:match ('^%a%a%a?%-%d%d%d%-%w%w%w%w%w+$') then -- ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)

[pattern[i][2] or 'x'] = c2, -- fill the table of captures with the rest of the captures

code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$');

[pattern[i][3] or 'x'] = c3, -- take index names from pattern table and assign sequential captures

[pattern[i][4] or 'x'] = c4, -- index name may be nil in pattern[i] table so "or 'x'" spoofs a name for this index in this table

};

script = t.s or ''; -- translate table contents to named variables;

region = t.r or ''; -- absent table entries are nil so set named ietf parts to empty string for concatenation

variant= t.v or '';

private = t.p or '';

break; -- and done

end

if not code then

elseif source:match ('^%a%a%a?%-%d%d%d%d$') then -- ll-variant (where variant is 4 digits)

code, variant = source:match ('^(%a%a%a?)%-(%d%d%d%d)$');

elseif source:match ('^%a%a%a?%-%w%w%w%w%w%w?%w?%w?$') then -- ll-variant (where variant is 5-8 alnum characters)

code, variant = source:match ('^(%a%a%a?)%-(%w%w%w%w%w+)$');

elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a$') then -- ll-Ssss-RR

code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)$');

elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d$') then -- ll-Ssss-DDD (region is 3 digits)

code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$');

elseif source:match ('^%a%a%a?%-%a%a%a%a$') then -- ll-Ssss

code, script = source:match ('^(%a%a%a?)%-(%a%a%a%a)$');

elseif source:match ('^%a%a%a?%-%a%a$') then -- ll-RR

code, region = source:match ('^(%a%a%a?)%-(%a%a)$');

elseif source:match ('^%a%a%a?%-%d%d%d$') then -- ll-DDD (region is 3 digits)

code, region = source:match ('^(%a%a%a?)%-(%d%d%d)$');

elseif source:match ('^%a%a%a?$') then -- ll

code = source:match ('^(%a%a%a?)$');

elseif source:match ('^%a%a%a?%-x%-%w%w?%w?%w?%w?%w?%w?%w?$') then -- ll-x-pppppppp)

code, private = source:match ('^(%a%a%a?)%-x%-(%w+)$');

else

return nil, nil, nil, nil, nil, table.concat ({'unrecognized language tag: ', source}); -- don't know what we got but it is malformed

end

Line 442:

Line 434:

--[[--------------------------< M A ~~K E~~ _ T E X T ~~_ S~~ P A N >--------------------------------------------------

--[[--------------------------< D I V _ M A R K U P _ A D D >--------------------------------------------------

adds <i> and </i> tags to list-item text or to implied <p>..</p> text. mixed not supported

TODO: add support for block: div tags instead of span tags; would need some sort of proper parameter to control the switch

]]

For italic style, can't do ''{{lang|xx|text}}'' without using tags when text is italic because of -Latn, |italic=yes,

or auto-italics because the wrapping wikimarkup produces this:

text

which is later reduced to this:

text

This reduction happens in some sort of cleanup process outside the scope of this template/module.

local function div_markup_add (text, style)

Until or unless this is fixed italic text must be:

local implied_p = {};

text

if text:find ('^\n[%*:;#]') then -- look for list markup; list markup must begin at start of text

if 'italic' == style then

return mw.ustring.gsub (text, '(\n[%*:;#]+)([^\n]+)', '%1%2'); -- insert italic markup at each list item

else

return text;

end

if text:find ('\n+') then -- look for any number of \n characters in text

text = text:gsub ('([^\n])\n([^\n])', '%1 %2'); -- replace single newline characters with a space character which mimics mediawiki

if 'italic' == style then

text = text:gsub('[^\n]+', '%1'); -- insert p and italic markup tags at each impled p (two or more consecutive '\n\n' sequences)

else

text = text:gsub ('[^\n]+', '%1'); -- insert p markup at each impled p

text = text:gsub ('\n', ''); -- strip newline characters

end

return text;

end

--[[--------------------------< M A K E _ T E X T _ H T M L >--------------------------------------------------

Add the html markup to text according to the type of content that it is: <span> or <i> tags for inline content or

<div> tags for block content

]]

local function ~~make_text_span~~ (code, text, rtl, style, size, language)

local function make_text_html (code, text, tag, rtl, style, size, language)

local ~~span~~ = {};

local html = {};

local style_added = '';

if text:match ('^%*') then

table.insert (~~span~~, '*'); -- move proto language text prefix outside of italic markup if any; use numeric entity because ~~plan~~ splat confuses MediaWiki

table.insert (html, '*'); -- move proto language text prefix outside of italic markup if any; use numeric entity because plane splat confuses MediaWiki

text = text:gsub ('^%*', ''); -- remove the splat from the text

end

if 'span' == tag then -- default html tag for inline content

if 'italic' == style then

~~table.insert (span,~~ '~~~~'); -- ~~open~~ ~~italic~~ ~~style tag~~

if 'italic' == style then -- but if italic

tag = 'i'; -- change to tags

end

else -- must be div so go

text = div_markup_add (text, style); -- handle implied , implied with , and list markup (*;:#) with

end

table.insert (span, table.concat ({' tag

table.insert (span, table.concat ({code, '\"'})); -- add language attribute

table.insert (html, table.concat ({'<', tag})); -- open the , , or <div> html tag

if rtl then

table.insert (~~span~~, ' ~~dir~~="~~rtl~~"'); -- add ~~direction~~ attribute ~~for right to left languages~~

table.insert (html, table.concat ({' lang="', code, '\"'})); -- add language attribute

if rtl or unicode.is_rtl(text) then

table.insert (html, ' dir="rtl"'); -- add direction attribute for right to left languages

end

if 'normal' == style then -- when |italic=no

table.insert (~~span~~, ' style=\"font-style:normal;'); -- override external markup, if any

table.insert (html, ' style=\"font-style:normal;'); -- override external markup, if any

style_added = '\"'; -- remember that style attribute added and is not yet closed

end

Line 485:

Line 504:

if is_set (size) then -- when |size=<something>

if is_set (style_added) then

table.insert (~~span~~, table.concat ({' font-size:', size, ';'})); -- add when style attribute already inserted

table.insert (html, table.concat ({' font-size:', size, ';'})); -- add when style attribute already inserted

else

table.insert (~~span~~, table.concat ({' style=\"font-size:', size, ';'})); -- create style attribute

table.insert (html, table.concat ({' style=\"font-size:', size, ';'})); -- create style attribute

style_added = '\"'; -- remember that style attribute added and is not yet closed

end

Line 493:

Line 512:

if is_set (language) then

table.insert (~~span~~, table.concat ({style_added, ' title=\"', language})); --start the title text

table.insert (html, table.concat ({style_added, ' title=\"', language})); --start the title text

if language:find ('languages') then

table.insert (~~span~~, ' collective text\"'); -- for collective languages

table.insert (html, ' collective text\"'); -- for collective languages

else

table.insert (~~span~~, ' language text\"'); -- for individual languages

table.insert (html, ' language text\"'); -- for individual languages

end

table.insert (~~span~~, '>'); -- close the opening ~~span~~ tag

table.insert (html, '>'); -- close the opening html tag

else

table.insert (~~span~~, table.concat ({style_added, '>'})); -- close the style attribute and close opening ~~span~~ tag

table.insert (html, table.concat ({style_added, '>'})); -- close the style attribute and close opening html tag

end

table.insert (~~span~~, text); -- insert the text

table.insert (html, text); -- insert the text

table.insert (~~span~~, '</~~span~~>'); -- close the tag

table.insert (html, table.concat ({'</', tag, '>'})); -- close the , , or <div> html tag

if 'italic' == style then

if rtl then -- legacy; shouldn't be necessary because all of the rtl text is wrapped inside an html tag with dir="rtl" attribute

table.insert (span, ''); -- close italic style tag

table.insert (html, '&lrm;'); -- make sure the browser knows that we're at the end of the rtl

end

if rtl then -- legacy; shouldn't be necessary because all of the rtl text is wrapped in text

table.insert (span, '&lrm;'); -- make sure the browser knows that we're at the end of the rtl

end

return table.concat (~~span~~); -- put it all together and done

return table.concat (html); -- put it all together and done

end

Line 653:

Line 669:

--[=[-------------------------< V A L I D A T E _ T E X T >---------------------------------------------------

--[[--------------------------< V A L I D A T E _ T E X T >----------------------------------------------------

This function checks the content of args.text and returns empty string if nothing is amiss else it returns an

Line 659:

Line 675:

Italic rendering is controlled by the |italic= template parameter so italic markup should never appear in args.text

either as ''itself''' or as '''''bold italic''''' unless |italic=unset or |italic=invert.

either as ''itself'' or as '''''bold italic''''' unless |italic=unset or |italic=invert.

]=]

]]

local function validate_text (template, args)

Line 711:

Line 727:

--[[--------------------------------------------------------------------------------

for proto languages, text is prefixed with a splat. We do that here as a flag for ~~make_text_span~~() so that a splat

for proto languages, text is prefixed with a splat. We do that here as a flag for make_text_html() so that a splat

will be rendered outside of italic markup (if used). If the first character in text here is already a splat, we

do nothing

Line 723:

Line 739:

return text;

end

--[[--------------------------< H A S _ P O E M _ T A G >------------------------------------------------------

looks for a poem strip marker in text; returns true when found; false else

auto-italic detection disabled when text has poem stripmarker because it is not possible for this code to know

the content that will replace the stripmarker.

]]

local function has_poem_tag (text)
	-- A strip marker is delimited by DEL (\127) characters; we only care about
	-- markers whose payload identifies a <poem> extension tag.
	local marker_start = text:find ('\127[^\127]*UNIQ%-%-poem%-[%a%d]+%-QINU[^\127]*\127');

	return nil ~= marker_start;													-- boolean: true when a poem strip marker is present
end

--[[--------------------------< H T M L _ T A G _ S E L E C T >------------------------------------------------

Inspects content of and selectively trims text. Returns text and the name of an appropriate html tag for text.

If text contains:

\n\n text has implied <p>..</p> tags - trim leading and trailing whitespace and return

If text begins with list markup:

\n* unordered

\n; definition

\n: definition

\n# ordered

trim all leading whitespace except \n and trim all trailing whitespace

If text contains <poem>...</poem> stripmarker, return text unmodified and choose <div>..</div> tags because

the stripmarker is replaced with text wrapped in <div>..</div> tags.

]]

local function html_tag_select (text)
	-- Poem strip marker: its eventual replacement is wrapped in <div> tags and
	-- its content is unknowable here, so hand the text back untouched.
	if has_poem_tag (text) then
		return text, 'div';
	end

	local trimmed = mw.text.trim (text);										-- text without leading and trailing whitespace

	-- Two or more consecutive newlines inside the trimmed text: block content,
	-- so the wrapper must be a div.
	if trimmed:find ('\n\n+') then
		return trimmed, 'div';
	end

	-- Wiki list markup (* : ; #) following a newline: keep the newline that
	-- precedes the markup, drop all other leading and all trailing whitespace.
	if text:find ('\n[%*:;%#]') then
		local list_text = text:gsub ('^[\t\r\f ]*', '');						-- leading whitespace except '\n'
		list_text = list_text:gsub ('%s*$', '');								-- all trailing whitespace
		return list_text, 'div';
	end

	return trimmed, 'span';														-- plain inline text; span is sufficient
end

Line 737:

Line 805:

]]

function p.lang (frame)

local function lang (frame)

local args = getArgs(frame);

local args = getArgs (frame, { -- this code so that we can detect and handle wiki list markup in text

valueFunc = function (key, value)

if 2 == key or 'text' == key then -- the 'text' parameter; do not trim wite space

return value; -- return untrimmed 'text'

elseif value then -- all other values: if the value is not nil

value = mw.text.trim (value); -- trim whitespace

if '' ~= value then -- empty string when value was only whitespace

return value;

end

return nil; -- value was empty or contained only whitespace

end -- end of valueFunc

});

local out = {};

local language_name; -- used to make category names

Line 744:

Line 825:

local code; -- the language code

local msg; -- for error messages

local tag = 'span'; -- initial value for make_text_html()

if args[1] and args.code then

Line 757:

Line 839:

end

msg = validate_text ('lang', args); -- ensure that |text= is set ~~(italic test disabled for the time being)~~

msg = validate_text ('lang', args); -- ensure that |text= is set

if is_set (msg) then -- msg is an already-formatted error message

return msg;

end

args.text, tag = html_tag_select (args.text); -- inspects text; returns appropriate html tag with text trimmed accordingly

validate_cat_args (args); -- determine if categorization should be suppressed

Line 778:

Line 862:

if nil == args.italic then -- nil when |italic= absent or not set or |italic=default; args.italic controls

if ('latn' == subtags.script) or ~~(p.is_latn (args.text) and 'en' ~= code) then~~ -- script ~~set to latn or text~~ is ~~wholly~~ latn ~~script but not rendering English~~

if ('latn' == subtags.script) or -- script is latn

('en' ~= code and not is_set (subtags.script) and not has_poem_tag (args.text) and unicode.is_Latin (args.text)) then -- text not English, no script specified and not in poem markup but is wholly latn script (auto-italics)

args.italic = 'italic'; -- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic

else

args.italic = 'inherit'; -- italic not set; script not latn; inherit current style

Line 795:

Line 880:

args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles; private omitted because private

if ~~is_set~~ (subtags.private) and ~~lang_data.override[~~table.concat ({code, '-x-', subtags.private})] ~~then~~ -- ~~look~~ ~~for~~ private ~~use~~ ~~tags~~; ~~done~~ ~~this~~ ~~way~~ ~~because~~ ~~...~~

subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil; -- assemble a complete private ietf subtag; args.code does not get private subtag

language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag

~~elseif~~ lang_data.override[~~code~~] then -- get the language name for categorization

if is_set (subtags.private) and lang_data.override[subtags.private] then -- get the language name for categorization

language_name = lang_data.override[~~code~~][1] -- ~~prefer~~ ~~language~~ ~~names~~ ~~taken~~ ~~from~~ ~~the~~ ~~override~~ ~~table~~

language_name = lang_data.override[subtags.private][1]; -- first look for private use tag language name

elseif lang_data.override[code] then

language_name = lang_data.override[code][1] -- then language names taken from the override table

elseif lang_name_table.lang[code] then

language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one

end

if 'invert' == args.italic then

if 'invert' == args.italic and 'span' == tag then -- invert only supported for in-line content

args.text = invert_italics (args.text)

end

Line 809:

Line 896:

args.text = proto_prefix (args.text, language_name); -- prefix proto-language text with a splat

table.insert (out, ~~make_text_span~~ (args.code, args.text, args.rtl, args.italic, args.size, language_name));

table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size, language_name));

table.insert (out, make_category (code, language_name, args.nocat));

table.insert (out, render_maint(args.nocat)); -- maintenance messages and categories

return table.concat (out); -- put it all together and done

Line 823:

Line 910:

that can have multiple writing systems, it may be appropriate to set |script= as well.

For each {{lang-xx}} template choose the appropriate entry-point function so that this function ~~know~~ the default

For each {{lang-xx}} template choose the appropriate entry-point function so that this function knows the default

styling that should be applied to text.

Line 835:

Line 922:

Supported parameters are:

|code = (required) the IANA language code

|script = IANA script code; especially for use with languages that use multiple writing systems~~; yields to the script subtag in |code= if present [not yet implemented]~~

|script = IANA script code; especially for use with languages that use multiple writing systems

|region = IANA region code

|variant = IANA variant code

|text = (required) the displayed text in language specified by code

|link = boolean false ('no') ~~unlinks~~ ~~language~~ ~~specified by~~ code to associated language article

|link = boolean false ('no') does not link code-specified language name to associated language article

|rtl = boolean true ('yes') identifies the language specified by code as a right-to-left language

|nocat = boolean true ('yes') inhibits normal categorization; error categories are not affected

|cat = boolean false ('no') opposite form of |nocat=

|italic = boolean true ('yes') renders displayed text in italic font; boolean false ('no') renders displayed text in normal font; not set renders according to initial_style_state

|lit = text that is a literal translation of text

|label = 'none' to suppress all labeling (language name, 'translit.', 'lit.')

any other text replaces language-name label - automatic wikilinking disabled

for those {{lang-xx}} templates that support transliteration (those ~~template~~ where |text= is entirely latn script):

for those {{lang-xx}} templates that support transliteration (those templates where |text= is not entirely latn script):

|translit = text that is a transliteration of text

|translit-std = the standard that applies to the transliteration

Line 861:

Line 949:

local function _lang_xx (frame)

local args = getArgs(frame, ~~{parentFirst= true}); -- parameters in the template override parameters set in the {{#invoke:}}~~

local args = getArgs(frame,

{

parentFirst= true, -- parameters in the template override parameters set in the {{#invoke:}}

valueFunc = function (key, value)

if 1 == key then -- the 'text' parameter; do not trim wite space

return value; -- return untrimmed 'text'

elseif value then -- all other values: if the value is not nil

value = mw.text.trim (value); -- trim whitespace

if '' ~= value then -- empty string when value was only whitespace

return value;

end

return nil; -- value was empty or contained only whitespace

end -- end of valueFunc

});

local out = {};

local language_name; -- used to make display text, article links

Line 872:

Line 975:

local translit_title;

local msg; -- for error messages

local tag = 'span'; -- initial value for make_text_html()

if args[1] and args.text then

Line 883:

Line 987:

return msg;

end

args.text, tag = html_tag_select (args.text); -- inspects text; returns appropriate html tag with text trimmed accordingly

if args[2] and args.translit then

Line 941:

Line 1,047:

args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles

if ~~is_set~~ (subtags.private) and ~~lang_data.override[~~table.concat ({code, '-x-', subtags.private})] ~~then~~ -- ~~look~~ ~~for~~ private ~~use~~ ~~tags~~; ~~done~~ ~~this~~ ~~way~~ ~~because~~ ~~...~~

subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil; -- assemble a complete private ietf subtag; args.code does not get private subtag

language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1]; -- ... args.code does not get private subtag

if is_set (subtags.private) and lang_data.override[subtags.private] then -- get the language name for categorization

language_name = lang_data.override[subtags.private][1]; -- first look for private use tag language name

elseif lang_data.override[args.code:lower()] then -- look for whole IETF tag in override table

language_name = lang_data.override[args.code:lower()][1]; -- args.code:lower() because format_ietf_tag() returns mixed case

Line 983:

Line 1,091:

args.text = proto_prefix (args.text, language_name); -- prefix proto-language text with a splat

table.insert (out, ~~make_text_span~~ (args.code, args.text, args.rtl, args.italic, args.size))

table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size))

if is_set (args.translit) and not p.~~is_latn~~ (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test)

if is_set (args.translit) and not unicode.is_Latin (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test)

table.insert (out, ', '); -- comma to separate text from translit

if 'none' ~= args.label then

Line 1,038:

Line 1,146:

]]

function p.lang_xx_italic (frame)

local function lang_xx_italic (frame)

initial_style_state = 'italic';

return _lang_xx (frame);

Line 1,050:

Line 1,158:

]]

function p.lang_xx_inherit (frame)

local function lang_xx_inherit (frame)

initial_style_state = 'inherit';

return _lang_xx (frame);

Line 1,066:

Line 1,174:

]]

function p.name_from_code (frame)

local function name_from_code (frame)

local subtags = {}; -- IETF subtags script, region, variant, and private

local raw_code = frame.args[1]; -- save a copy of the input IETF subtag

Line 1,115:

Line 1,223:

]]

function p.transl (frame)

local function transl (frame)

local args = getArgs(frame); -- no {{#invoke:}} parameters

local title_table = lang_data.translit_title_table; -- table of transliteration standards and the language codes and scripts that apply to those standards

Line 1,178:

Line 1,286:

--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------

return p;

]]

-- Public interface of the module: only the names assigned here are reachable
-- through {{#invoke:}}; every other function in this file stays private.
local exports = {};

exports.lang = lang;															-- entry point for {{lang}}

exports.lang_xx_inherit = lang_xx_inherit;										-- entry points for {{lang-??}}
exports.lang_xx_italic = lang_xx_italic;

exports.name_from_code = name_from_code;										-- used for template documentation; possible use in ISO 639 name from code templates

exports.transl = transl;														-- entry point for {{transl}}

return exports;

@@ Line 6: / Line 6: @@
 require('Module:No globals');
-local p = {};
 local initial_style_state;														-- set by lang_xx_normal() and lang_xx_italic()
 local getArgs = require ('Module:Arguments').getArgs;
+local unicode = require ("Module:Unicode data");								-- for is_latin() and is_rtl()
+local yesno = require ('Module:Yesno');
 local lang_name_table = mw.loadData ('Module:Language/name/data');
@@ Line 32: / Line 34: @@
 	return not (var == nil or var == '');
 end
---[[--------------------------< I S _ L A T N >----------------------------------------------------------------
-Returns true if all of text argument is written using Latn script for letters, numbers and punctuationset; false else.
-]]
-p.is_latn = require ('Module:Unicode data').is_Latin
@@ Line 156: / Line 149: @@
 		return;
 	end
-	local yesno = require "Module:Yesno";
 	if false == yesno (args.cat) or true == yesno (args.nocat) then
@@ Line 172: / Line 164: @@
 ]]
-local function in_array( needle, haystack )
+local function in_array ( needle, haystack )
 	if needle == nil then
 		return false;
@@ Line 249: / Line 241: @@
 local function get_ietf_parts (source, args_script, args_region, args_variant)
+	local code, script, region, variant, private;								-- ietf tag parts
-	local code;
-	local script = '';
-	local region = '';
-	local variant = '';
-	local private = '';
 	if not is_set (source) then
@@ Line 259: / Line 247: @@
 	end
+	local pattern = {															-- table of tables holding acceptable ietf tag patterns and short names of the ietf part captured by the pattern
-	if source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%-%d%d%d%d$') then												-- ll-Ssss-RR-variant (where variant is 4 digits)
-		code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$');
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$', 's', 'r', 'v'}, 				-- 1 -  ll-Ssss-RR-variant (where variant is 4 digits)
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%d%d%d%d$') then											-- ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$', 's', 'r', 'v'},				-- 2 -  ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
-		code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$');
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'r', 'v'},		-- 3 -  ll-Ssss-RR-variant (where variant is 5-8 alnum characters)
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a%-%w%w%w%w%w+$') then											-- ll-Ssss-RR-variant (where variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'r', 'v'},	-- 4 -  ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
-		code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%-%w%w%w%w%w+$') then										-- ll-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$', 's', 'v'},						-- 5 -  ll-Ssss-variant (where variant is 4 digits)
-		code, script, region, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$');
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 's', 'v'},			-- 6 -  ll-Ssss-variant (where variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$', 'r', 'v'},							-- 7 -  ll-RR-variant (where variant is 4 digits)
+		{'^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$', 'r', 'v'},						-- 8 -  ll-DDD-variant (where region is 3 digits; variant is 4 digits)
+		{'^(%a%a%a?)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$', 'r', 'v'},				-- 9 -  ll-RR-variant (where variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$', 'r', 'v'},				-- 10 - ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%d%d%d%d)$', 'v'},										-- 11 - ll-variant (where variant is 4 digits)
+		{'^(%a%a%a?)%-(%w%w%w%w%w%w?%w?%w?)$', 'v'},							-- 12 - ll-variant (where variant is 5-8 alnum characters)
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)$', 's', 'r'},							-- 13 - ll-Ssss-RR
+		{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$', 's', 'r'},						-- 14 - ll-Ssss-DDD (region is 3 digits)
+		{'^(%a%a%a?)%-(%a%a%a%a)$', 's'},										-- 15 - ll-Ssss
+		{'^(%a%a%a?)%-(%a%a)$', 'r'},											-- 16 - ll-RR
+		{'^(%a%a%a?)%-(%d%d%d)$', 'r'},											-- 17 - ll-DDD (region is 3 digits)
+		{'^(%a%a%a?)$'},														-- 18 - ll
+		{'^(%a%a%a?)%-x%-(%w%w?%w?%w?%w?%w?%w?%w?)$', 'p'},						-- 19 - ll-x-pppppppp (private is 1-8 alnum characters)
+		}
+	local t = {};																-- table of captures; serves as a translator between captured ietf tag parts and named variables
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d%d$') then								-- ll-Ssss-variant (where variant is 4 digits)
-		code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$');
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%w%w%w%w%w+$') then							-- ll-Ssss-variant (where variant is 5-8 alnum characters)
-		code, script, variant = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');
+	for i, v in ipairs (pattern) do												-- spin through the pattern table looking for a match
-	elseif source:match ('^%a%a%a?%-%a%a%-%d%d%d%d$') then									-- ll-RR-variant (where variant is 4 digits)
+		local c1, c2, c3, c4;													-- captures in the 'pattern' from the pattern table go here
-		code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$');
-	elseif source:match ('^%a%a%a?%-%d%d%d%-%d%d%d%d$') then								-- ll-DDD-variant (where region is 3 digits; variant is 4 digits)
-		code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$');
+		c1, c2, c3, c4 = source:match (pattern[i][1]);							-- one or more captures set if source matches pattern[i][1]
+			if c1 then															-- c1 always set on match
-	elseif source:match ('^%a%a%a?%-%a%a%-%w%w%w%w%w+$') then								-- ll-RR-variant (where variant is 5-8 alnum characters)
+				code = c1;														-- first capture is always code
-		code, region, variant = source:match ('^(%a%a%a?)%-(%a%a)%-(%w%w%w%w%w%w?%w?%w?)$');
+				t = {
-	elseif source:match ('^%a%a%a?%-%d%d%d%-%w%w%w%w%w+$') then								-- ll-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
+					[pattern[i][2] or 'x'] = c2,								-- fill the table of captures with the rest of the captures
-		code, region, variant = source:match ('^(%a%a%a?)%-(%d%d%d)%-(%w%w%w%w%w%w?%w?%w?)$');
+					[pattern[i][3] or 'x'] = c3,								-- take index names from pattern table and assign sequential captures
+					[pattern[i][4] or 'x'] = c4,								-- index name may be nil in pattern[i] table so "or 'x'" spoofs a name for this index in this table
+					};
+				script = t.s or '';												-- translate table contents to named variables;
+				region = t.r or '';												-- absent table entries are nil so set named ietf parts to empty string for concatenation
+				variant= t.v or '';
+				private = t.p or '';
+				break;															-- and done
+			end
+	end
+	if not code then
-	elseif source:match ('^%a%a%a?%-%d%d%d%d$') then								-- ll-variant (where variant is 4 digits)
-		code, variant = source:match ('^(%a%a%a?)%-(%d%d%d%d)$');
-	elseif source:match ('^%a%a%a?%-%w%w%w%w%w%w?%w?%w?$') then								-- ll-variant (where variant is 5-8 alnum characters)
-		code, variant = source:match ('^(%a%a%a?)%-(%w%w%w%w%w+)$');
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%a%a$') then							-- ll-Ssss-RR
-		code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)$');
-	elseif source:match ('^%a%a%a?%-%a%a%a%a%-%d%d%d$') then						-- ll-Ssss-DDD (region is 3 digits)
-		code, script, region = source:match ('^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$');
-	elseif source:match ('^%a%a%a?%-%a%a%a%a$') then							-- ll-Ssss
-		code, script = source:match ('^(%a%a%a?)%-(%a%a%a%a)$');
-	elseif source:match ('^%a%a%a?%-%a%a$') then								-- ll-RR
-		code, region = source:match ('^(%a%a%a?)%-(%a%a)$');
-	elseif source:match ('^%a%a%a?%-%d%d%d$') then								-- ll-DDD (region is 3 digits)
-		code, region = source:match ('^(%a%a%a?)%-(%d%d%d)$');
-	elseif source:match ('^%a%a%a?$') then										-- ll
-		code = source:match ('^(%a%a%a?)$');
-	elseif source:match ('^%a%a%a?%-x%-%w%w?%w?%w?%w?%w?%w?%w?$') then			-- ll-x-pppppppp)
-		code, private = source:match ('^(%a%a%a?)%-x%-(%w+)$');
-	else
 		return nil, nil, nil, nil, nil, table.concat ({'unrecognized language tag: ', source});		-- don't know what we got but it is malformed
 	end
@@ Line 442: / Line 434: @@
---[[--------------------------< M A K E _ T E X T _ S P A N >--------------------------------------------------
+--[[--------------------------< D I V _ M A R K U P _ A D D >--------------------------------------------------
+adds <i> and </i> tags to list-item text or to implied <p>..</p> text.  mixed not supported
-TODO: add support for block: div tags instead of span tags; would need some sort of proper parameter to control the switch
+]]
-For italic style, can't do ''{{lang|xx|text}}'' without using <span/> tags when text is italic because of -Latn, |italic=yes,
-or auto-italics because the wrapping wikimarkup produces this:
-	<i><i lang="xx">text</i></i>
-which is later reduced to this:
-	<i>text</i>
-This reduction happens in some sort of cleanup process outside the scope of this template/module.
+local function div_markup_add (text, style)
-Until or unless this is fixed italic text must be:
+local implied_p = {};
-	<i><span lang="xx">text</span></i>
+	if text:find ('^\n[%*:;#]') then											-- look for list markup; list markup must begin at start of text
+		if 'italic' == style then
+			return mw.ustring.gsub (text, '(\n[%*:;#]+)([^\n]+)', '%1<i>%2</i>');	-- insert italic markup at each list item
+		else
+			return text;
+		end
+	end
+	if text:find ('\n+') then													-- look for any number of \n characters in text
+		text = text:gsub ('([^\n])\n([^\n])', '%1 %2');							-- replace single newline characters with a space character which mimics mediawiki
+		if 'italic' == style then
+			text = text:gsub('[^\n]+', '<p><i>%1</i></p>');						-- insert p and italic markup tags at each implied p (two or more consecutive '\n\n' sequences)
+		else
+			text = text:gsub ('[^\n]+', '<p>%1</p>');							-- insert p markup at each implied p
+			text = text:gsub ('\n', '');										-- strip newline characters
+		end
+	end
+	return text;
+end
+--[[--------------------------< M A K E _ T E X T _ H T M L >--------------------------------------------------
+Add the html markup to text according to the type of content that it is: <span> or <i> tags for inline content or
+<div> tags for block content
 ]]
-local function make_text_span (code, text, rtl, style, size, language)
+local function make_text_html (code, text, tag, rtl, style, size, language)
-	local span = {};
+	local html = {};
 	local style_added = '';
 	if text:match ('^%*') then
-		table.insert (span, '&#42;');											-- move proto language text prefix outside of italic markup if any; use numeric entity because plan splat confuses MediaWiki
+		table.insert (html, '&#42;');											-- move proto language text prefix outside of italic markup if any; use numeric entity because plain splat confuses MediaWiki
 		text = text:gsub ('^%*', '');											-- remove the splat from the text
 	end
+	if 'span' == tag then														-- default html tag for inline content
-	if 'italic' == style then
-		table.insert (span, '<i>');												-- open italic style tag
+		if 'italic' == style then												-- but if italic
+			tag = 'i';															-- change to <i> tags
+		end
+	else																		-- must be div so go
+		text = div_markup_add (text, style);									-- handle implied <p>, implied <p> with <i>, and list markup (*;:#) with <i>
 	end
-	table.insert (span, table.concat ({'<span lang="'}));						-- open <span> tag
-	table.insert (span, table.concat ({code, '\"'}));							-- add language attribute
+	table.insert (html, table.concat ({'<', tag}));								-- open the <i>, <span>, or <div> html tag
-	if rtl then
-		table.insert (span, ' dir="rtl"');										-- add direction attribute for right to left languages
+	table.insert (html, table.concat ({' lang="', code, '\"'}));				-- add language attribute
+	if rtl or unicode.is_rtl(text) then
+		table.insert (html, ' dir="rtl"');										-- add direction attribute for right to left languages
 	end
 	if 'normal' == style then													-- when |italic=no
-		table.insert (span, ' style=\"font-style:normal;');						-- override external markup, if any
+		table.insert (html, ' style=\"font-style:normal;');						-- override external markup, if any
 		style_added = '\"';														-- remember that style attribute added and is not yet closed
 	end
@@ Line 485: / Line 504: @@
 	if is_set (size) then														-- when |size=<something>
 		if is_set (style_added) then
-			table.insert (span, table.concat ({' font-size:', size, ';'}));		-- add when style attribute already inserted
+			table.insert (html, table.concat ({' font-size:', size, ';'}));		-- add when style attribute already inserted
 		else
-			table.insert (span, table.concat ({' style=\"font-size:', size, ';'}));	-- create style attribute
+			table.insert (html, table.concat ({' style=\"font-size:', size, ';'}));	-- create style attribute
 			style_added = '\"';													-- remember that style attribute added and is not yet closed
 		end
@@ Line 493: / Line 512: @@
 	if is_set (language) then
-		table.insert (span, table.concat ({style_added, ' title=\"', language}));	--start the title text
+		table.insert (html, table.concat ({style_added, ' title=\"', language}));	--start the title text
 		if language:find ('languages') then
-			table.insert (span, ' collective text\"');							-- for collective languages
+			table.insert (html, ' collective text\"');							-- for collective languages
 		else
-			table.insert (span, ' language text\"');							-- for individual languages
+			table.insert (html, ' language text\"');							-- for individual languages
 		end
-		table.insert (span, '>');												-- close the opening span tag
+		table.insert (html, '>');												-- close the opening html tag
 	else
-		table.insert (span, table.concat ({style_added, '>'}));					-- close the style attribute and close opening span tag
+		table.insert (html, table.concat ({style_added, '>'}));					-- close the style attribute and close opening html tag
 	end
-	table.insert (span, text);													-- insert the text
+	table.insert (html, text);													-- insert the text
-	table.insert (span, '</span>');												-- close the <span> tag
+	table.insert (html, table.concat ({'</', tag, '>'}));						-- close the <i>, <span>, or <div> html tag
-	if 'italic' == style then
+	if rtl then																	-- legacy; shouldn't be necessary because all of the rtl text is wrapped inside an html tag with dir="rtl" attribute
-		table.insert (span, '</i>');											-- close italic style tag
+		table.insert (html, '&lrm;');											-- make sure the browser knows that we're at the end of the rtl
-	end
-	if rtl then																	-- legacy; shouldn't be necessary because all of the rtl text is wrapped in <span dir="rtl">text</span>
-		table.insert (span, '&lrm;');											-- make sure the browser knows that we're at the end of the rtl
 	end
-	return table.concat (span);													-- put it all together and done
+	return table.concat (html);													-- put it all together and done
 end
@@ Line 653: / Line 669: @@
---[=[-------------------------< V A L I D A T E _ T E X T >---------------------------------------------------
+--[[--------------------------< V A L I D A T E _ T E X T >----------------------------------------------------
 This function checks the content of args.text and returns empty string if nothing is amiss else it returns an
@@ Line 659: / Line 675: @@
 Italic rendering is controlled by the |italic= template parameter so italic markup should never appear in args.text
-either as ''itself''' or as '''''bold italic''''' unless |italic=unset or |italic=invert.
+either as ''itself'' or as '''''bold italic''''' unless |italic=unset or |italic=invert.
-]=]
+]]
 local function validate_text (template, args)
@@ Line 711: / Line 727: @@
 --[[--------------------------< P R O T O _ P R E F I X >------------------------------------------------------
-for proto languages, text is prefixed with a splat.  We do that here as a flag for make_text_span() so that a splat
+for proto languages, text is prefixed with a splat.  We do that here as a flag for make_text_html() so that a splat
 will be rendered outside of italic markup (if used).  If the first character in text here is already a splat, we
 do nothing
@@ Line 723: / Line 739: @@
 	return text;
+end
+--[[--------------------------< H A S _ P O E M _ T A G >------------------------------------------------------
+looks for a poem strip marker in text; returns true when found; false else
+auto-italic detection disabled when text has poem stripmarker because it is not possible for this code to know
+the content that will replace the stripmarker.
+]]
+local function has_poem_tag (text)
+	return text:find ('\127[^\127]*UNIQ%-%-poem%-[%a%d]+%-QINU[^\127]*\127') and true or false;
+end
+--[[--------------------------< H T M L _ T A G _ S E L E C T >------------------------------------------------
+Inspects content of and selectively trims text.  Returns text and the name of an appropriate html tag for text.
+If text contains:
+	\n\n	text has implied <p>..</p> tags - trim leading and trailing whitespace and return
+If text begins with list markup:
+	\n*		unordered
+	\n;		definition
+	\n:		definition
+	\n#		ordered
+trim all leading whitespace except  \n and trim all trailing whitespace
+If text contains <poem>...</poem> stripmarker, return text unmodified and choose <div>..</div> tags because
+the stripmarker is replaced with text wrapped in <div>..</div> tags.
+]]
+local function html_tag_select (text)
+	local tag;
+	if has_poem_tag (text) then													-- contains poem stripmarker (we can't know the content of that)
+		tag = 'div';															-- poem replacement is in div tags so lang must use div tags
+	elseif mw.text.trim (text):find ('\n\n+') then								-- contains implied p tags
+		text = mw.text.trim (text);												-- trim leading and trailing whitespace characters
+		tag = 'div';															-- must be div because span may not contain p tags (added later by MediaWiki); poem replacement is in div tags
+	elseif text:find ('\n[%*:;%#]') then										-- if text has list markup
+		text = text:gsub ('^[\t\r\f ]*', ''):gsub ('%s*$', '');					-- trim all whitespace except leading newline character '\n'
+		tag = 'div';															-- must be div because span may not contain ul, dd, dl, ol tags (added later by MediaWiki)
+	else
+		text = mw.text.trim (text);												-- plain text
+		tag = 'span';															-- so span is fine
+	end
+	return text, tag;
 end
@@ Line 737: / Line 805: @@
 ]]
-function p.lang (frame)
+local function lang (frame)
-	local args = getArgs(frame);
+	local args = getArgs (frame, {												-- this code so that we can detect and handle wiki list markup in text
+		valueFunc = function (key, value)
+			if 2 == key or 'text' == key then									-- the 'text' parameter; do not trim white space
+				return value;													-- return untrimmed 'text'
+			elseif value then													-- all other values: if the value is not nil
+				value = mw.text.trim (value);									-- trim whitespace
+				if '' ~= value then												-- empty string when value was only whitespace
+					return value;
+				end
+			end
+			return nil;															-- value was empty or contained only whitespace
+		end																		-- end of valueFunc
+		});
 	local out = {};
 	local language_name;														-- used to make category names
@@ Line 744: / Line 825: @@
 	local code;																	-- the language code
 	local msg;																	-- for error messages
+	local tag = 'span';															-- initial value for make_text_html()
 	if args[1] and args.code then
@@ Line 757: / Line 839: @@
 	end
-	msg = validate_text ('lang', args);											-- ensure that |text= is set  (italic test disabled for the time being)
+	msg = validate_text ('lang', args);											-- ensure that |text= is set
 	if is_set (msg) then														-- msg is an already-formatted error message
 		return msg;
 	end
+	args.text, tag = html_tag_select (args.text);								-- inspects text; returns appropriate html tag with text trimmed accordingly
 	validate_cat_args (args);													-- determine if categorization should be suppressed
@@ Line 778: / Line 862: @@
 	if nil == args.italic then													-- nil when |italic= absent or not set or |italic=default; args.italic controls
-		if ('latn' == subtags.script) or (p.is_latn (args.text) and 'en' ~= code) then	-- script set to latn or text is wholly latn script but not rendering English
+		if ('latn' == subtags.script) or								 		-- script is latn
+			('en' ~= code and not is_set (subtags.script) and not has_poem_tag (args.text) and unicode.is_Latin (args.text)) then -- text not English, no script specified and not in poem markup but is wholly latn script (auto-italics)
-			args.italic = 'italic';												-- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
+				args.italic = 'italic';											-- DEFAULT for {{lang}} templates is upright; but if latn script set for font-style:italic
 		else
 			args.italic = 'inherit';											-- italic not set; script not latn; inherit current style
@@ Line 795: / Line 880: @@
 	args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant);	-- format to recommended subtag styles; private omitted because private
-	if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then	-- look for private use tags; done this way because ...
+	subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil;		-- assemble a complete private ietf subtag; args.code does not get private subtag
-		language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1];				-- ... args.code does not get private subtag
-	elseif lang_data.override[code] then										-- get the language name for categorization
+	if is_set (subtags.private) and lang_data.override[subtags.private] then	-- get the language name for categorization
-		language_name = lang_data.override[code][1]								-- prefer language names taken from the override table
+		language_name = lang_data.override[subtags.private][1];					-- first look for private use tag language name
+	elseif lang_data.override[code] then
+		language_name = lang_data.override[code][1]								-- then language names taken from the override table
 	elseif lang_name_table.lang[code] then
 		language_name = lang_name_table.lang[code][1];							-- table entries sometimes have multiple names, always take the first one
 	end
-	if 'invert' == args.italic then
+	if 'invert' == args.italic and 'span' == tag then							-- invert only supported for in-line content
 		args.text = invert_italics (args.text)
 	end
@@ Line 809: / Line 896: @@
 	args.text = proto_prefix (args.text, language_name);						-- prefix proto-language text with a splat
-	table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size, language_name));
+	table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size, language_name));
 	table.insert (out, make_category (code, language_name, args.nocat));
-	table.insert (out, render_maint(args.nocat));											-- maintenance messages and categories
+	table.insert (out, render_maint(args.nocat));								-- maintenance messages and categories
 	return table.concat (out);													-- put it all together and done
@@ Line 823: / Line 910: @@
 that can have multiple writing systems, it may be appropriate to set |script= as well.
-For each {{lang-xx}} template choose the appropriate entry-point function so that this function know the default
+For each {{lang-xx}} template choose the appropriate entry-point function so that this function knows the default
 styling that should be applied to text.
@@ Line 835: / Line 922: @@
 Supported parameters are:
 	|code = (required) the IANA language code
-	|script = IANA script code; especially for use with languages that use multiple writing systems; yields to the script subtag in |code= if present [not yet implemented]
+	|script = IANA script code; especially for use with languages that use multiple writing systems
 	|region = IANA region code
 	|variant = IANA variant code
 	|text = (required) the displayed text in language specified by code
-	|link = boolean false ('no') unlinks language specified by code to associated language article
+	|link = boolean false ('no') does not link code-specified language name to associated language article
 	|rtl = boolean true ('yes') identifies the language specified by code as a right-to-left language
 	|nocat = boolean true ('yes') inhibits normal categorization; error categories are not affected
+	|cat = boolean false ('no') opposite form of |nocat=
 	|italic = boolean true ('yes') renders displayed text in italic font; boolean false ('no') renders displayed text in normal font; not set renders according to initial_style_state
 	|lit = text that is a literal translation of text
-	|label =	'none' to suppress all labeling (language name, 'translit.', 'lit.')
+	|label = 'none' to suppress all labeling (language name, 'translit.', 'lit.')
 				any other text replaces language-name label - automatic wikilinking disabled
-	for those {{lang-xx}} templates that support transliteration (those template where |text= is entirely latn script):
+	for those {{lang-xx}} templates that support transliteration (those templates where |text= is not entirely latn script):
 	|translit = text that is a transliteration of text
 	|translit-std = the standard that applies to the transliteration
@@ Line 861: / Line 949: @@
 local function _lang_xx (frame)
-	local args = getArgs(frame, {parentFirst= true});							-- parameters in the template override parameters set in the {{#invoke:}}
+	local args = getArgs(frame,
+		{
+		parentFirst= true,														-- parameters in the template override parameters set in the {{#invoke:}}
+		valueFunc = function (key, value)
+			if 1 == key then													-- the 'text' parameter; do not trim white space
+				return value;													-- return untrimmed 'text'
+			elseif value then													-- all other values: if the value is not nil
+				value = mw.text.trim (value);									-- trim whitespace
+				if '' ~= value then												-- empty string when value was only whitespace
+					return value;
+				end
+			end
+			return nil;															-- value was empty or contained only whitespace
+		end																		-- end of valueFunc
+	});
 	local out = {};
 	local language_name;														-- used to make display text, article links
@@ Line 872: / Line 975: @@
 	local translit_title;
 	local msg;																	-- for error messages
+	local tag = 'span';															-- initial value for make_text_html()
 	if args[1] and args.text then
@@ Line 883: / Line 987: @@
 		return msg;
 	end
+	args.text, tag = html_tag_select (args.text);								-- inspects text; returns appropriate html tag with text trimmed accordingly
 	if args[2] and args.translit then
@@ Line 941: / Line 1,047: @@
 	args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant);	-- format to recommended subtag styles
-	if is_set (subtags.private) and lang_data.override[table.concat ({code, '-x-', subtags.private})] then	-- look for private use tags; done this way because ...
+	subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil;		-- assemble a complete private ietf subtag; args.code does not get private subtag
-		language_name = lang_data.override[table.concat ({code, '-x-', subtags.private})][1];				-- ... args.code does not get private subtag
+	if is_set (subtags.private) and lang_data.override[subtags.private] then	-- get the language name for categorization
+		language_name = lang_data.override[subtags.private][1];					-- first look for private use tag language name
 	elseif lang_data.override[args.code:lower()] then							-- look for whole IETF tag in override table
 		language_name = lang_data.override[args.code:lower()][1];				-- args.code:lower() because format_ietf_tag() returns mixed case
@@ Line 983: / Line 1,091: @@
 	args.text = proto_prefix (args.text, language_name);						-- prefix proto-language text with a splat
-	table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size))
+	table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size))
-	if is_set (args.translit) and not p.is_latn (args.text) then					-- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test)
+	if is_set (args.translit) and not unicode.is_Latin (args.text) then			-- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test)
 		table.insert (out, ', ');												-- comma to separate text from translit
 		if 'none' ~= args.label then
@@ Line 1,038: / Line 1,146: @@
 ]]
-function p.lang_xx_italic (frame)
+local function lang_xx_italic (frame)
 	initial_style_state = 'italic';
 	return _lang_xx (frame);
@@ Line 1,050: / Line 1,158: @@
 ]]
-function p.lang_xx_inherit (frame)
+local function lang_xx_inherit (frame)
 	initial_style_state = 'inherit';
 	return _lang_xx (frame);
@@ Line 1,066: / Line 1,174: @@
 ]]
-function p.name_from_code (frame)
+local function name_from_code (frame)
 	local subtags = {};															-- IETF subtags script, region, variant, and private
 	local raw_code = frame.args[1];												-- save a copy of the input IETF subtag
@@ Line 1,115: / Line 1,223: @@
 ]]
-function p.transl (frame)
+local function transl (frame)
 	local args = getArgs(frame);												-- no {{#invoke:}} parameters
 	local title_table = lang_data.translit_title_table;							-- table of transliteration standards and the language codes and scripts that apply to those standards
@@ Line 1,178: / Line 1,286: @@
+--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
-return p;
+]]
+return {
+	lang = lang,																-- entry point for {{lang}}
+	lang_xx_inherit = lang_xx_inherit,											-- entry points for {{lang-??}}
+	lang_xx_italic = lang_xx_italic,
+	name_from_code = name_from_code,											-- used for template documentation; possible use in ISO 639 name from code templates
+	transl = transl,															-- entry point for {{transl}}
+	};

Module:Lang: Difference between revisions

Navigation menu

Search