Module:Citation/CS1/Date validation: Difference between revisions

Content deleted Content added
m 1 revision imported
Infobox>Trappist the monk
season range fix;
Line 5:
local is_set, in_array; -- imported functions from selected Module:Citation/CS1/Utilities
local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration
 
 
--[[--------------------------< F I L E - S C O P E D E C L A R A T I O N S >--------------------------------
 
File-scope variables are declared here
 
]]
 
local lang_object = mw.getContentLanguage(); -- used by is_valid_accessdate(), is_valid_year(), date_name_xlate(); TODO: move to ~/Configuration?
local year_limit; -- used by is_valid_year()
 
 
Line 27 ⟶ 37:
 
local function is_valid_accessdate (accessdate)
local lang = mw.getContentLanguage();
local good1, good2;
local access_ts, tomorrow_ts; -- to hold unix time stamps representing the dates
 
good1, access_ts = pcall( lang(lang_object.formatDate, langlang_object, 'U', accessdate ); -- convert accessdate value to unix timesatmp
good2, tomorrow_ts = pcall( lang(lang_object.formatDate, langlang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand
access_ts = tonumber (access_ts) or langlang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
tomorrow_ts = tonumber (tomorrow_ts) or langlang_object:parseFormattedNumber (tomorrow_ts);
else
return false; -- one or both failed to convert to unix time stamp
Line 46 ⟶ 55:
return false; -- accessdate out of range
end
end
 
 
--[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
 
returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. If not a valid month, returns 0
 
]]
 
local function get_month_number (month)
	-- Map a month name (long or abbreviated form) to its number as stored in cfg.date_names.
	-- <month> must match a table key exactly: capitalization and spelling matter.
	-- Returns 0 when <month> is found in neither the local nor the English tables.
	local local_names = cfg.date_names['local'];
	local number = local_names.long[month] or local_names.short[month];		-- local-language names take precedence
	if number then
		return number;
	end

	local english_names = cfg.date_names['en'];								-- only consulted when the local lookup fails
	return english_names.long[month] or english_names.short[month] or 0;		-- 0: not a recognized month name
end
 
Line 81 ⟶ 77:
 
 
--[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
 
returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct.
If not a valid month, returns 0
 
]]
Uses ISO DIS 8601 2016 part 2 §4.7 Divisions of a year for hemisphere-independent seasons:
 
local function get_month_number (month)
	-- Look <month> up in the month-name tables of cfg.date_names, local language first, then English.
	-- Within each language the long form is tried before the short form.
	-- Returns the stored month number, or 0 when <month> is not a recognized month name.
	for _, language in ipairs ({'local', 'en'}) do
		local names = cfg.date_names[language];
		local number = names.long[month] or names.short[month];
		if number then
			return number;													-- first hit wins
		end
	end
	return 0;																-- not a recognized month name
end
 
 
--[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------
 
returns a number according to the sequence of seasons in a year: 21 for Spring, etc. Capitalization and spelling
must be correct. If not a valid season, returns 0.
21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
 
returns 0 when <param> is not |date=
 
Season numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using
numbers 21-41. cs1|2 only supports generic seasons. EDTF does support the distinction between north and south
hemisphere seasons but cs1|2 has no way to make that distinction.
 
These additional divisions not currently supported:
Line 97 ⟶ 113:
]]
 
local function get_season_number (season, param)
if 'date' ~= param then
return 0; -- season dates only supported by |date=
end
return cfg.date_names['local'].season[season] or -- look for local names first
cfg.date_names['en'].season[season] or -- failing that, look for English names
Line 104 ⟶ 123:
 
 
--[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------
 
returns a number according to the sequence of quarters in a year: 33 for first quarter, etc. Capitalization and spelling
must be correct. If not a valid quarter, returns 0.
33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each)
 
returns 0 when <param> is not |date=
 
Quarter numbering is defined by Extended Date/Time Format (EDTF) Specification (https://www.loc.gov/standards/datetime/)
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using
numbers 21-41. cs1|2 only supports generic seasons and quarters.
 
These additional divisions not currently supported:
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each)
40-41 = Semestral 1, Semestral 2 (6 months each)
 
]]
 
local function get_quarter_number (quarter, param)
	-- Map a quarter name to its number as stored in cfg.date_names[...].quarter.
	--   quarter: the quarter text taken from the date string
	--   param: name of the parameter holding the date; only 'date' is accepted
	-- Returns the stored quarter number, or 0 when <param> is not 'date' or <quarter> is not recognized.
	if 'date' ~= param then
		return 0;															-- quarter dates only supported by |date=
	end
	quarter = mw.ustring.gsub (quarter, ' +', ' ');							-- special case replace multiple space chars with a single space char; the gsub count is discarded
	return cfg.date_names['local'].quarter[quarter] or						-- look for local names first
		cfg.date_names['en'].quarter[quarter] or							-- failing that, look for English names
		0;																	-- not a recognized quarter name
end
 
 
--[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >----------------------------------
 
returns a non-zero number if date contains a recognized proper-name. Capitalization and spelling must be correct.
 
returns 0 when <param> is not |date=
 
]]
 
local function get_proper_name_number (name, param)
if 'date' ~= param then
return 0; -- proper-name dates only supported by |date=
end
return cfg.date_names['local'].named[name] or -- look for local names dates first
cfg.date_names['en'].named[name] or -- failing that, look for English names
Line 117 ⟶ 170:
 
 
--[[--------------------------< G E T _ E L E M E N T _ N U M B E R >-------------------------------------------
 
returns the number of a valid (properly spelled, capitalized, abbreviated) month or season or quarter or proper name; nil when not valid
 
]]
 
local function get_element_number (element, param)
	-- Try each name-to-number lookup in turn and return the first non-zero result.
	--   element: the month / season / quarter / proper-name text taken from the date string
	--   param: name of the parameter holding the date; passed through to every lookup
	-- Returns the number from the first lookup that recognizes <element>; nil when none does.
	local num;
	local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number};	-- list of functions to execute in order

	for _, func in ipairs (funcs) do										-- spin through the function list
		num = func (element, param);										-- call the function and get the returned number
		if 0 ~= num then													-- non-zero when valid month season quarter
			return num;														-- return that number
		end
	end
	return nil;																-- not valid
end
 
Line 135 ⟶ 193:
--[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------
 
Function gets current year from the server and compares it to year from a citation parameter. Years more than one
year in the future are not acceptable.
 
]]
 
local function is_valid_year (year)
	-- Check that <year> is no later than next year.
	--   year: the year text taken from a date string
	-- Returns true when <year> converts to a number that is <= year_limit; false otherwise,
	-- including when <year> cannot be converted to a number at all.
	-- year_limit is the file-scope variable declared near the top of the module; it is filled in on first use.
	if not is_set (year_limit) then
		year_limit = tonumber (os.date ("%Y")) + 1;							-- current server year plus one; fetched only once
	end

	year = tonumber (year) or lang_object:parseFormattedNumber (year);		-- convert to numbers for the comparison; presumably the fallback handles local-script digits that tonumber() rejects
	return year and (year <= year_limit) or false;							-- false if year is in the future more than one year or is not a number
end
 
 
--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
 
Returns true if day is less than or equal to the number of days in month and year is no farther into the future
than next year; else returns false.
Line 222 ⟶ 284:
]]
 
local function is_valid_month_season_range(range_start, range_end, param)
local range_start_number = get_month_number (range_start);
local range_end_number;
 
if 0 == range_start_number then -- is this a month range?
range_start_number = get_season_number (range_start, param); -- not a month; is it a season? get start season number
range_end_number = get_season_number (range_end, param); -- get end season number
 
if (0 ~= range_start_number) and (0 ~= range_end_number) then
Line 270 ⟶ 332:
the output table receives:
rftdate: an IS8601 formatted date
rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates)
rftssn: one of four season keywords: winter, spring, summer, fall (lowercase)
rftquarter: one of four values: 1, 2, 3, 4
 
]]
Line 295 ⟶ 358:
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
end
if 20 < tonumber(input.month) then -- if season or propernameproper-name date
local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [33]='1', [34]='2', [35]='3', [36]='4', [98]='Easter', [99]='Christmas'}; -- seasons lowercase, no autumn; proper -names use title case
if 0 == input.month2 then -- single season date
if 3040 <tonumber(input.month) then
tCOinS_date.rftchron = season[input.month]; -- proper -name dates
elseif 30 <tonumber(input.month) then
tCOinS_date.rftquarter = season[input.month]; -- quarters
else
tCOinS_date.rftssn = season[input.month]; -- seasons
Line 362 ⟶ 427:
['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'd2', 'a', 'y'},
-- day-initial: day month year
['dMy'] = {'^([1-9]%d?) *+(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
Line 381 ⟶ 446:
-- month/season range year; months separated by endash
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
-- month/season year or proper-name year; quarter year when First Quarter YYYY etc
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
 
-- these date formats cannot be converted
-- ['Q,y'] = {'^(Q%a* +[1-4]), +((%d%d%d%d)%a?)$'}, -- Quarter n, yyyy
 
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
Line 414 ⟶ 482:
]]
 
local function check_date (date_string, tCOinS_dateparam, test_accessdatetCOinS_date)
local year; -- assume that year2, months, and days are not used;
local year2=0; -- second year in a year range
local month=0;
local month2=0; -- second month in a month range
local day=0;
local day2=0; -- second day in a day range
local anchor_year;
local coins_date;
Line 429 ⟶ 497:
anchor_year = year;
-- elseif mw.ustring.match(date_string, patterns['Q,y'][1]) then -- quarter n, year; here because much the same as Mdy
-- month, anchor_year, year=mw.ustring.match(date_string, patterns['Q,y'][1]);
-- if not is_valid_year(year) then return false; end
-- month = get_quarter_number (month, param); -- get quarter number or nil
-- if not month then return false; end -- not valid whatever it is
 
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
month, day, anchor_year, year=mw.ustring.match(date_string, patterns['Mdy'][1]);
Line 471 ⟶ 545:
elseif mw.ustring.match(date_string, patterns['Md-Mdy'][1]) then -- month initial month-day-range: month day – month day, year; uses spaced endash
month, day, month2, day2, anchor_year, year=mw.ustring.match(date_string, patterns['Md-Mdy'][1]);
if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end
month = get_month_number (month); -- for metadata
month2 = get_month_number (month2);
Line 498 ⟶ 572:
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
month = get_season_number (month, param);
 
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash
Line 506 ⟶ 580:
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
month = get_season_number (month, param); -- for metadata
 
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
Line 516 ⟶ 590:
month = get_month_number(month);
month2 = get_month_number(month2);
elseif 0 ~= get_season_number(month, param) and 0 ~= get_season_number(month2, param) then -- both must be or season year, not mixed
month = get_season_number(month, param);
month2 = get_season_number(month2, param);
else
return false;
Line 525 ⟶ 599:
elseif mw.ustring.match(date_string, patterns['M-My'][1]) then -- month/season range year; months separated by endash
month, month2, anchor_year, year=mw.ustring.match(date_string, patterns['M-My'][1]);
if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season
month = get_month_number(month);
month2 = get_month_number(month2);
else
month = get_season_number(month, param);
month2 = get_season_number(month2, param);
end
year2=year;
elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season year or /quarter/proper-name year
month, anchor_year, year=mw.ustring.match(date_string, patterns['My'][1]);
if not is_valid_year(year) then return false; end
month = get_element_number (month, param); -- get month season quarter proper-name number or nil
if not is_valid_month_or_season (month) and 0 == is_proper_name (month) then return false; end
if 0not ~= get_month_number(month) then return false; end -- determined to be anot valid range so just check this one to know if monthwhatever orit seasonis
month = get_month_number(month);
elseif 0 ~= get_season_number(month) then
month = get_season_number(month);
else
month = is_proper_name (month); -- must be proper name; not supported in COinS
end
 
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
Line 562 ⟶ 630:
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
 
elseif date_string:mw.ustring.match (date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
anchor_year, year=date_string:mw.ustring.match (date_string, patterns['y'][1]);
if false == is_valid_year(year) then
return false;
Line 572 ⟶ 640:
end
 
if test_accessdate'access-date' == param then -- test accessdate here because we have numerical date parts
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
Line 645 ⟶ 713:
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date
else
good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date
end
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value
Line 651 ⟶ 719:
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
end
elseif 'access-date'==k then -- if the parameter is |date=
good_date = check_date (v.val, nil, true); -- go test the date; nil is a placeholder; true is the test_accessdate flag
elseif 'embargo'==k then -- if the parameter is |embargo=
good_date = check_date (v.val, k); -- go test the date
if true == good_date then -- if the date is a valid date
good_date, embargo_date = is_valid_embargo_date (v.val); -- is |embargo= date a single dmy, mdy, or ymd formatted date? yes:returns embargo; no: returns 9999
end
else -- any other date-holding parameter
good_date = check_date (v.val, k); -- go test the date
end
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times
Line 842 ⟶ 908:
[patterns[pattern_idx][8] or 'x'] = c7;
};
 
if t.a then -- if this date has an anchor year capture
t.y = t.a; -- use the anchor year capture when reassembling the date
end
 
if tonumber(t.m) then -- if raw month is a number (converting from ymd)
Line 1,008 ⟶ 1,078:
if mode then -- might be a season
xlate = mw.getContentLanguage()lang_object:formatDate(mode, '1' .. month); -- translate the month name to this local language
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation
date_parameters_list[param_name].val = date; -- save the translated date