Module:Citation/CS1/Date validation: Difference between revisions

m
1 revision imported
wp>Trappist the monk
(update per RfC;)
m (1 revision imported)
 
(4 intermediate revisions by 3 users not shown)
Line 185: Line 185:
end
end


year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison;
year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison
if year and (100 > year) then -- years less than 100 not supported
return false;
end
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date=
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date=
Line 321: Line 324:
local date; -- one date or first date in a range
local date; -- one date or first date in a range
local date2 = ''; -- end of range date
local date2 = ''; -- end of range date
input.year = tonumber (input.year) or lang_object:parseFormattedNumber (input.year); -- language-aware tonumber()
input.year2 = tonumber (input.year2) or lang_object:parseFormattedNumber (input.year2); -- COinS dates are pseudo-ISO 8601 so convert to Arabic numerals
-- start temporary Julian / Gregorian calendar uncertainty detection
-- start temporary Julian / Gregorian calendar uncertainty detection
local year = tonumber(input.year); -- this temporary code to determine the extent of sources dated to the Julian/Gregorian
local year = input.year; -- this temporary code to determine the extent of sources dated to the Julian/Gregorian
local month = tonumber(input.month); -- interstice 1 October 1582 – 1 January 1926
local month = tonumber(input.month); -- interstice 1 October 1582 – 1 January 1926
local day = tonumber (input.day);
local day = tonumber (input.day);
Line 332: Line 338:
end
end
-- end temporary Julian / Gregorian calendar uncertainty detection
-- end temporary Julian / Gregorian calendar uncertainty detection
if ((1582 == year) and (10 > month)) or (1582 > year) then -- if a Julian calendar date
if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then -- Julian calendar or season so &rft.date gets year only
tCOinS_date.rftdate = tostring (input.year); -- &rft.date gets year only
date = input.year;
return; -- done
end
-- here for all forms of Gregorian dates
if 20 < tonumber (input.month) then -- if season, quarter, or proper-name date
date = input.year; -- &rft.date gets year only
if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year
if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
date = string.format ('%.4d/%.4d', input.year, input.year2) -- assemble the date range
end
end
if 20 < tonumber(input.month) then -- if season or proper-name date
 
local season = {[24] = 'winter', [21] = 'spring', [22] = 'summer', [23] = 'fall', [33] = '1', [34] = '2', [35] = '3', [36] = '4', [98] = 'Easter', [99] = 'Christmas'}; -- seasons lowercase, no autumn; proper-names use title case
local season = {[24] = 'winter', [21] = 'spring', [22] = 'summer', [23] = 'fall', [33] = '1', [34] = '2', [35] = '3', [36] = '4', [98] = 'Easter', [99] = 'Christmas'}; -- seasons lowercase, no autumn; proper-names use title case
if 0 == input.month2 then -- single season date
if 0 == input.month2 then -- single season, quarter, or proper-name date
if 40 < tonumber(input.month) then
if 40 < tonumber(input.month) then
tCOinS_date.rftchron = season[input.month]; -- proper-name dates
tCOinS_date.rftchron = season[input.month]; -- proper-name date; used in journal metadata only
elseif 30 < tonumber(input.month) then
elseif 30 < tonumber(input.month) then
tCOinS_date.rftquarter = season[input.month]; -- quarters
tCOinS_date.rftquarter = season[input.month]; -- quarter date; used in journal metadata only
else
else
tCOinS_date.rftssn = season[input.month]; -- seasons
tCOinS_date.rftssn = season[input.month]; -- season date; used in journal metadata only
end
end
else -- season range with a second season specified
else -- season ranges are lumped into &rft.chron; &rft.ssn and &rft.quarter are left blank
if input.year ~= input.year2 then -- season year – season year range or season year–year
if input.year ~= input.year2 then -- season year – season year range or season year–year
tCOinS_date.rftssn = season[input.month]; -- start of range season; keep this?
if 0 ~= input.month2 then
if 0~= input.month2 then
tCOinS_date.rftchron = string.format ('%s %s – %s %s', season[input.month], input.year, season[input.month2], input.year2); -- used in journal metadata only
tCOinS_date.rftchron = string.format ('%s %s – %s %s', season[input.month], input.year, season[input.month2], input.year2);
end
else -- season–season year range
tCOinS_date.rftssn = season[input.month]; -- start of range season; keep this?
tCOinS_date.rftchron = season[input.month] .. '–' .. season[input.month2]; -- season–season year range
end
end
else -- season–season year range
tCOinS_date.rftchron = season[input.month] .. '–' .. season[input.month2]; -- season–season year range; used in journal metadata only
end
end
end
end
tCOinS_date.rftdate = date;
 
tCOinS_date.rftdate = tostring (date);
return; -- done
return; -- done
end
end
-- here for gregorian calendar dates
if 0 ~= input.day then
if 0 ~= input.day then
date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day)); -- whole date
date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day)); -- whole date
Line 569: Line 577:
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]);
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]);
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then
return false; -- not Summer or Winter; abandon
end
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
month = get_season_number (month, param); -- for metadata


elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
Line 619: Line 629:
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
if in_array (param, {'date', 'publication-date', 'year'}) then
add_prop_cat ('year-range-abbreviated');
end


if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003
if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003
year2 = century .. year2; -- add the century to year2 for comparisons
year2 = century .. year2; -- add the century to year2 for comparisons
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if in_array (param, {'date', 'publication-date', 'year'}) then -- here when 'valid' abbreviated year range; if one of these parameters
add_prop_cat ('year-range-abbreviated'); -- add properties cat
end


elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
Line 639: Line 649:
end
end


if 'access-date' == param then -- test accessdate here because we have numerical date parts
if param ~= 'date' then -- CITEREF disambiguation only allowed in |date=; |year= & |publication-date= promote to date
if anchor_year:match ('%l$') then
return false;
end
end
 
if 'access-date' == param then -- test access-date here because we have numerical date parts
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; access-date must not be a range
if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then
if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then
return false; -- return false when accessdate out of bounds
return false; -- return false when access-date out of bounds
end
end
else
else
return false; -- return false when accessdate is a range of two dates
return false; -- return false when access-date is a range of two dates
end
end
 
if 'archive-date' == param then -- test archive-date here because we have numerical date parts
if not (0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2) then -- none of these; archive-date must not be a range
return false; -- return false when archive-date is a range of two dates
end
end
end
end
Line 915: Line 938:
};
};


if t.a then -- if this date has an anchor year capture
if t.a then -- if this date has an anchor year capture (all convertable date formats except ymd)
t.y = t.a; -- use the anchor year capture when reassembling the date
if t.y2 then -- for year range date formats
t.y2 = t.a; -- use the anchor year capture when reassembling the date
else -- here for single date formats (except ymd)
t.y = t.a; -- use the anchor year capture when reassembling the date
end
end
end


if tonumber(t.m) then -- if raw month is a number (converting from ymd)
if tonumber(t.m) then -- if raw month is a number (converting from ymd)
if 's' == mon_len then -- if we are to use abbreviated month names
if 's' == mon_len then -- if we are to use abbreviated month names
t.m = cfg.date_names['inv_local_s'][tonumber(t.m)]; -- convert it to a month name
t.m = cfg.date_names['inv_local_short'][tonumber(t.m)]; -- convert it to a month name
else
else
t.m = cfg.date_names['inv_local_l'][tonumber(t.m)]; -- convert it to a month name
t.m = cfg.date_names['inv_local_long'][tonumber(t.m)]; -- convert it to a month name
end
end
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
Line 938: Line 965:
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic)
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic)
if 0 == t[mon] then return; end -- seasons and named dates can't be converted
if 0 == t[mon] then return; end -- seasons and named dates can't be converted
t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_s'][t[mon]]) or cfg.date_names['inv_local_l'][t[mon]]; -- fetch month name according to length
t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_short'][t[mon]]) or cfg.date_names['inv_local_long'][t[mon]]; -- fetch month name according to length
end
end
end
end
Line 1,019: Line 1,046:
date_parameters_list[param_name].val = new_date; -- update date in date list
date_parameters_list[param_name].val = new_date; -- update date in date list
result = true; -- and announce that changes have been made
result = true; -- and announce that changes have been made
break;
end
end
end -- if
end -- if
Line 1,057: Line 1,085:
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------


Attempts to translate English month names to local-language month names using names supplied by MediaWiki's
Attempts to translate English date names to local-language date names using names supplied by MediaWiki's
date parser function.  This is simple name-for-name replacement and may not work for all languages.
date parser function.  This is simple name-for-name replacement and may not work for all languages.


Line 1,071: Line 1,099:
local date;
local date;
local sources_t = {																-- sequence of {English date names, local-language date names} table pairs
	{cfg.date_names.en.long, cfg.date_names.inv_local_long},					-- long English month names => long local month names
	{cfg.date_names.en.short, cfg.date_names.inv_local_short},					-- short English month names => short local month names
	{cfg.date_names.en.quarter, cfg.date_names.inv_local_quarter},				-- quarter date names
	{cfg.date_names.en.season, cfg.date_names.inv_local_season},				-- season date names
	{cfg.date_names.en.named, cfg.date_names.inv_local_named},					-- named dates
	}

-- Local helper: look up <month> (an English-language date name) in each pair of sources_t, in order.
-- The value that the English table holds for <month> is used as the key into the paired local table.
-- Returns the first local-language date name found; returns nothing (nil) when no pair yields one.
local function is_xlateable (month)
	for _, pair_t in ipairs (sources_t) do										-- for each {English, local} pair in sources_t
		local english_t, local_t = pair_t[1], pair_t[2];
		local key = english_t[month];											-- truthy when <month> is a known English date name of this kind
		if key then
			local local_name = local_t[key];									-- matching local date name, if there is one
			if local_name then
				return local_name;
			end
		end
	end
end
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
if is_set(param_val.val) then -- if the parameter has a value
if is_set(param_val.val) then -- if the parameter has a value
date = param_val.val;
date = param_val.val;
for month in mw.ustring.gmatch (date, '%a+') do -- iterate through all dates in the date (single date or date range)
for month in mw.ustring.gmatch (date, '[%a ]+') do -- iterate through all date names in the date (single date or date range)
if cfg.date_names.en.long[month] then
month = mw.text.trim (month); -- this because quarterly dates contain whitespace
mode = 'F'; -- English name is long so use long local name
xlate = is_xlateable (month); -- get translate <month>; returns translation or nil
elseif cfg.date_names.en.short[month] then
mode = 'M'; -- English name is short so use short local name
if xlate then
else
mode = nil; -- not an English month name; could be local language month name or an English season name
end
if mode then -- might be a season
xlate = lang_object:formatDate(mode, '1' .. month); -- translate the month name to this local language
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation
date_parameters_list[param_name].val = date; -- save the translated date
date_parameters_list[param_name].val = date; -- save the translated date
Line 1,118: Line 1,158:


cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration
cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration
end
--[[--------------------------< A R C H I V E _ D A T E _ C H E C K >------------------------------------------
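Compares the value in |archive-date= with the timestamp in a Wayback Machine URL.  Emits an error message when the
timestamp does not contain the |archive-date= value rendered as YYYYMMDD.  Nothing is emitted when |archive-date=
cannot be rendered in that form.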
]]
local function archive_date_check (archive_date, archive_url_timestamp)
	-- render |archive-date= as YYYYMMDD; the call goes through pcall() so that an error raised by
	-- formatDate() is caught here instead of propagating to the caller
	local ok, date_stamp = pcall (lang_object.formatDate, lang_object, 'Ymd', archive_date);

	if not ok then
		return;																	-- date could not be rendered; nothing to compare, no message
	end

	-- plain-text search from position 1: the YYYYMMDD string must occur in the url timestamp
	if nil == archive_url_timestamp:find (date_stamp, 1, true) then
		set_message ('err_archive_date_url_ts_mismatch');						-- emit an error message
	end
end
end


Line 1,125: Line 1,184:


return { -- return exported functions
return { -- return exported functions
archive_date_check = archive_date_check,
date_hyphen_to_dash = date_hyphen_to_dash,
date_name_xlate = date_name_xlate,
dates = dates,
dates = dates,
reformat_dates = reformat_dates,
set_selected_modules = set_selected_modules,
year_date_check = year_date_check,
year_date_check = year_date_check,
reformat_dates = reformat_dates,
date_hyphen_to_dash = date_hyphen_to_dash,
date_name_xlate = date_name_xlate,
set_selected_modules = set_selected_modules
}
}