Module:Citation/CS1/Identifiers: Difference between revisions

Jump to navigation Jump to search
m (1 revision imported)
ww>Trappist the monk
(tweak to support new oclc limit;)
Line 164: Line 164:




--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------
--[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------


returns true if:
for biorxiv, returns true if:
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
for medrxiv, returns true if:
2020-01-01T00:00Z <= medrxiv_date < today + 2 days
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
The medrxiv identifier has a start date of 2020-01-01.  The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800


biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
today is the current date at time 00:00:00 UTC plus 48 hours
<today> is the current date at time 00:00:00 UTC plus 48 hours
if today is 2015-01-01T00:00:00 then
if today's date is 2023-01-01T00:00:00 then
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
adding 24 hours gives 2023-01-02T00:00:00 – one second more than today
adding 24 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
adding 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow


This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
inputs:
apparently doesn't understand non-English date month names. This function will always return false when the date
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
contains a non-English month name because good1 is false after the call to lang_object.formatDate().  To get
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
around that call this function with date parts and create a YYYY-MM-DD format date.


]=]
]=]


local function is_valid_biorxiv_date (y, m, d)
local function is_valid_rxiv_date (y, m, d, select)
local biorxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date
-- Range-check the month and day parts before building the date string.
-- The two sub-conditions are alternatives (too small OR too large); joining them with 'and' could
-- never be true, so out-of-range values were never rejected here.  A value that tonumber() cannot
-- convert is also rejected, which avoids a compare-with-nil error.
local month_num = tonumber (m);
if not month_num or month_num < 1 or 12 < month_num then -- <m> must be a number 1–12
	return false;
end
local day_num = tonumber (d);
if not day_num or day_num < 1 or 31 < day_num then -- <d> must be a number 1–31; TODO: account for month length and leap year?
	return false;
end
local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string
local good1, good2;
local good1, good2;
local biorxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
local lang_object = mw.getContentLanguage();
local lang_object = mw.getContentLanguage();


good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date); -- convert biorxiv_date value to Unix timestamp  
good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp  
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts); -- convert to numbers for the comparison;
rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison;
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
else
else
Line 200: Line 209:
end
end


return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date
local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timesatmp
 
return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date
end
end


Line 367: Line 378:
if is_set (class) then
if is_set (class) then
if id:match ('^%d+') then
if id:match ('^%d+') then
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
else
else
set_message ('err_class_ignored');
set_message ('err_class_ignored');
Line 399: Line 410:
local access = options.access;
local access = options.access;
local handler = options.handler;
local handler = options.handler;
local ignore_invalid = options.accept;
local err_type;
local err_type;
local err_msg = '';
local err_msg = '';
Line 421: Line 433:
if id:find('&%.') then
if id:find('&%.') then
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
end
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
set_message ('maint_bibcode');
end
end
end
end
end
end


if is_set (err_type) then -- if there was an error detected
if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used
set_message ('err_bad_bibcode', {err_type});
set_message ('err_bad_bibcode', {err_type});
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
end
end


Line 456: Line 470:
local patterns = {
local patterns = {
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
}
}
Line 466: Line 480:


if m then -- m is nil when id is the six-digit form
if m then -- m is nil when id is the six-digit form
if not is_valid_biorxiv_date (y, m, d) then -- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit
break; -- date fail; break out early so we don't unset the error message
break; -- date fail; break out early so we don't unset the error message
end
end
Line 543: Line 557:
local text;
local text;
if is_set (inactive) then
if is_set (inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date
local inactive_month, good;
local inactive_month, good;


Line 554: Line 568:
end
end
end
end
else
end -- otherwise, |doi-broken-date= has something but it isn't a date
inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
end
if is_set (inactive_year) and is_set (inactive_month) then
if is_set (inactive_year) and is_set (inactive_month) then
Line 571: Line 583:


local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported  
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-5]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999
'^[^1-6]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
'^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
'^%d%d%d%d%d%d+', -- 6 or more digits
'^%d%d%d%d%d%d+', -- 6 or more digits
Line 1,012: Line 1,024:
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
end
--[[--------------------------< M E D R X I V >-----------------------------------------------------------------

Format medRxiv ID and do simple error checking.  Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
yyyy.mm.dd. date and suffixed with an optional version identifier.  Earliest date accepted is 2020.01.01

The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
	https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2

inputs:
	<options> – table; the fields read here are:
		options.id – the identifier value to check and render
		options.handler – identifier configuration; its link, label, q, redirect, prefix, separator, encode,
			and access fields are passed through to external_link_id()
		options.coins_list_t – table whose 'MEDRXIV' entry is unset when the identifier fails the checks

returns the value returned by external_link_id(); the link is rendered even when the identifier is in error, in
which case the err_bad_medrxiv message is also set.

]]

local function medrxiv (options)
	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true; -- flag; assume that there will be an error

	-- NOTE(review): the first pattern has no '^' anchor so it matches any <id> that ends with eight digits,
	-- including the unversioned dated form.  Because patterns are tried in order and the loop stops at the
	-- first match, the third pattern is never reached and unversioned dated ids are accepted without date
	-- validation.  Left as-is here; confirm whether that is intended.
	local patterns = {
		'%d%d%d%d%d%d%d%d$', -- simple 8-digit identifier; these should be relatively rare
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$', -- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$', -- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		}

	for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match
		local y, m, d = id:match (pattern); -- <y> is the whole match when <pattern> has no captures; nil when no match
		if y then
			if m then -- <m> is nil when id is the 8-digit form
				if not is_valid_rxiv_date (y, m, d, 'm') then -- validate the encoded date; 'm' selects the medrxiv limit (2020-01-01)
					break; -- date fail; break out early so we don't unset the error message
				end
			end
			err_msg_flag = nil; -- we found a match so unset the error message
			break; -- and done
		end
	end -- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil; -- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv'); -- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
end


Line 1,073: Line 1,131:
elseif id:match('^%d+$') then -- no prefix
elseif id:match('^%d+$') then -- no prefix
number = id; -- get the number
number = id; -- get the number
if 10 < number:len() then
if tonumber (id) > handler.id_limit then
number = nil; -- constrain to 1 to 10 digits; change this when OCLC issues 11-digit numbers
number = nil; -- unset when id value exceeds the limit
end
end
end
end
Line 1,535: Line 1,593:
['JSTOR'] = jstor,
['JSTOR'] = jstor,
['LCCN'] = lccn,
['LCCN'] = lccn,
['MEDRXIV'] = medrxiv,
['MR'] = mr,
['MR'] = mr,
['OCLC'] = oclc,
['OCLC'] = oclc,