Module:Citation/CS1/Configuration: Difference between revisions

m
1 revision imported
wp>Trappist the monk
(bump ssrn;)
m (1 revision imported)
 
(6 intermediate revisions by 4 users not shown)
Line 1: Line 1:
local lang_obj = mw.language.getContentLanguage(); -- language object for the local wiki's content language; used here for languages and dates
--[[--------------------------< S E T T I N G S >--------------------------------------------------------------

Boolean settings used to control various things.  These settings are located here to make them easy to find.

]]
-- these settings are local to this module only
local local_digits_from_mediawiki = false; -- for i18n; when true, module fills date_names['local_digits'] from MediaWiki; when false, manual fill is required; always false at en.wiki
local local_date_names_from_mediawiki = false; -- for i18n; when true, module fills date_names['local']['long'] and date_names['local']['short'] from MediaWiki;
-- when false, manual translation is required; always false at en.wiki
-- these settings are exported to other modules
local use_identifier_redirects = true; -- when true, use the redirect name for identifier label links; always true at en.wiki
local local_lang_cat_enable = false; -- when true, categorizes pages where |language=<local wiki's language>; always false at en.wiki
local date_name_auto_xlate_enable = false; -- when true, translates English month names to the local wiki's language month names; always false at en.wiki
local date_digit_auto_xlate_enable = false; -- when true, translates Western date digits to the local wiki's language digits (date_names['local_digits']); always false at en.wiki
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------


List of namespaces that should not be included in citation error categories.
List of namespace identifiers for namespaces that will not be included in citation error categories.
Same as setting notracking = true by default.
Same as setting notracking = true by default.


Note: Namespace names should use underscores instead of spaces.
For wikis that have a current version of Module:cs1 documentation support, this #invoke will return an unordered
list of namespace names and their associated identifiers:
{{#invoke:cs1 documentation support|uncategorized_namespace_lister|all=<anything>}}


]]
]]


local uncategorized_namespaces = { 'User', 'Talk', 'User_talk', 'Wikipedia_talk',
local uncategorized_namespaces_t = {[2]=true}; -- init with user namespace id; declared local so the table does not leak into the global environment (NOTE(review): assumes every reference to this table lives in this file – confirm)
'File_talk', 'Template_talk', 'Help_talk', 'Category_talk', 'Portal_talk',
for k, _ in pairs (mw.site.talkNamespaces) do -- add all talk namespace ids
'Book_talk', 'Draft_talk', 'Education_Program_talk', 'Module_talk', 'MediaWiki_talk' };
uncategorized_namespaces_t[k] = true;
end
 
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize


Line 28: Line 52:
['archived-dead'] = 'Archived from $1 on $2',
['archived-dead'] = 'Archived from $1 on $2',
['archived-live'] = '$1 from the original on $2',
['archived-live'] = '$1 from the original on $2',
['archived-missing'] = 'Archived from the original$1 on $2',
['archived-unfit'] = 'Archived from the original on ',
['archived-unfit'] = 'Archived from the original on ',
['archived'] = 'Archived',
['archived'] = 'Archived',
Line 42: Line 65:
['inset'] = '$1 inset',
['inset'] = '$1 inset',
['interview'] = 'Interviewed by $1',
['interview'] = 'Interviewed by $1',
['lay summary'] = 'Lay summary',
['mismatch'] = '<code class="cs1-code">&#124;$1=</code> / <code class="cs1-code">&#124;$2=</code> mismatch', -- $1 is year param name; $2 is date param name
['mismatch'] = '<code class="cs1-code">&#124;$1=</code> / <code class="cs1-code">&#124;$2=</code> mismatch', -- $1 is year param name; $2 is date param name
['newsgroup'] = '[[Usenet newsgroup|Newsgroup]]:&nbsp;$1',
['newsgroup'] = '[[Usenet newsgroup|Newsgroup]]:&nbsp;$1',
Line 60: Line 82:


['vol'] = '$1 Vol.&nbsp;$2', -- $1 is sepc; bold journal style volume is in presentation{}
['vol'] = '$1 Vol.&nbsp;$2', -- $1 is sepc; bold journal style volume is in presentation{}
['vol-no'] = '$1 Vol.&nbsp;$2, no.&nbsp;$3', -- sepc, volume, issue (alternatively insert $1 after $2, but then we'd also have to change capitalization)
['vol-no'] = '$1 Vol.&nbsp;$2, no.&nbsp;$3', -- sepc, volume, issue (alternatively insert $1 after $2, but then we'd also have to change capitalization)
['issue'] = '$1 No.&nbsp;$2', -- $1 is sepc
['issue'] = '$1 No.&nbsp;$2', -- $1 is sepc
['art'] = '$1 Art.&nbsp;$2', -- $1 is sepc; for {{cite conference}} only
['vol-art'] = '$1 Vol.&nbsp;$2, art.&nbsp;$3', -- sepc, volume, article-number; for {{cite conference}} only


['j-vol'] = '$1 $2', -- sepc, volume; bold journal volume is in presentation{}
['j-vol'] = '$1 $2', -- sepc, volume; bold journal volume is in presentation{}
['j-issue'] = ' ($1)',
['j-issue'] = ' ($1)',
['j-article-num'] = ' $1', -- TODO: any punctuation here? static text?


['nopp'] = '$1 $2'; -- page(s) without prefix; $1 is sepc
['nopp'] = '$1 $2'; -- page(s) without prefix; $1 is sepc
Line 108: Line 134:
used as class attributes in the <cite> tag that encloses the citation so these names may not contain spaces while
used as class attributes in the <cite> tag that encloses the citation so these names may not contain spaces while
the canonical template name may.  These names are used in warning_msg_e and warning_msg_m to create links to the
the canonical template name may.  These names are used in warning_msg_e and warning_msg_m to create links to the
template's documentation when an article is displayed in preivew mode.
template's documentation when an article is displayed in preview mode.


Most cs1|2 template |CitationClass= values at en.wiki match their canonical template names so are not listed here.
Most cs1|2 template |CitationClass= values at en.wiki match their canonical template names so are not listed here.
Line 115: Line 141:


local citation_class_map_t = { -- TODO: if kept, these and all other config.CitationClass 'names' require some sort of i18n
local citation_class_map_t = { -- TODO: if kept, these and all other config.CitationClass 'names' require some sort of i18n
['audio-visual'] = 'AV media', -- TODO: move to ~/Configuration
['arxiv'] = 'arXiv',
['audio-visual'] = 'AV media',
['AV-media-notes'] = 'AV media notes',
['AV-media-notes'] = 'AV media notes',
['biorxiv'] = 'bioRxiv',
['citeseerx'] = 'CiteSeerX',
['encyclopaedia'] = 'encyclopedia',
['encyclopaedia'] = 'encyclopedia',
['mailinglist'] = 'mailing list',
['mailinglist'] = 'mailing list',
['pressrelease'] = 'press release'
['medrxiv'] = 'medRxiv',
['pressrelease'] = 'press release',
['ssrn'] = 'SSRN',
['techreport'] = 'tech report',
}
}


Line 131: Line 163:


local et_al_patterns = {
local et_al_patterns = {
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.\"']*$", -- variations on the 'et al' theme
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.;,\"']*$", -- variations on the 'et al' theme
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.;,\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al.
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al.
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax)
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax)
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form
}
}
Line 162: Line 194:


['format'] = ' <span class="cs1-format">($1)</span>', -- for |format=, |chapter-format=, etc.
['format'] = ' <span class="cs1-format">($1)</span>', -- for |format=, |chapter-format=, etc.
['interwiki'] = ' <span class="cs1-format">[in $1]</span>', -- for interwiki-language-linked author, editor, etc
['interproj'] = ' <span class="cs1-format">[at $1]</span>', -- for interwiki-project-linked author, editor, etc (:d: and :s: supported; :w: ignored)


-- various access levels, for |access=, |doi-access=, |arxiv=, ...
-- various access levels, for |access=, |doi-access=, |arxiv=, ...
Line 227: Line 261:
['ArchiveFormat'] = 'archive-format',
['ArchiveFormat'] = 'archive-format',
['ArchiveURL'] = {'archive-url', 'archiveurl'}, -- Used by InternetArchiveBot
['ArchiveURL'] = {'archive-url', 'archiveurl'}, -- Used by InternetArchiveBot
['ArticleNumber'] = 'article-number',
['ASINTLD'] = 'asin-tld',
['ASINTLD'] = 'asin-tld',
['At'] = 'at', -- Used by InternetArchiveBot
['At'] = 'at', -- Used by InternetArchiveBot
Line 262: Line 297:
['Issue'] = {'issue', 'number'},
['Issue'] = {'issue', 'number'},
['Language'] = {'language', 'lang'},
['Language'] = {'language', 'lang'},
['LayDate'] = 'lay-date',
['LayFormat'] = 'lay-format',
['LaySource'] = 'lay-source',
['LayURL'] = 'lay-url',
['MailingList'] = {'mailing-list', 'mailinglist'}, -- cite mailing list only
['MailingList'] = {'mailing-list', 'mailinglist'}, -- cite mailing list only
['Map'] = 'map', -- cite map only
['Map'] = 'map', -- cite map only
Line 313: Line 344:
['Title'] = 'title', -- Used by InternetArchiveBot
['Title'] = 'title', -- Used by InternetArchiveBot
['TitleLink'] = {'title-link', 'episode-link', 'episodelink'}, -- Used by InternetArchiveBot
['TitleLink'] = {'title-link', 'episode-link', 'episodelink'}, -- Used by InternetArchiveBot
['TitleNote'] = 'department',
['TitleNote'] = {'title-note', 'department'},
['TitleType'] = {'type', 'medium'},
['TitleType'] = {'type', 'medium'},
['TransChapter'] = {'trans-article', 'trans-chapter', 'trans-contribution',
['TransChapter'] = {'trans-article', 'trans-chapter', 'trans-contribution',
Line 319: Line 350:
['Transcript'] = 'transcript',
['Transcript'] = 'transcript',
['TranscriptFormat'] = 'transcript-format',
['TranscriptFormat'] = 'transcript-format',
['TranscriptURL'] = {'transcript-url', 'transcripturl'}, -- Used by InternetArchiveBot
['TranscriptURL'] = 'transcript-url', -- Used by InternetArchiveBot
['TransMap'] = 'trans-map', -- cite map only
['TransMap'] = 'trans-map', -- cite map only
['TransPeriodical'] = {'trans-journal', 'trans-magazine', 'trans-newspaper',
['TransPeriodical'] = {'trans-journal', 'trans-magazine', 'trans-newspaper',
Line 334: Line 365:
['Year'] = 'year',
['Year'] = 'year',


['AuthorList-First'] = {"first#", "author-first#", "author#-first", "given#",
['AuthorList-First'] = {"first#", "author-first#", "author#-first", "author-given#", "author#-given",
"author-given#", "author#-given"},
"subject-first#", "subject#-first", "subject-given#", "subject#-given",
['AuthorList-Last'] = {"last#", "author-last#", "author#-last", "surname#",
"given#"},
"author-surname#", "author#-surname", "author#", "subject#", 'host#'},
['AuthorList-Last'] = {"last#", "author-last#", "author#-last", "author-surname#", "author#-surname",
"subject-last#", "subject#-last", "subject-surname#", "subject#-surname",
"author#", 'host#', "subject#", "surname#"},
['AuthorList-Link'] = {"author-link#", "author#-link", "subject-link#",
['AuthorList-Link'] = {"author-link#", "author#-link", "subject-link#",
"subject#-link", "authorlink#", "author#link"},
"subject#-link", "authorlink#", "author#link"},
Line 381: Line 414:
'AuthorList-Mask', 'ContributorList-Mask', 'EditorList-Mask', 'InterviewerList-Mask', 'TranslatorList-Mask', -- name-list mask may have name separators
'AuthorList-Mask', 'ContributorList-Mask', 'EditorList-Mask', 'InterviewerList-Mask', 'TranslatorList-Mask', -- name-list mask may have name separators
'PostScript', 'Quote', 'ScriptQuote', 'TransQuote', 'Ref', -- miscellaneous
'PostScript', 'Quote', 'ScriptQuote', 'TransQuote', 'Ref', -- miscellaneous
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'LayURL', 'MapURL', 'TranscriptURL', 'URL', -- URL-holding parameters
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'MapURL', 'TranscriptURL', 'URL', -- URL-holding parameters
}
}


local url_meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
local url_meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'ID', 'LayURL', 'MapURL', 'TranscriptURL', 'URL', -- parameters allowed to hold urls
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'ID', 'MapURL', 'TranscriptURL', 'URL', -- parameters allowed to hold urls
'Page', 'Pages', 'At', 'QuotePage', 'QuotePages', -- insource locators allowed to hold urls
'Page', 'Pages', 'At', 'QuotePage', 'QuotePages', -- insource locators allowed to hold urls
}
}
Line 410: Line 443:
local punct_skip = {};
local punct_skip = {};
local url_skip = {};
local url_skip = {};
--[[--------------------------< S I N G L E - L E T T E R  S E C O N D - L E V E L  D O M A I N S >----------

TLDs that are known to have single-letter second-level domain names.  ccTLDs are deliberately not listed here;
they are accepted in is_domain_name().

]]

local single_letter_2nd_lvl_domains_t = {
	'cash',
	'company',
	'foundation',
	'org',
	'today',
};




Line 423: Line 466:
local special_case_translation = {
local special_case_translation = {
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
['ContributorList'] = 'contributors list', -- translation of these names plus translation of the base mainenance category names in maint_cats{} table below
['ContributorList'] = 'contributors list', -- translation of these names plus translation of the base maintenance category names in maint_cats{} table below
['EditorList'] = 'editors list', -- must match the names of the actual categories
['EditorList'] = 'editors list', -- must match the names of the actual categories
['InterviewerList'] = 'interviewers list', -- this group or translations used by name_has_ed_markup() and name_has_mult_names()
['InterviewerList'] = 'interviewers list', -- this group or translations used by name_has_ed_markup() and name_has_mult_names()
Line 436: Line 479:
-- Lua patterns to match generic titles; usually created by bots or reference filling tools
-- Lua patterns to match generic titles; usually created by bots or reference filling tools
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language
['generic_titles'] = {
-- generic titles and patterns in this table should be lowercase only
-- generic titles and patterns in this table should be lowercase only
-- leave ['local'] nil except when there is a matching generic title in your language
-- leave ['local'] nil except when there is a matching generic title in your language
-- boolean 'true' for plain-text searches; 'false' for pattern searches
-- boolean 'true' for plain-text searches; 'false' for pattern searches
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
 
{['en'] = {'are you a robot', true}, ['local'] = nil},
['generic_titles'] = {
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
['accept'] = {
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
},
{['en'] = {'page not found', true}, ['local'] = nil},
['reject'] = {
{['en'] = {'subscribe to read', true}, ['local'] = nil},
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?unknown[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
{['en'] = {'subscribe to read', true}, ['local'] = nil},
{['en'] = {'login • instagram', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?unknown[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'redirecting...', true}, ['local'] = nil},
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'usurped title', true}, ['local'] = nil}, -- added by a GreenC bot
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'webcite query result', true}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'wikiwix\'s cache', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
{['en'] = {'login • instagram', true}, ['local'] = nil},
{['en'] = {'redirecting...', true}, ['local'] = nil},
{['en'] = {'usurped title', true}, ['local'] = nil}, -- added by a GreenC bot
{['en'] = {'webcite query result', true}, ['local'] = nil},
{['en'] = {'wikiwix\'s cache', true}, ['local'] = nil},
}
},
},
-- boolean 'true' for plain-text searches, search string must be lowercase only
-- boolean 'false' for pattern searches
-- leave ['local'] nil except when there is a matching generic name in your language


['generic_names'] = {
['generic_names'] = {
-- generic names and patterns in this table should be lowercase only
['accept'] = {
-- leave ['local'] nil except when there is a matching generic name in your language
{['en'] = {'%[%[[^|]*%(author%) *|[^%]]*%]%]', false}, ['local'] = nil},
-- boolean 'true' for plain-text searches; 'false' for pattern searches
},
{['en'] = {'about us', true}, ['local'] = nil},
['reject'] = {
{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil},
{['en'] = {'about us', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'allmusic', true}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'business', true}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'cnn', true}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'[,%.%s]%f[e]eds?%.?$', false}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'^eds?[%.,;]', false}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', false}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
{['en'] = {'[,%.%s]%f[e]eds?%.?$', false}, ['local'] = nil},
{['en'] = {'%f[%a]]Ee]mail%f[%A]', false}, ['local'] = nil},
{['en'] = {'^eds?[%.,;]', false}, ['local'] = nil},
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', false}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]mail%f[%A]', false}, ['local'] = nil}, -- pattern search: 'email' or 'Email' bounded by non-letters
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'linkedIn', true}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
{['en'] = {'privacy', true}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'translator', true}, ['local'] = nil},
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'tumblr', true}, ['local'] = nil},
{['en'] = {'linkedin', true}, ['local'] = nil}, -- plain-text search strings in this table must be lowercase only
{['en'] = {'twitter', true}, ['local'] = nil},
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
{['en'] = {'site name', true}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'statement', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
{['en'] = {'submitted', true}, ['local'] = nil},
{['en'] = {'privacy', true}, ['local'] = nil},
{['en'] = {'super.?user', false}, ['local'] = nil},
{['en'] = {'reuters', true}, ['local'] = nil},
{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil},
{['en'] = {'translator', true}, ['local'] = nil},
{['en'] = {'verfasser', true}, ['local'] = nil},
{['en'] = {'tumblr', true}, ['local'] = nil},
}
{['en'] = {'twitter', true}, ['local'] = nil},
{['en'] = {'site name', true}, ['local'] = nil},
{['en'] = {'statement', true}, ['local'] = nil},
{['en'] = {'submitted', true}, ['local'] = nil},
{['en'] = {'super.?user', false}, ['local'] = nil},
{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil},
{['en'] = {'verfasser', true}, ['local'] = nil},
}
}
}
}


Line 519: Line 579:
Easter and Christmas are defined here as 98 and 99, which should be out of the
Easter and Christmas are defined here as 98 and 99, which should be out of the
ISO 8601 (EDTF) range of uses for a while.
ISO 8601 (EDTF) range of uses for a while.
local_date_names_from_mediawiki is a boolean.  When set to:
true – module will fetch local month names from MediaWiki for both date_names['local']['long'] and date_names['local']['short']
false – module will *not* fetch local month names from MediaWiki
Caveat lector:  There is no guarantee that MediaWiki will provide short month names.  At your wiki you can test
the results of the MediaWiki fetch in the debug console with this command (the result is alpha sorted):
=mw.dumpObject (p.date_names['local'])
While the module can fetch month names from MediaWiki, it cannot fetch the quarter, season, and named date names
from MediaWiki.  Those must be translated manually.


]]
]]


local local_date_names_from_mediawiki = true; -- when false, manual translation required for date_names['local']['long'] and date_names['local']['short']
-- when true, module fetches long and short month names from MediaWiki
local date_names = {
local date_names = {
['en'] = { -- English
['en'] = { -- English
Line 530: Line 603:
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
},
},
-- when local_date_names_from_mediawiki = false
['local'] = { -- replace these English date names with the local language equivalents
['local'] = { -- replace these English date names with the local language equivalents
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12},
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12},
Line 537: Line 611:
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
},
},
['inv_local_l'] = {}, -- used in date reformatting; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc.
['inv_local_long'] = {}, -- used in date reformatting & translation; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc.
['inv_local_s'] = {}, -- used in date reformatting; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc.
['inv_local_short'] = {}, -- used in date reformatting & translation; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc.
['inv_local_quarter'] = {}, -- used in date translation; copy of date_names['local'].quarter where k/v are inverted: [1]='<local name>' etc.
['inv_local_season'] = {}, -- used in date translation; copy of date_names['local'].season where k/v are inverted: [1]='<local name>' etc.
['inv_local_named'] = {}, -- used in date translation; copy of date_names['local'].named where k/v are inverted: [1]='<local name>' etc.
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9
['xlate_digits'] = {},
['xlate_digits'] = {},
}
}


for name, i in pairs (date_names['local'].long) do -- this table is ['name'] = i
if local_date_names_from_mediawiki then -- if fetching local month names from MediaWiki is enabled
date_names['inv_local_l'][i] = name; -- invert to get [i] = 'name' for conversions from ymd
local long_t = {};
local short_t = {};
for i=1, 12 do -- loop 12x and
local name = lang_obj:formatDate('F', '2022-' .. i .. '-1'); -- get long month name for each i
long_t[name] = i; -- save it
name = lang_obj:formatDate('M', '2022-' .. i .. '-1'); -- get short month name for each i
short_t[name] = i; -- save it
end
date_names['local']['long'] = long_t; -- write the long table – overwrites manual translation
date_names['local']['short'] = short_t; -- write the short table – overwrites manual translation
end
-- create inverted date-name tables for reformatting and/or translation
for _, invert_t in pairs {{'long', 'inv_local_long'}, {'short', 'inv_local_short'}, {'quarter', 'inv_local_quarter'}, {'season', 'inv_local_season'}, {'named', 'inv_local_named'}} do
for name, i in pairs (date_names['local'][invert_t[1]]) do -- this table is ['name'] = i
date_names[invert_t[2]][i] = name; -- invert to get [i] = 'name' for conversions from ymd
end
end
end


for name, i in pairs (date_names['local'].short) do -- this table is ['name'] = i
if local_digits_from_mediawiki then -- if fetching local digits from MediaWiki is enabled
date_names['inv_local_s'][i] = name; -- invert to get [i] = 'name' for conversions from ymd
local digits_t = {};
for i=0, 9 do -- loop 10x and
digits_t [lang_obj:formatNum (i)] = tostring (i); -- format the loop indexer as local lang table index and assign loop indexer (a string) as the value
end
date_names['local_digits'] = digits_t;
end
end


Line 570: Line 666:
'{{ *([Mm]DY) *[|}]', -- 0
'{{ *([Mm]DY) *[|}]', -- 0
}
}
local title_object = mw.title.getCurrentTitle();
-- <content> is intentionally left nil in Template namespace so that unused templates appear in
-- unused-template-reports; self-transclusion would make them look like they are used
local content;
if title_object.namespace ~= 10 then -- every namespace except Template (id 10)
	-- article wikitext, or '' when there is none: new pages edited w/ve do not have
	-- 'content' until saved; ve does not preview; phab:T221625
	content = title_object:getContent() or '';
end


local function get_date_format ()
local function get_date_format ()
local content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
-- if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports;  
if not content then -- nil content when we're in template
return nil; -- auto-formatting does not work in Template space so don't set global_df
end
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
if match then
if match then
content = content:match ('%b{}', start); -- get the whole template
local use_dates_template = content:match ('%b{}', start); -- get the whole template
if content:match ('| *cs1%-dates *= *[lsy][sy]?') then -- look for |cs1-dates=publication date length access-/archive-date length
if use_dates_template:match ('| *cs1%-dates *= *[lsy][sy]?') then -- look for |cs1-dates=publication date length access-/archive-date length
return match:lower() .. '-' .. content:match ('| *cs1%-dates *= *([lsy][sy]?)');
return match:lower() .. '-' .. use_dates_template:match ('| *cs1%-dates *= *([lsy][sy]?)');
else
else
return match:lower() .. '-all'; -- no |cs1-dates= k/v pair; return value appropriate for use in |df=
return match:lower() .. '-all'; -- no |cs1-dates= k/v pair; return value appropriate for use in |df=
Line 586: Line 691:
end
end


local global_df;
local global_df; -- TODO: add this to <global_cs1_config_t>?




Line 601: Line 706:
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'}
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'}


--[[
Parameter-name lists that decide when {{citation}} may render |volume= and/or |issue=.  They constrain
{{citation}} so that its renderings match those of the equivalent cs1 templates.  Example: {{cite web}} does
not support |volume=, so the equivalent {{citation |website=...}} must not support |volume= either.
]]
local citation_no_volume_t = { -- when any of these parameters is used, {{citation}} does not render |volume=
	'website',
	'mailinglist',
	'script-website',
}
local citation_issue_t = { -- when any of these parameters is used, {{citation}} may render |issue=
	'journal',
	'magazine',
	'newspaper',
	'periodical',
	'work',
	'script-journal',
	'script-magazine',
	'script-newspaper',
	'script-periodical',
	'script-work',
}


--[[
--[[
Line 762: Line 883:




--[[---------------------< S T R I P M A R K E R S >----------------------------
--[[--------------------------< C S 1 _ C O N F I G _ G E T >--------------------------------------------------
 
fetch and validate values from {{cs1 config}} template to fill <global_cs1_config_t>
 
no error messages; when errors are detected, the parameter value from {{cs1 config}} is blanked.


Common pattern definition location for stripmarkers so that we don't have to go
Supports all parameters and aliases associated with the metaparameters: DisplayAuthors, DisplayContributors,
hunting for them if (when) MediaWiki changes their form.
DisplayEditors, DisplayInterviewers, DisplayTranslators, NameListStyle, and Mode.  The DisplayWhatever metaparameters
accept numeric values only (|display-authors=etal and the like is not supported).


]]
]]


local stripmarkers = {
local global_cs1_config_t = {}; -- TODO: add value returned from get_date_format() to this table?
['any'] = '\127[^\127]*UNIQ%-%-(%a+)%-[%a%d]+%-QINU[^\127]*\127', -- capture returns name of stripmarker
 
['math'] = '\127[^\127]*UNIQ%-%-math%-[%a%d]+%-QINU[^\127]*\127' -- math stripmarkers used in coins_cleanup() and coins_replace_math_stripmarker()
local function get_cs1_config ()
}
-- if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports;
if not content then -- nil content when we're in template
return nil; -- nothing to search for {{cs1 config}}; <global_cs1_config_t> stays empty
end
 
local start = content:find('{{ *[Cc][Ss]1 config *[|}]'); -- <start> is offset into <content> when {{cs1 config}} found; nil else
if start then
local cs1_config_template = content:match ('%b{}', start); -- get the whole template


if not cs1_config_template then
return nil;
end


--[[------------< I N V I S I B L E _ C H A R A C T E R S >---------------------
local params_t = mw.text.split (cs1_config_template:gsub ('^{{%s*', ''):gsub ('%s*}}$', ''), '%s*|%s*'); -- remove '{{' and '}}'; make a sequence of parameter/value pairs (split on the pipe)
table.remove (params_t, 1); -- remove the template name because it isn't a parameter/value pair


This table holds non-printing or invisible characters indexed either by name or
local config_meta_params_t = {'DisplayAuthors', 'DisplayContributors', 'DisplayEditors', 'DisplayInterviewers', 'DisplayTranslators', 'NameListStyle', 'Mode'};
by Unicode group. Values are decimal representations of UTF-8 codes.  The table
local meta_param_map_t = {}; -- list of accepted parameter names usable in {{cs1 config}} goes here
is organized as a table of tables because the Lua pairs keyword returns table
data in an arbitrary order. Here, we want to process the table from top to bottom
for _, meta_param in ipairs (config_meta_params_t) do -- for i18n using <config_meta_params_t>, map template parameter names to their metaparameter equivalents
because the entries at the top of the table are also found in the ranges specified
if 'table' == type (aliases[meta_param]) then -- if <meta_param> is a sequence,
by the entries at the bottom of the table.
for _, param in ipairs (aliases[meta_param]) do -- extract its contents
meta_param_map_t[param] = meta_param; -- and add to <meta_param_map_t>
end
else
meta_param_map_t[aliases[meta_param]] = meta_param; -- not a sequence so just add the parameter to <meta_param_map_t>
end
end
 
local keywords_t = {}; -- map valid keywords to their associate metaparameter; reverse form of <keyword_lists[key] for these metaparameters
for _, metaparam_t in ipairs ({{'NameListStyle', 'name-list-style'}, {'Mode', 'mode'}}) do -- only these metaparameter / keywords_lists key pairs
for _, keyword in ipairs (keywords_lists[metaparam_t[2]]) do -- spin through the list of keywords
keywords_t[keyword] = metaparam_t[1]; -- add [keyword] = metaparameter to the map
end
end
 
-- Spin through the {{cs1 config}} parameter/value pairs and fill <global_cs1_config_t>.
-- Parameter names that are not keys in <meta_param_map_t> are ignored.  Without that guard an
-- unknown name maps to nil: in the non-display branch nil == nil compares true for an unknown
-- name paired with an unknown keyword, and writing to global_cs1_config_t[nil] raises
-- 'table index is nil'.
for _, param in ipairs (params_t) do
	local k, v = param:match ('([^=]-)%s*=%s*(.+)'); -- <k> is the parameter name; <v> is parameter's assigned value
	local meta_param = k and meta_param_map_t[k]; -- metaparameter name for <k>; nil when <k> is missing or not a supported name
	if meta_param then
		if k:find ('^display') then -- if <k> is one of the |display-<namelist>= parameters
			if v:match ('^%d+$') then -- the whole assigned value must be digits; doesn't accept 'etal' or mixed values like '3 etal'
				global_cs1_config_t[meta_param] = v; -- add the display param and its value to globals table
			end
		elseif keywords_t[v] == meta_param then -- <v> must be a keyword that belongs to this metaparameter
			global_cs1_config_t[meta_param] = v; -- add the parameter and its value to globals table
		end
	end
end
end
end


Also here is a pattern that recognizes stripmarkers that begin and end with the
get_cs1_config (); -- fill <global_cs1_config_t>
delete characters.  The nowiki stripmarker is not an error but some others are
because the parameter values that include them become part of the template's
metadata before stripmarker replacement.


]]


local invisible_defs = {
--[[---------------------< S T R I P M A R K E R S >----------------------------
del = '\127', -- used to distinguish between stripmarker and del char
zwj = '\226\128\141', -- used with capture because zwj may be allowed
}


local invisible_chars = {
Common pattern definition location for stripmarkers so that we don't have to go
{'replacement', '\239\191\189'}, -- U+FFFD, EF BF BD
hunting for them if (when) MediaWiki changes their form.
{'zero width joiner', '('.. invisible_defs.zwj .. ')'}, -- U+200D, E2 80 8D; capture because zwj may be allowed
 
{'zero width space', '\226\128\139'}, -- U+200B, E2 80 8B
]]
{'hair space', '\226\128\138'}, -- U+200A, E2 80 8A
 
{'soft hyphen', '\194\173'}, -- U+00AD, C2 AD
local stripmarkers = {
{'horizontal tab', '\009'}, -- U+0009 (HT), 09
['any'] = '\127[^\127]*UNIQ%-%-(%a+)%-[%a%d]+%-QINU[^\127]*\127', -- capture returns name of stripmarker
{'line feed', '\010'}, -- U+000A (LF), 0A
['math'] = '\127[^\127]*UNIQ%-%-math%-[%a%d]+%-QINU[^\127]*\127' -- math stripmarkers used in coins_cleanup() and coins_replace_math_stripmarker()
{'no-break space', '\194\160'}, -- U+00A0 (NBSP), C2 A0
{'carriage return', '\013'}, -- U+000D (CR), 0D
{'stripmarker', stripmarkers.any}, -- stripmarker; may or may not be an error; capture returns the stripmaker type
{'delete', '('.. invisible_defs.del .. ')'}, -- U+007F (DEL), 7F; must be done after stripmarker test; capture to distinguish isolated del chars not part of stripmarker
{'C0 control', '[\000-\008\011\012\014-\031]'}, -- U+0000–U+001F (NULL–US), 00–1F (except HT, LF, CR (09, 0A, 0D))
{'C1 control', '[\194\128-\194\159]'}, -- U+0080–U+009F (XXX–APC), C2 80 – C2 9F
-- {'Specials', '[\239\191\185-\239\191\191]'}, -- U+FFF9-U+FFFF, EF BF B9 – EF BF BF
-- {'Private use area', '[\238\128\128-\239\163\191]'}, -- U+E000–U+F8FF, EE 80 80 – EF A3 BF
-- {'Supplementary Private Use Area-A', '[\243\176\128\128-\243\191\191\189]'}, -- U+F0000–U+FFFFD, F3 B0 80 80 – F3 BF BF BD
-- {'Supplementary Private Use Area-B', '[\244\128\128\128-\244\143\191\189]'}, -- U+100000–U+10FFFD, F4 80 80 80 – F4 8F BF BD
}
}


--[[


Indic script makes use of zero width joiner as a character modifier so zwj
--[[------------< I N V I S I B L E _ C H A R A C T E R S >---------------------
characters must be left in.  This pattern covers all of the unicode characters
 
for these languages:
This table holds non-printing or invisible characters indexed either by name or
Devanagari 0900–097F – https://unicode.org/charts/PDF/U0900.pdf
by Unicode group. Values are decimal representations of UTF-8 codes. The table
Devanagari extended A8E0–A8FF – https://unicode.org/charts/PDF/UA8E0.pdf
is organized as a table of tables because the Lua pairs keyword returns table
Bengali 0980–09FF – https://unicode.org/charts/PDF/U0980.pdf
data in an arbitrary order. Here, we want to process the table from top to bottom
Gurmukhi 0A00–0A7F – https://unicode.org/charts/PDF/U0A00.pdf
because the entries at the top of the table are also found in the ranges specified
Gujarati 0A80–0AFF – https://unicode.org/charts/PDF/U0A80.pdf
by the entries at the bottom of the table.
Oriya 0B00–0B7F – https://unicode.org/charts/PDF/U0B00.pdf
 
Tamil 0B80–0BFF – https://unicode.org/charts/PDF/U0B80.pdf
Also here is a pattern that recognizes stripmarkers that begin and end with the
Telugu 0C00–0C7F – https://unicode.org/charts/PDF/U0C00.pdf
delete characters. The nowiki stripmarker is not an error but some others are
Kannada 0C80–0CFF – https://unicode.org/charts/PDF/U0C80.pdf
because the parameter values that include them become part of the template's
Malayalam 0D00–0D7F – https://unicode.org/charts/PDF/U0D00.pdf
metadata before stripmarker replacement.
plus the not-necessarily Indic scripts for Sinhala and Burmese:
Sinhala 0D80-0DFF - https://unicode.org/charts/PDF/U0D80.pdf
Myanmar 1000-109F - https://unicode.org/charts/PDF/U1000.pdf
Myanmar extended A AA60-AA7F - https://unicode.org/charts/PDF/UAA60.pdf
Myanmar extended B A9E0-A9FF - https://unicode.org/charts/PDF/UA9E0.pdf
the pattern is used by has_invisible_chars() and coins_cleanup()


]]
]]


local indic_script = '[\224\164\128-\224\181\191\224\163\160-\224\183\191\225\128\128-\225\130\159\234\167\160-\234\167\191\234\169\160-\234\169\191]';
local invisible_defs = {
 
del = '\127', -- used to distinguish between stripmarker and del char
-- list of emoji that use zwj character (U+200D) to combine with another emoji
zwj = '\226\128\141', -- used with capture because zwj may be allowed
local emoji = { -- indexes are decimal forms of the hex values in U+xxxx
}
[127752] = true, -- U+1F308 🌈 rainbow
 
[127806] = true, -- U+1F33E 🌾 ear of rice
local invisible_chars = {
[127859] = true, -- U+1F373 🍳 cooking
{'replacement', '\239\191\189'}, -- U+FFFD, EF BF BD
[127891] = true, -- U+1F393 🎓 graduation cap
{'zero width joiner', '('.. invisible_defs.zwj .. ')'}, -- U+200D, E2 80 8D; capture because zwj may be allowed
[127908] = true, -- U+1F3A4 🎤 microphone
{'zero width space', '\226\128\139'}, -- U+200B, E2 80 8B
[127912] = true, -- U+1F3A8 🎨 artist palette
{'hair space', '\226\128\138'}, -- U+200A, E2 80 8A
[127979] = true, -- U+1F3EB 🏫 school
{'soft hyphen', '\194\173'}, -- U+00AD, C2 AD
[127981] = true, -- U+1F3ED 🏭 factory
{'horizontal tab', '\009'}, -- U+0009 (HT), 09
[128102] = true, -- U+1F466 👦 boy
{'line feed', '\010'}, -- U+000A (LF), 0A
[128103] = true, -- U+1F467 👧 girl
{'no-break space', '\194\160'}, -- U+00A0 (NBSP), C2 A0
[128104] = true, -- U+1F468 👨 man
{'carriage return', '\013'}, -- U+000D (CR), 0D
[128105] = true, -- U+1F469 👩 woman
{'stripmarker', stripmarkers.any}, -- stripmarker; may or may not be an error; capture returns the stripmaker type
[128139] = true, -- U+1F48B 💋 kiss mark
{'delete', '('.. invisible_defs.del .. ')'}, -- U+007F (DEL), 7F; must be done after stripmarker test; capture to distinguish isolated del chars not part of stripmarker
[128187] = true, -- U+1F4BB 💻 personal computer
{'C0 control', '[\000-\008\011\012\014-\031]'}, -- U+0000–U+001F (NULL–US), 00–1F (except HT, LF, CR (09, 0A, 0D))
[128188] = true, -- U+1F4BC 💼 brief case
{'C1 control', '[\194\128-\194\159]'}, -- U+0080–U+009F (XXX–APC), C2 80 – C2 9F
[128295] = true, -- U+1F527 🔧 wrench
-- {'Specials', '[\239\191\185-\239\191\191]'}, -- U+FFF9-U+FFFF, EF BF B9 – EF BF BF
[128300] = true, -- U+1F52C 🔬 microscope
-- {'Private use area', '[\238\128\128-\239\163\191]'}, -- U+E000–U+F8FF, EE 80 80 – EF A3 BF
[128488] = true, -- U+1F5E8 🗨 left speech bubble
-- {'Supplementary Private Use Area-A', '[\243\176\128\128-\243\191\191\189]'}, -- U+F0000–U+FFFFD, F3 B0 80 80 – F3 BF BF BD
[128640] = true, -- U+1F680 🚀 rocket
-- {'Supplementary Private Use Area-B', '[\244\128\128\128-\244\143\191\189]'}, -- U+100000–U+10FFFD, F4 80 80 80 – F4 8F BF BD
[128658] = true, -- U+1F692 🚒 fire engine
[129309] = true, -- U+1F91D 🤝 handshake
[129455] = true, -- U+1F9AF 🦯 probing cane
[129456] = true, -- U+1F9B0 🦰 emoji component red hair
[129457] = true, -- U+1F9B1 🦱 emoji component curly hair
[129458] = true, -- U+1F9B2 🦲 emoji component bald
[129459] = true, -- U+1F9B3 🦳 emoji component white hair
[129466] = true, -- U+1F9BA 🦺 safety vest
[129468] = true, -- U+1F9BC 🦼 motorized wheelchair
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129489] = true, -- U+1F9D1 🧑 adult
[9760] = true, -- U+2620 ☠ skull and crossbones
[9792] = true, -- U+2640 ♀ female sign
[9794] = true, -- U+2642 ♂ male sign
[9877] = true, -- U+2695 ⚕ staff of aesculapius
[9878] = true, -- U+2696 ⚖ scales
[9992] = true, -- U+2708 ✈ airplane
[10084] = true, -- U+2764 ❤ heavy black heart
}
}


--[[


--[[----------------------< L A N G U A G E  S U P P O R T >-------------------
Indic script makes use of zero width joiner as a character modifier so zwj
 
characters must be left in.  This pattern covers all of the unicode characters
These tables and constants support various language-specific functionality.
for these languages:
Devanagari 0900–097F – https://unicode.org/charts/PDF/U0900.pdf
Devanagari extended A8E0–A8FF – https://unicode.org/charts/PDF/UA8E0.pdf
Bengali 0980–09FF – https://unicode.org/charts/PDF/U0980.pdf
Gurmukhi 0A00–0A7F – https://unicode.org/charts/PDF/U0A00.pdf
Gujarati 0A80–0AFF – https://unicode.org/charts/PDF/U0A80.pdf
Oriya 0B00–0B7F – https://unicode.org/charts/PDF/U0B00.pdf
Tamil 0B80–0BFF – https://unicode.org/charts/PDF/U0B80.pdf
Telugu 0C00–0C7F – https://unicode.org/charts/PDF/U0C00.pdf
Kannada 0C80–0CFF – https://unicode.org/charts/PDF/U0C80.pdf
Malayalam 0D00–0D7F – https://unicode.org/charts/PDF/U0D00.pdf
plus the not-necessarily Indic scripts for Sinhala and Burmese:
Sinhala 0D80-0DFF - https://unicode.org/charts/PDF/U0D80.pdf
Myanmar 1000-109F - https://unicode.org/charts/PDF/U1000.pdf
Myanmar extended A AA60-AA7F - https://unicode.org/charts/PDF/UAA60.pdf
Myanmar extended B A9E0-A9FF - https://unicode.org/charts/PDF/UA9E0.pdf
the pattern is used by has_invisible_chars() and coins_cleanup()


]]
]]


local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code
local indic_script = '[\224\164\128-\224\181\191\224\163\160-\224\183\191\225\128\128-\225\130\159\234\167\160-\234\167\191\234\169\160-\234\169\191]';
if string.match (mw.site.server, 'wikidata') then
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on Wikidata so use interface language setting instead
end


local mw_languages_by_tag_t = mw.language.fetchLanguageNames (this_wiki_code, 'all'); -- get a table of language tag/name pairs known to Wikimedia; used for interwiki tests
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
local mw_languages_by_name_t = {};
-- from: https://unicode.org/Public/emoji/15.0/emoji-zwj-sequences.txt; version: 15.0; 2022-05-06
for k, v in pairs (mw_languages_by_tag_t) do -- build a 'reversed' table name/tag language pairs know to MediaWiki; used for |language=
-- table created by: [[:en:Module:Make emoji zwj table]]
v = mw.ustring.lower (v); -- lowercase for tag fetch; get name's proper case from mw_languages_by_tag_t[<tag>]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
if mw_languages_by_name_t[v] then -- when name already in the table
[9760] = true, -- U+2620 ☠ skull and crossbones
if 2 == #k or 3 == #k then -- if tag does not have subtags
[9792] = true, -- U+2640 ♀ female sign
mw_languages_by_name_t[v] = k; -- prefer the shortest tag for this name
[9794] = true, -- U+2642 ♂ male sign
end
[9877] = true, -- U+2695 ⚕ staff of aesculapius
else -- here when name not in the table
[9878] = true, -- U+2696 ⚖ scales
mw_languages_by_name_t[v] = k; -- so add name and matching tag
[9895] = true, -- U+26A7 ⚧ male with stroke and male and female sign
end
[9992] = true, -- U+2708 ✈ airplane
end
[10052] = true, -- U+2744 ❄ snowflake
 
[10084] = true, -- U+2764 ❤ heavy black heart
local inter_wiki_map = {}; -- map of interwiki prefixes that are language-code prefixes
[11035] = true, -- U+2B1B ⬛ black large square
for k, v in pairs (mw.site.interwikiMap ('local')) do -- spin through the base interwiki map (limited to local)
[127752] = true, -- U+1F308 🌈 rainbow
if mw_languages_by_tag_t[v["prefix"]] then -- if the prefix matches a known language tag
[127787] = true, -- U+1F32B 🌫 fog
inter_wiki_map[v["prefix"]] = true; -- add it to our local map
[127806] = true, -- U+1F33E 🌾 ear of rice
end
[127859] = true, -- U+1F373 🍳 cooking
end
[127868] = true, -- U+1F37C 🍼 baby bottle
[127876] = true, -- U+1F384 🎄 christmas tree
[127891] = true, -- U+1F393 🎓 graduation cap
[127908] = true, -- U+1F3A4 🎤 microphone
[127912] = true, -- U+1F3A8 🎨 artist palette
[127979] = true, -- U+1F3EB 🏫 school
[127981] = true, -- U+1F3ED 🏭 factory
[128102] = true, -- U+1F466 👦 boy
[128103] = true, -- U+1F467 👧 girl
[128104] = true, -- U+1F468 👨 man
[128105] = true, -- U+1F469 👩 woman
[128139] = true, -- U+1F48B 💋 kiss mark
[128168] = true, -- U+1F4A8 💨 dash symbol
[128171] = true, -- U+1F4AB 💫 dizzy symbol
[128187] = true, -- U+1F4BB 💻 personal computer
[128188] = true, -- U+1F4BC 💼 brief case
[128293] = true, -- U+1F525 🔥 fire
[128295] = true, -- U+1F527 🔧 wrench
[128300] = true, -- U+1F52C 🔬 microscope
[128488] = true, -- U+1F5E8 🗨 left speech bubble
[128640] = true, -- U+1F680 🚀 rocket
[128658] = true, -- U+1F692 🚒 fire engine
[129309] = true, -- U+1F91D 🤝 handshake
[129455] = true, -- U+1F9AF 🦯 probing cane
[129456] = true, -- U+1F9B0 🦰 emoji component red hair
[129457] = true, -- U+1F9B1 🦱 emoji component curly hair
[129458] = true, -- U+1F9B2 🦲 emoji component bald
[129459] = true, -- U+1F9B3 🦳 emoji component white hair
[129466] = true, -- U+1F9BA 🦺 safety vest
[129468] = true, -- U+1F9BC 🦼 motorized wheelchair
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129489] = true, -- U+1F9D1 🧑 adult
[129657] = true, -- U+1FA79 🩹 adhesive bandage
[129778] = true, -- U+1FAF2 🫲 leftwards hand
}




--[[--------------------< S C R I P T _ L A N G _ C O D E S >-------------------
--[[----------------------< L A N G U A G E   S U P P O R T >-------------------


This table is used to hold ISO 639-1 two-character and ISO 639-3 three-character
These tables and constants support various language-specific functionality.
language codes that apply only to |script-title= and |script-chapter=


]]
]]


local script_lang_codes = {
--local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code
'ab', 'am', 'ar', 'be', 'bg', 'bn', 'bo', 'bs', 'dv', 'dz', 'el', 'fa', 'gu',
local this_wiki_code = lang_obj:getCode(); -- get this wiki's language code
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'lo', 'mk',
if string.match (mw.site.server, 'wikidata') then
'ml', 'mn', 'mr', 'my', 'ne', 'or', 'ota', 'ps', 'ru', 'sd', 'si', 'sr', 'syc',
this_wiki_code = mw.getCurrentFrame():callParserFunction('int', {'lang'}); -- on Wikidata so use interface language setting instead
'ta', 'te', 'tg', 'th', 'ti', 'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh'
end
};


-- language tag/name pairs known to MediaWiki, fetched for <this_wiki_code>; used for interwiki tests
local mw_languages_by_tag_t = mw.language.fetchLanguageNames (this_wiki_code, 'all');

-- 'reversed' map of lowercase language name -> tag; used for |language=.
-- The name's proper case is available from mw_languages_by_tag_t[<tag>].
local mw_languages_by_name_t = {};
for tag, name in pairs (mw_languages_by_tag_t) do
	local lc_name = mw.ustring.lower (name); -- names are keyed in lowercase
	local tag_len = #tag;
	-- the first sighting of a name always records its tag; a later sighting replaces the
	-- recorded tag only when this tag is two or three characters long (no subtags)
	if not mw_languages_by_name_t[lc_name] or 2 == tag_len or 3 == tag_len then
		mw_languages_by_name_t[lc_name] = tag;
	end
end


--[[---------------< L A N G U A G E  R E M A P P I N G >----------------------
local inter_wiki_map = {}; -- map of interwiki prefixes that are language-code prefixes
 
for k, v in pairs (mw.site.interwikiMap ('local')) do -- spin through the base interwiki map (limited to local)
These tables hold language information that is different (correct) from MediaWiki's definitions
if mw_languages_by_tag_t[v["prefix"]] then -- if the prefix matches a known language tag
inter_wiki_map[v["prefix"]] = true; -- add it to our local map
end
end


For each ['code'] = 'language name' in lang_code_remap{} there must be a matching ['language name'] = {'language name', 'code'} in lang_name_remap{}


lang_code_remap{}:
--[[--------------------< S C R I P T _ L A N G _ C O D E S >-------------------
key is always lowercase ISO 639-1, -2, -3 language code or a valid lowercase IETF language tag
value is properly spelled and capitalized language name associated with key
only one language name per key;
key/value pair must have matching entry in lang_name_remap{}


lang_name_remap{}:
This table is used to hold ISO 639-1 two-character and ISO 639-3 three-character
key is always lowercase language name
language codes that apply only to |script-title= and |script-chapter=
value is a table the holds correctly spelled and capitalized language name [1] and associated code [2] (code must match a code key in lang_code_remap{})
may have multiple keys referring to a common preferred name and code; For example:
['kolsch'] and ['kölsch'] both refer to 'Kölsch' and 'ksh'


]]
]]


local lang_code_remap = { -- used for |language= and |script-title= / |script-chapter=
local script_lang_codes = {
['als'] = 'Tosk Albanian', -- MediaWiki returns Alemannisch  
'ab', 'am', 'ar', 'be', 'bg', 'bn', 'bo', 'bs', 'dv', 'dz', 'el', 'fa', 'gu', 'he',
['bh'] = 'Bihari', -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri Wikipedia: bh.wikipedia.org
'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'lo', 'mk', 'ml', 'mn',
'mni', 'mr', 'my', 'ne', 'or', 'ota', 'pa', 'ps', 'ru', 'sd', 'si', 'sr', 'syc', 'ta',
'te', 'tg', 'th', 'ti', 'tt', 'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh'
};
 
 
--[[---------------< L A N G U A G E  R E M A P P I N G >----------------------
 
These tables hold language information that is different (correct) from MediaWiki's definitions
 
For each ['<tag>'] = 'language name' in lang_tag_remap{} there must be a matching ['language name'] = {'language name', '<tag>'} in lang_name_remap{}
 
lang_tag_remap{}:
key is always lowercase ISO 639-1, -2, -3 language tag or a valid lowercase IETF language tag
value is properly spelled and capitalized language name associated with <tag>
only one language name per <tag>;
key/value pair must have matching entry in lang_name_remap{}
 
lang_name_remap{}:
key is always lowercase language name
value is a table that holds correctly spelled and capitalized language name [1] and associated tag [2] (tag must match a tag key in lang_tag_remap{})
may have multiple keys referring to a common preferred name and tag; For example:
['kolsch'] and ['kölsch'] both refer to 'Kölsch' and 'ksh'
 
]]
 
local lang_tag_remap = { -- used for |language= and |script-title= / |script-chapter=
['als'] = 'Tosk Albanian', -- MediaWiki returns Alemannisch  
['bh'] = 'Bihari', -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri Wikipedia: bh.wikipedia.org
['bla'] = 'Blackfoot', -- MediaWiki/IANA/ISO 639: Siksika; use en.wiki preferred name
['bla'] = 'Blackfoot', -- MediaWiki/IANA/ISO 639: Siksika; use en.wiki preferred name
['bn'] = 'Bengali', -- MediaWiki returns Bangla
['bn'] = 'Bengali', -- MediaWiki returns Bangla
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['fkv'] = 'Kven', -- MediaWiki returns Kvensk
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
Line 964: Line 1,171:
}
}


local lang_name_remap = { -- used for |language=
local lang_name_remap = { -- used for |language=; names require proper capitalization; tags must be lowercase
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
Line 975: Line 1,182:
['kolsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name (use non-diacritical o instead of umlaut ö)
['kolsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name (use non-diacritical o instead of umlaut ö)
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name
['kven'] = {'Kven', 'fkv'}, -- Unicode CLDR have decided not to support English language name for these two...
['kvensk'] = {'Kven', 'fkv'}, -- ...they say to refer to IANA registry for English names
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-TW'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese  
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-tw'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese  
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan
['valencian'] = {'Valencian', 'ca-valencia'}, -- variant of Catalan; categorizes as Valencian
}
}


Line 994: Line 1,203:
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['location-test'] = 'CS1 location test',
['location-test'] = 'CS1 location test',
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 charachters
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 characters
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1 code
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1 code
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
Line 1,009: Line 1,218:
local title_types = {
local title_types = {
['AV-media-notes'] = 'Media notes',
['AV-media-notes'] = 'Media notes',
['document'] = 'Document',
['interview'] = 'Interview',
['interview'] = 'Interview',
['mailinglist'] = 'Mailing list',
['mailinglist'] = 'Mailing list',
Line 1,089: Line 1,299:
hidden = false
hidden = false
  },
  },
err_archive_date_missing_url = {
message = '<code class="cs1-code">&#124;archive-date=</code> requires <code class="cs1-code">&#124;archive-url=</code>',
anchor = 'archive_date_missing_url',
category = 'CS1 errors: archive-url',
hidden = false
},
err_archive_date_url_ts_mismatch = {
message = '<code class="cs1-code">&#124;archive-date=</code> / <code class="cs1-code">&#124;archive-url=</code> timestamp mismatch',
anchor = 'archive_date_url_ts_mismatch',
category = 'CS1 errors: archive-url',
hidden = false
},
err_archive_missing_date = {
err_archive_missing_date = {
message = '<code class="cs1-code">&#124;archive-url=</code> requires <code class="cs1-code">&#124;archive-date=</code>',
message = '<code class="cs1-code">&#124;archive-url=</code> requires <code class="cs1-code">&#124;archive-date=</code>',
Line 1,207: Line 1,429:
anchor = 'bad_lccn',
anchor = 'bad_lccn',
category = 'CS1 errors: LCCN',
category = 'CS1 errors: LCCN',
hidden = false
},
err_bad_medrxiv = {
message = 'Check <code class="cs1-code">&#124;medrxiv=</code> value',
anchor = 'bad_medrxiv',
category = 'CS1 errors: medRxiv',
hidden = false
hidden = false
},
},
Line 1,410: Line 1,638:
message = '<code class="cs1-code">&#124;$1=</code> missing <code class="cs1-code">&#124;$2=</code>', -- $1 is first alias, $2 is matching last alias
message = '<code class="cs1-code">&#124;$1=</code> missing <code class="cs1-code">&#124;$2=</code>', -- $1 is first alias, $2 is matching last alias
anchor = 'first_missing_last',
anchor = 'first_missing_last',
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
hidden = false
hidden = false
},
},
Line 1,441: Line 1,669:
anchor = 'invisible_char',
anchor = 'invisible_char',
category = 'CS1 errors: invisible characters',
category = 'CS1 errors: invisible characters',
hidden = false
},
err_medrxiv_missing = {
message = '<code class="cs1-code">&#124;medrxiv=</code> required',
anchor = 'medrxiv_missing',
category = 'CS1 errors: medRxiv', -- same as bad medRxiv
hidden = false
hidden = false
},
},
Line 1,459: Line 1,693:
anchor = 'missing_pipe',
anchor = 'missing_pipe',
category = 'CS1 errors: missing pipe',
category = 'CS1 errors: missing pipe',
hidden = false
},
err_missing_publisher = {
message = 'Cite $1 requires <code class="cs1-code">&#124;$2=</code>', -- $1 is cs1 template name; $2 is canonical publisher parameter name for cite $1
anchor = 'missing_publisher',
category = 'CS1 errors: missing publisher',
hidden = false
hidden = false
},
},
Line 1,483: Line 1,723:
anchor = 'parameter_ignored_suggest',
anchor = 'parameter_ignored_suggest',
category = 'CS1 errors: unsupported parameter',
category = 'CS1 errors: unsupported parameter',
hidden = false
},
err_periodical_ignored = {
message = '<code class="cs1-code">&#124;$1=</code> ignored', -- $1 is parameter name
anchor = 'periodical_ignored',
category = 'CS1 errors: periodical ignored',
hidden = false
hidden = false
},
},
Line 1,500: Line 1,746:
message = '<code class="cs1-code">&#124;ssrn=</code> required',
message = '<code class="cs1-code">&#124;ssrn=</code> required',
anchor = 'ssrn_missing',
anchor = 'ssrn_missing',
category = 'CS1 errors: SSRN', -- same as bad arxiv
category = 'CS1 errors: SSRN',
hidden = false
hidden = false
},
},
Line 1,552: Line 1,798:
anchor = 'authors',
anchor = 'authors',
category = 'CS1 maint: uses authors parameter',
category = 'CS1 maint: uses authors parameter',
hidden = true,
},
maint_bibcode = {
message = nil,
anchor = 'bibcode',
category = 'CS1 maint: bibcode',
hidden = true,
},
maint_location_no_publisher = { -- cite book, conference, encyclopedia; citation as book cite or encyclopedia cite
message = nil,
anchor = 'location_no_publisher',
category = 'CS1 maint: location missing publisher',
hidden = true,
hidden = true,
},
},
Line 1,625: Line 1,883:
category = 'CS1 maint: location',
category = 'CS1 maint: location',
hidden = true,
hidden = true,
},
},
maint_mr_format = {
maint_mr_format = {
message = nil,
message = nil,
Line 1,631: Line 1,889:
category = 'CS1 maint: MR format',
category = 'CS1 maint: MR format',
hidden = true,
hidden = true,
},
},
maint_mult_names = {
maint_mult_names = {
message = nil,
message = nil,
Line 1,655: Line 1,913:
category = 'CS1 maint: others in cite AV media (notes)',
category = 'CS1 maint: others in cite AV media (notes)',
hidden = true,
hidden = true,
},
},
maint_overridden_setting = {
message = nil,
anchor = 'overridden',
category = 'CS1 maint: overridden setting',
hidden = true,
},
maint_pmc_embargo = {
maint_pmc_embargo = {
message = nil,
message = nil,
Line 1,723: Line 1,987:
redirect: a local redirect to a local Wikipedia article name;  at en.wiki, 'ISBN (identifier)' is a redirect to 'International Standard Book Number'
redirect: a local redirect to a local Wikipedia article name;  at en.wiki, 'ISBN (identifier)' is a redirect to 'International Standard Book Number'
q: Wikidata q number for the identifier
q: Wikidata q number for the identifier
label: the label preceeding the identifier; label is linked to a Wikipedia article (in this order):
label: the label preceding the identifier; label is linked to a Wikipedia article (in this order):
redirect from id_handlers['<id>'].redirect when use_identifier_redirects is true
redirect from id_handlers['<id>'].redirect when use_identifier_redirects is true
Wikidata-supplied article name for the local wiki from id_handlers['<id>'].q
Wikidata-supplied article name for the local wiki from id_handlers['<id>'].q
Line 1,752: Line 2,016:
q = 'Q118398',
q = 'Q118398',
label = 'arXiv',
label = 'arXiv',
prefix = '//arxiv.org/abs/', -- protocol-relative tested 2013-09-04
prefix = 'https://arxiv.org/abs/',
encode = false,
encode = false,
COinS = 'info:arxiv',
COinS = 'info:arxiv',
Line 1,764: Line 2,028:
q = 'Q1753278',
q = 'Q1753278',
label = 'ASIN',
label = 'ASIN',
prefix = '//www.amazon.',
prefix = 'https://www.amazon.',
COinS = 'url',
COinS = 'url',
separator = '&nbsp;',
separator = '&nbsp;',
Line 1,787: Line 2,051:
q = 'Q19835482',
q = 'Q19835482',
label = 'bioRxiv',
label = 'bioRxiv',
prefix = '//doi.org/',
prefix = 'https://doi.org/',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
access = 'free', -- free to read
Line 1,799: Line 2,063:
q = 'Q2715061',
q = 'Q2715061',
label = 'CiteSeerX',
label = 'CiteSeerX',
prefix = '//citeseerx.ist.psu.edu/viewdoc/summary?doi=',
prefix = 'https://citeseerx.ist.psu.edu/viewdoc/summary?doi=',
COinS =  'pre', -- use prefix value
COinS =  'pre', -- use prefix value
access = 'free', -- free to read
access = 'free', -- free to read
Line 1,811: Line 2,075:
q = 'Q25670',
q = 'Q25670',
label = 'doi',
label = 'doi',
prefix = '//doi.org/',
prefix = 'https://doi.org/',
COinS = 'info:doi',
COinS = 'info:doi',
separator = ':',
separator = ':',
Line 1,823: Line 2,087:
q = 'Q46339674',
q = 'Q46339674',
label = 'eISSN',
label = 'eISSN',
prefix = '//www.worldcat.org/issn/',
prefix = 'https://www.worldcat.org/issn/',
COinS = 'rft.eissn',
COinS = 'rft.eissn',
encode = false,
encode = false,
Line 1,834: Line 2,098:
q = 'Q3126718',
q = 'Q3126718',
label = 'hdl',
label = 'hdl',
prefix = '//hdl.handle.net/',
prefix = 'https://hdl.handle.net/',
COinS = 'info:hdl',
COinS = 'info:hdl',
separator = ':',
separator = ':',
Line 1,866: Line 2,130:
q = 'Q131276',
q = 'Q131276',
label = 'ISSN',
label = 'ISSN',
prefix = '//www.worldcat.org/issn/',
prefix = 'https://www.worldcat.org/issn/',
COinS = 'rft.issn',
COinS = 'rft.issn',
encode = false,
encode = false,
Line 1,877: Line 2,141:
q = '',
q = '',
label = 'JFM',
label = 'JFM',
prefix = '//zbmath.org/?format=complete&q=an:',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 1,888: Line 2,152:
q = 'Q1420342',
q = 'Q1420342',
label = 'JSTOR',
label = 'JSTOR',
prefix = '//www.jstor.org/stable/', -- protocol-relative tested 2013-09-04
prefix = 'https://www.jstor.org/stable/',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = false,
encode = false,
Line 1,900: Line 2,164:
q = 'Q620946',
q = 'Q620946',
label = 'LCCN',
label = 'LCCN',
prefix = '//lccn.loc.gov/', -- protocol-relative tested 2015-12-28
prefix = 'https://lccn.loc.gov/',
COinS = 'info:lccn',
COinS = 'info:lccn',
encode = false,
separator = '&nbsp;',
},
['MEDRXIV'] = {
parameters = {'medrxiv'},
link = 'medRxiv',
redirect = 'medRxiv (identifier)',
q = 'Q58465838',
label = 'medRxiv',
prefix = 'https://www.medrxiv.org/content/',
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
Line 1,911: Line 2,187:
q = 'Q211172',
q = 'Q211172',
label = 'MR',
label = 'MR',
prefix = '//www.ams.org/mathscinet-getitem?mr=', -- protocol-relative tested 2013-09-04
prefix = 'https://mathscinet.ams.org/mathscinet-getitem?mr=',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 1,922: Line 2,198:
q = 'Q190593',
q = 'Q190593',
label = 'OCLC',
label = 'OCLC',
prefix = '//www.worldcat.org/oclc/',
prefix = 'https://www.worldcat.org/oclc/',
COinS = 'info:oclcnum',
COinS = 'info:oclcnum',
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 9999999999, -- 10-digits
id_limit = 10030000000,
},
},
['OL'] = {
['OL'] = {
Line 1,934: Line 2,210:
q = 'Q1201876',
q = 'Q1201876',
label = 'OL',
label = 'OL',
prefix = '//openlibrary.org/',
prefix = 'https://openlibrary.org/',
COinS = 'url',
COinS = 'url',
separator = '&nbsp;',
separator = '&nbsp;',
Line 1,946: Line 2,222:
q = 'Q2015776',
q = 'Q2015776',
label = 'OSTI',
label = 'OSTI',
prefix = '//www.osti.gov/biblio/', -- protocol-relative tested 2018-09-12
prefix = 'https://www.osti.gov/biblio/',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 23000000,
id_limit = 23010000,
custom_access = 'osti-access',
custom_access = 'osti-access',
},
},
Line 1,959: Line 2,235:
q = 'Q229883',
q = 'Q229883',
label = 'PMC',
label = 'PMC',
prefix = '//www.ncbi.nlm.nih.gov/pmc/articles/PMC',
prefix = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC',
suffix = '',
suffix = '',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 9100000,
id_limit = 10700000,
access = 'free', -- free to read
access = 'free', -- free to read
},
},
Line 1,973: Line 2,249:
q = 'Q2082879',
q = 'Q2082879',
label = 'PMID',
label = 'PMID',
prefix = '//pubmed.ncbi.nlm.nih.gov/',
prefix = 'https://pubmed.ncbi.nlm.nih.gov/',
COinS = 'info:pmid',
COinS = 'info:pmid',
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 35400000,
id_limit = 37900000,
},
},
['RFC'] = {
['RFC'] = {
Line 1,985: Line 2,261:
q = 'Q212971',
q = 'Q212971',
label = 'RFC',
label = 'RFC',
prefix = '//tools.ietf.org/html/rfc',
prefix = 'https://tools.ietf.org/html/rfc',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = false,
encode = false,
Line 2,007: Line 2,283:
q = 'Q7550801',
q = 'Q7550801',
label = 'SSRN',
label = 'SSRN',
prefix = '//ssrn.com/abstract=', -- protocol-relative tested 2013-09-04
prefix = 'https://papers.ssrn.com/sol3/papers.cfm?abstract_id=',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 4100000,
id_limit = 4600000,
custom_access = 'ssrn-access',
custom_access = 'ssrn-access',
},
},
Line 2,024: Line 2,300:
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 250000000,
id_limit = 264000000,
custom_access = 's2cid-access',
custom_access = 's2cid-access',
},
},
Line 2,044: Line 2,320:
q = 'Q190269',
q = 'Q190269',
label = 'Zbl',
label = 'Zbl',
prefix = '//zbmath.org/?format=complete&q=an:',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 2,056: Line 2,332:


return {
return {
use_identifier_redirects = true, -- when true use redirect name for identifier label links; always true at en.wiki
use_identifier_redirects = use_identifier_redirects, -- booleans defined in the settings at the top of this module
local_lang_cat_enable = false; -- when true categorizes pages where |language=<local wiki's language>; always false at en.wiki
local_lang_cat_enable = local_lang_cat_enable,
date_name_auto_xlate_enable = false; -- when true translates English month-names to the local-wiki's language month names; always false at en.wiki
date_name_auto_xlate_enable = date_name_auto_xlate_enable,
date_digit_auto_xlate_enable = false; -- when true translates Western date digit to the local-wiki's language digits (date_names['local_digits']); always false at en.wiki
date_digit_auto_xlate_enable = date_digit_auto_xlate_enable,
global_df = get_date_format (), -- tables and variables created when this module is loaded
-- tables and variables created when this module is loaded
global_df = get_date_format (), -- this line can be replaced with "global_df = 'dmy-all'," to have all dates auto translated to dmy format.
global_cs1_config_t = global_cs1_config_t, -- global settings from {{cs1 config}}
punct_skip = build_skip_table (punct_skip, punct_meta_params),
punct_skip = build_skip_table (punct_skip, punct_meta_params),
url_skip = build_skip_table (url_skip, url_meta_params),
url_skip = build_skip_table (url_skip, url_meta_params),
Line 2,075: Line 2,353:
keywords_lists = keywords_lists,
keywords_lists = keywords_lists,
keywords_xlate = keywords_xlate,
keywords_xlate = keywords_xlate,
stripmarkers=stripmarkers,
stripmarkers = stripmarkers,
invisible_chars = invisible_chars,
invisible_chars = invisible_chars,
invisible_defs = invisible_defs,
invisible_defs = invisible_defs,
indic_script = indic_script,
indic_script = indic_script,
emoji = emoji,
emoji_t = emoji_t,
local_lang_cat_enable = local_lang_cat_enable,
maint_cats = maint_cats,
maint_cats = maint_cats,
messages = messages,
messages = messages,
Line 2,086: Line 2,363:
prop_cats = prop_cats,
prop_cats = prop_cats,
script_lang_codes = script_lang_codes,
script_lang_codes = script_lang_codes,
lang_code_remap = lang_code_remap,
lang_tag_remap = lang_tag_remap,
lang_name_remap = lang_name_remap,
lang_name_remap = lang_name_remap,
this_wiki_code = this_wiki_code,
this_wiki_code = this_wiki_code,
title_types = title_types,
title_types = title_types,
uncategorized_namespaces = uncategorized_namespaces,
uncategorized_namespaces = uncategorized_namespaces_t,
uncategorized_subpages = uncategorized_subpages,
uncategorized_subpages = uncategorized_subpages,
templates_using_volume = templates_using_volume,
templates_using_volume = templates_using_volume,
Line 2,096: Line 2,373:
templates_not_using_page = templates_not_using_page,
templates_not_using_page = templates_not_using_page,
vol_iss_pg_patterns = vol_iss_pg_patterns,
vol_iss_pg_patterns = vol_iss_pg_patterns,
single_letter_2nd_lvl_domains_t = single_letter_2nd_lvl_domains_t,
inter_wiki_map = inter_wiki_map,
inter_wiki_map = inter_wiki_map,
Line 2,101: Line 2,379:
mw_languages_by_name_t = mw_languages_by_name_t,
mw_languages_by_name_t = mw_languages_by_name_t,
citation_class_map_t = citation_class_map_t,
citation_class_map_t = citation_class_map_t,
citation_issue_t = citation_issue_t,
citation_no_volume_t = citation_no_volume_t,
}
}