-1096 Removals
+2544 Additions

Created by:srdjanm

-- Module-level collector tables. The helpers in this module append to these while a citation is rendered.
local z = {
	error_categories = {};		-- for categorizing citations that contain errors
	error_ids = {};				-- keyed by error id; set_error() marks each id it has raised
	message_tail = {};			-- queued error message entries (tables built from set_error() results)
	maintenance_cats = {};		-- for categorizing citations that aren't erroneous per se, but could use a little work
	properties_cats = {};		-- for categorizing citations based on certain properties, language of source for instance
}
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]
-- Locals declared up front so that the functions defined below can refer to them.
-- NOTE(review): presumably these are assigned once the sub-modules are loaded; that code is not visible here.
local dates, year_date_check -- functions in Module:Citation/CS1/Date_validation
local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist
--[[--------------------------< I S _ S E T >------------------------------------------------------------------
Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string.
This function is global because it is called from both this module and from Date validation
]]
-- Deliberately global (see the header comment above): true unless var is nil or the empty string.
function is_set( var )
	return not (var == nil or var == '');
end
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Locates and returns the first set value in a table of values where the order established in the table,
left-to-right (or top-to-bottom), is the order in which the values are evaluated.  Returns nil if none are set.

This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs.  With the pairs
version the order of evaluation could not be guaranteed.  With the ipairs version, a nil value would terminate
the for-loop before it reached the actual end of the list.

Parameters:
	list	– table of candidate values indexed 1..count (may contain nil holes)
	count	– number of slots in list to examine

]]

local function first_set (list, count)
	local i = 1;
	while i <= count do						-- loop through all items in list
		if is_set( list[i] ) then
			return list[i];					-- return the first set list member
		end
		i = i + 1;							-- point to next
	end
end
--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------

Whether needle is in haystack.

Returns the (1-based) index of the first element of haystack equal to needle; false when needle is nil or
is not found.  Only the sequence part of haystack is searched (ipairs).

]]

local function in_array( needle, haystack )
	if needle == nil then
		return false;
	end
	for n,v in ipairs( haystack ) do
		if v == needle then
			return n;
		end
	end
	return false;
end
--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
Populates numbered arguments in a message string using an argument table.
The message is built with mw.message.newRawMessage() and rendered with :plain().
When args is nil (or false) msg is returned unchanged.
]]
local function substitute( msg, args )
return args and mw.message.newRawMessage( msg, args ):plain() or msg;
end
--[[--------------------------< E R R O R _ C O M M E N T >----------------------------------------------------
Wraps error messages with css markup according to the state of hidden.
]]
-- Wrap content in the hidden-error markup when hidden is truthy (and that markup is configured);
-- otherwise use the visible-error markup.
local function error_comment( content, hidden )
	local markup = cfg.presentation['visible-error'];
	if hidden and cfg.presentation['hidden-error'] then
		markup = cfg.presentation['hidden-error'];
	end
	return substitute( markup, content );
end
--[[--------------------------< S E T _ E R R O R >--------------------------------------------------------------
Sets an error condition and returns the appropriate error message. The actual placement of the error message in the output is
the responsibility of the calling function.
]]
-- Record the error condition error_id and build its message.
--	error_id	– key into cfg.error_conditions; an unknown key raises a Lua error
--	arguments	– values substituted into the configured message text
--	raw			– when exactly true, return the bare message and its hidden flag instead of wrapped markup
--	prefix, suffix – optional text placed around the message (default to empty strings)
-- Returns '', false when a missing-title error has already been recorded and this error is one of the two
-- that would merely repeat it.
local function set_error( error_id, arguments, raw, prefix, suffix )
	local error_state = cfg.error_conditions[ error_id ];

	if error_state == nil then
		error( cfg.messages['undefined_error'] );
	end
	if is_set( error_state.category ) then
		table.insert( z.error_categories, error_state.category );
	end

	-- message text followed by a wikilink to the matching help-page anchor
	local help_link = " ([[" .. cfg.messages['help page link'] ..
		"#" .. error_state.anchor .. "|" ..
		cfg.messages['help page label'] .. "]])";
	local message = substitute( error_state.message, arguments ) .. help_link;

	z.error_ids[ error_id ] = true;										-- the id is noted even when the message is suppressed below

	local repeats_missing_title = (error_id == 'bare_url_missing_title') or (error_id == 'trans_missing_title');
	if repeats_missing_title and z.error_ids['citation_missing_title'] then
		return '', false;
	end

	message = table.concat({ prefix or "", message, suffix or "" });

	if raw ~= true then
		return error_comment( message, error_state.hidden );
	end
	return message, error_state.hidden;
end
--[[--------------------------< A D D _ M A I N T _ C A T >------------------------------------------------------
Adds a category to z.maintenance_cats using names from the configuration file with additional text if any.
To prevent duplication, the added_maint_cats table lists the categories by key that have been added to z.maintenance_cats.
]]
local added_maint_cats = {}					-- keys of the maintenance categories already added to z.maintenance_cats

-- Add the maintenance category named by key (formatted with arguments) once; later calls with the same key do nothing.
local function add_maint_cat (key, arguments)
	if added_maint_cats [key] then
		return;								-- already added
	end
	added_maint_cats [key] = true;
	local category = substitute (cfg.maint_cats [key], arguments);		-- make the category name
	table.insert( z.maintenance_cats, category );
end
--[[--------------------------< A D D _ P R O P _ C A T >--------------------------------------------------------
Adds a category to z.properties_cats using names from the configuration file with additional text if any.
]]
local added_prop_cats = {}					-- keys of the property categories already added to z.properties_cats

-- Add the property category named by key (formatted with arguments) once; later calls with the same key do nothing.
local function add_prop_cat (key, arguments)
	if added_prop_cats [key] then
		return;								-- already added
	end
	added_prop_cats [key] = true;
	local category = substitute (cfg.prop_cats [key], arguments);		-- make the category name
	table.insert( z.properties_cats, category );
end
--[[--------------------------< A D D _ V A N C _ E R R O R >----------------------------------------------------
Adds a single Vancouver system error message to the template's output regardless of how many error actually exist.
To prevent duplication, added_vanc_errs is nil until an error message is emitted.
]]
local added_vanc_errs;						-- nil until the single Vancouver error has been queued

-- Queue the 'vancouver' error on z.message_tail the first time this is called; later calls do nothing.
local function add_vanc_error ()
	if added_vanc_errs then
		return;
	end
	added_vanc_errs = true;
	local entry = { set_error( 'vancouver', {}, true ) };		-- keeps both results: message and hidden flag
	table.insert( z.message_tail, entry );
end
--[[--------------------------< I S _ S C H E M E >------------------------------------------------------------
does this thing that purports to be a uri scheme seem to be a valid scheme? The scheme is checked to see if it
is in agreement with http://tools.ietf.org/html/std66#section-3.1 which says:
Scheme names consist of a sequence of characters beginning with a
letter and followed by any combination of letters, digits, plus
("+"), period ("."), or hyphen ("-").
returns true if it does, else false
]]
-- Test scheme against the std66 scheme syntax: a letter, then letters, digits, '+', '.', or '-', then a colon.
-- Returns the matched text (truthy) on success; nil when there is no match; scheme itself when it is nil or false.
local function is_scheme (scheme)
	if not scheme then
		return scheme;						-- nothing to test; hand back the falsy value unchanged
	end
	local matched = scheme:match ('^%a[%a%d%+%.%-]*:');
	return matched;
end
--[=[-------------------------< I S _ D O M A I N _ N A M E >--------------------------------------------------
Does this thing that purports to be a domain name seem to be a valid domain name?
Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5
BNF defined here: https://tools.ietf.org/html/rfc4234
Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15;
see also [[Single-letter second-level domain]]
list of tlds: https://www.iana.org/domains/root/db
rfc952 (modified by rfc 1123) requires the first and last character of a hostname to be a letter or a digit. Between
the first and last characters the name may use letters, digits, and the hyphen.
Also allowed are IPv4 addresses. IPv6 not supported
domain is expected to be stripped of any path so that the last character in the last character of the tld. tld
is two or more alpha characters. Any preceding '//' (from splitting a url with a scheme) will be stripped
here. Perhaps not necessary but retained incase it is necessary for IPv4 dot decimal.
There are several tests:
the first character of the whole domain name including subdomains must be a letter or a digit
single-letter/digit second-level domains in the .org TLD
q, x, and z SL domains in the .com TLD
i and q SL domains in the .net TLD
single-letter SL domains in the ccTLDs (where the ccTLD is two letters)
two-character SL domains in gTLDs (where the gTLD is two or more letters)
three-plus-character SL domains in gTLDs (where the gTLD is two or more letters)
IPv4 dot-decimal address format; TLD not allowed
returns true if domain appears to be a proper name and tld or IPv4 address, else false
]=]
-- Return true when domain (optionally prefixed with '//') matches one of the accepted host-name shapes or
-- looks like an IPv4 dot-decimal address; false otherwise, including when domain is nil or false.
local function is_domain_name (domain)
	if not domain then
		return false;						-- if not set, abandon
	end

	local name = domain:gsub ('^//', '');	-- strip a leading '//' once, before any test
	if not name:match ('^[%a%d]') then		-- first character must be letter or digit
		return false;
	end

	local accepted = {						-- tried in this order; the first match wins
		'%f[%a%d][%a%d]%.org$',									-- one character .org hostname
		'%f[%a][qxz]%.com$',									-- assigned one character .com hostname
		'%f[%a][iq]%.net$',										-- assigned one character .net hostname
		'%f[%a%d][%a%d]%.%a%a$',								-- one character hostname and cctld (2 chars)
		'%f[%a%d][%a%d][%a%d]%.%a%a+$',							-- two character hostname and tld
		'%f[%a%d][%a%d][%a%d%-]+[%a%d]%.%a%a+$',				-- three or more character hostname.hostname or hostname.tld
		'^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',				-- IPv4 address
	};

	for _, pattern in ipairs (accepted) do
		if name:match (pattern) then
			return true;
		end
	end
	return false;
end
--[[--------------------------< I S _ U R L >------------------------------------------------------------------

returns true if the scheme and domain parts of a url appear to be a valid url; else false.

This function is the last step in the validation process.  This function is separate because there are cases that
are not covered by split_url(), for example is_parameter_ext_wikilink() which is looking for bracketted external
wikilinks.

Parameters:
	scheme	– scheme text including its colon, or nil/empty for a protocol relative url
	domain	– domain text, or nil

The result is the truthy/falsy value produced by is_scheme() and is_domain_name().

]]

local function is_url (scheme, domain)
	if is_set (scheme) then					-- if scheme is set check it and domain
		return is_scheme (scheme) and is_domain_name (domain);
	else
		return is_domain_name (domain);		-- scheme not set when url is protocol relative
	end
end
--[[--------------------------< S P L I T _ U R L >------------------------------------------------------------
Split a url into a scheme, authority indicator, and domain.
If protocol relative url, return nil scheme and domain else return nil for both scheme and domain.
When not protocol relative, get scheme, authority indicator, and domain. If there is an authority indicator (one
or more '/' characters following the scheme's colon), make sure that there are only 2.
]]
-- Split url_str into scheme and domain after discarding any path.
-- Returns scheme, domain:
--	protocol relative url ('//host')	– nil, host
--	scheme:[//]host						– 'scheme:', host; domain is nil when the authority indicator is not exactly '//' or absent
--	anything else						– nil, nil
local function split_url (url_str)
	local scheme, authority, domain;

	url_str = url_str:gsub ('(%a)/.*', '%1');			-- strip path information (the capture prevents false replacement of '//')

	if url_str:match ('^//%S*') then					-- if there is what appears to be a protocol relative url
		domain = url_str:match ('^//(%S*)')
	elseif url_str:match ('%S-:/*%S+') then				-- if there is what appears to be a scheme, optional authority indicator, and domain name
		scheme, authority, domain = url_str:match ('(%S-:)(/*)(%S+)');	-- extract the scheme, authority indicator, and domain portions
		authority = authority:gsub ('//', '', 1);		-- remove one pair of '/'
		if is_set(authority) then						-- if anything left (1 or 3+ '/' where authority should be) then
			domain = nil;								-- set to nil which will cause an error message
		end
	end

	return scheme, domain;
end
--[[--------------------------< L I N K _ P A R A M _ O K >---------------------------------------------------

checks the content of |title-link=, |series-link=, |author-link= etc for properly formatted content: no wikilinks, no urls

Link parameters are to hold the title of a wikipedia article so none of the WP:TITLESPECIALCHARACTERS are allowed:
	# < > [ ] | { } _
except the underscore which is used as a space in wiki urls and # which is used for section links

returns false when the value contains any of these characters.

When there are no illegal characters, this function returns TRUE if value DOES NOT appear to be a valid url (the
|<param>-link= parameter is ok); else false when value appears to be a valid url (the |<param>-link= parameter is NOT ok).

]]

local function link_param_ok (value)
	local scheme, domain;
	if value:find ('[<>%[%]|{}]') then					-- if any prohibited characters
		return false;
	end

	scheme, domain = split_url (value);					-- get scheme or nil and domain or nil from url;
	return not is_url (scheme, domain);					-- return true if value DOES NOT appear to be a valid url
end
--[[--------------------------< C H E C K _ U R L >------------------------------------------------------------
Determines whether a URL string appears to be valid.
First we test for space characters. If any are found, return false. Then split the url into scheme and domain
portions, or for protocol relative (//example.com) urls, just the domain. Use is_url() to validate the two
portions of the url. If both are valid, or for protocol relative if domain is valid, return true, else false.
]]
-- Return false when url_str is empty or contains whitespace; otherwise the result of is_url() applied to the
-- scheme and domain that split_url() extracts from it.
local function check_url( url_str )
	if not url_str:match ("^%S+$") then		-- any space character means this can't be a proper url
		return false;
	end
	return is_url (split_url (url_str));	-- both results of split_url() are passed on: scheme, domain
end
--[=[-------------------------< I S _ P A R A M E T E R _ E X T _ W I K I L I N K >----------------------------
Return true if a parameter value has a string that begins and ends with square brackets [ and ] and the first
non-space characters following the opening bracket appear to be a url. The test will also find external wikilinks
that use protocol relative urls. Also finds bare urls.
The frontier pattern prevents a match on interwiki links which are similar to scheme:path urls. The tests that
find bracketed urls are required because the parameters that call this test (currently |title=, |chapter=, |work=,
and |publisher=) may have wikilinks and there are articles or redirects like '//Hus' so, while uncommon, |title=[[//Hus]]
is possible as might be [[en://Hus]].
]=]
-- Look for a url in value, trying four shapes in order: bracketed with scheme, bracketed protocol relative,
-- bare with scheme, bare protocol relative. Returns false when none is found, else the result of is_url()
-- for the extracted scheme (possibly nil) and domain.
local function is_parameter_ext_wikilink (value)
	local text = value:gsub ('([^%s/])/[%a%d].*', '%1');		-- strip path information (the capture prevents false replacement of '//')

	local scheme, domain = text:match ('%f[%[]%[(%a%S*:)(%S+).*%]');	-- ext wikilink with scheme and domain: [xxxx://yyyyy.zzz]
	if scheme then
		return is_url (scheme, domain);
	end

	domain = text:match ('%f[%[]%[//(%S*%.%S+).*%]');			-- protocol relative ext wikilink: [//yyyyy.zzz]
	if domain then
		return is_url (nil, domain);
	end

	scheme, domain = text:match ('(%a%S*:)(%S+)');				-- bare url with scheme; may have leading or trailing plain text
	if scheme then
		return is_url (scheme, domain);
	end

	domain = text:match ('//(%S*%.%S+)');						-- protocol relative bare url: //yyyyy.zzz
	if domain then
		return is_url (nil, domain);
	end

	return false;												-- didn't find anything that is obviously a url
end
--[[-------------------------< C H E C K _ F O R _ U R L >-----------------------------------------------------
loop through a list of parameters and their values. Look at the value and if it has an external link, emit an error message.
]]
-- Examine every name/value pair in parameter_list; when any value holds an external link, queue a single
-- 'param_has_ext_link' error on z.message_tail that names all offending parameters, separated by ", ".
local function check_for_url (parameter_list)
	local offenders = {};									-- one "&#124;name=" entry per failing parameter

	for name, value in pairs (parameter_list) do
		if is_parameter_ext_wikilink (value) then
			offenders[#offenders + 1] = "&#124;" .. name .. "=";
		end
	end

	if #offenders > 0 then
		local listing = table.concat (offenders, ", ");
		table.insert( z.message_tail, { set_error( 'param_has_ext_link', {listing}, true ) } );
	end
end
--[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------
Protects a string that will be wrapped in wiki italic markup '' ... ''
Note: We cannot use <i> for italics, as the expected behavior for italics specified by ''...'' in the title is that
they will be inverted (i.e. unitalicized) in the resulting references. In addition, <i> and '' tend to interact
poorly under Mediawiki's HTML tidy.
]]
-- Guard a string that is about to be wrapped in '' ... '' italic markup: an empty <span /> is placed next to
-- a leading or trailing apostrophe and newlines become spaces. An unset str is returned untouched.
-- Note: for a set str the function returns both results of gsub (the string and the replacement count).
local function safe_for_italics( str )
	if not is_set(str) then
		return str;
	end

	local guarded = str;
	if guarded:sub(1,1) == "'" then
		guarded = "<span />" .. guarded;
	end
	if guarded:sub(-1) == "'" then
		guarded = guarded .. "<span />";
	end

	return guarded:gsub( '\n', ' ' );		-- newlines break italics
end
--[[--------------------------< S A F E _ F O R _ U R L >------------------------------------------------------
Escape sequences for content that will be used for URL descriptions
]]
-- Escape text used as a URL description: '[' and ']' become numeric character references and newlines
-- become spaces. When str contains a [[...]] wikilink a 'wikilink_in_url' error is queued on z.message_tail.
-- Note: both results of gsub (the string and the replacement count) are returned.
local function safe_for_url( str )
	if str:find( "%[%[.-%]%]" ) then
		table.insert( z.message_tail, { set_error( 'wikilink_in_url', {}, true ) } );
	end

	local replacements = {
		['['] = '&#91;',
		[']'] = '&#93;',
		['\n'] = ' ',
	};
	return str:gsub( '[%[%]\n]', replacements );
end
--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------
Applies styling to various parameters. Supplied string is wrapped using a message_list configuration taking one
argument; protects italic styled parameters. Additional text taken from citation_config.presentation - the reason
this function is similar to but separate from wrap_msg().
]]
-- Wrap str in the presentation markup configured under cfg.presentation[key]. Returns "" when str is unset.
-- For the two italic title keys the text is first protected with safe_for_italics().
local function wrap_style (key, str)
	if not is_set( str ) then
		return "";
	end

	local text = str;
	if key == 'italic-title' or key == 'trans-italic-title' then
		text = safe_for_italics( str );		-- only the first result (the string) is kept
	end

	return substitute( cfg.presentation[key], {text} );
end
--[[--------------------------< E X T E R N A L _ L I N K >----------------------------------------------------
Format an external link with error checking
]]
-- Build the wikitext for an external link "[URL label]" followed by any error messages.
--	URL		– link target; checked with check_url()
--	label	– link text; when unset the URL itself is shown and a 'bare_url_missing_title' error is raised
--	source	– name of the parameter that supplied URL, used in the error messages
-- Raises a Lua error when both label and source are unset.
local function external_link( URL, label, source )
	local errors = "";

	if not is_set( label ) then
		label = URL;
		if not is_set( source ) then
			error( cfg.messages["bare_url_no_origin"] );
		end
		errors = set_error( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
	end

	if not check_url( URL ) then
		local bad_url = set_error( 'bad_url', {wrap_style ('parameter', source)}, false, " " );
		errors = bad_url .. errors;			-- the bad-url message goes in front of any earlier message
	end

	local pieces = { "[", URL, " ", safe_for_url( label ), "]", errors };
	return table.concat( pieces );
end
--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
Formats a wiki style external link
]]
-- Format an identifier as "[[link|label]]<separator>[<prefix><id><suffix> <id shown as nowiki text>]".
-- Fields read from options: id, encode, link, label, separator (default "&nbsp;"), prefix, suffix (default "").
-- The id is passed through mw.uri.encode() unless options.encode is something other than true or nil.
local function external_link_id(options)
	local encode = options.encode;
	local url_string = options.id;
	if encode == nil or encode == true then
		url_string = mw.uri.encode( url_string );
	end

	local separator = options.separator or "&nbsp;";
	local suffix = options.suffix or "";
	local shown_id = mw.text.nowiki(options.id);

	return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
		options.link, options.label, separator,
		options.prefix, url_string, suffix,
		shown_id
	);
end
--[[--------------------------< D E P R E C A T E D _ P A R A M E T E R >--------------------------------------
Categorize and emit an error message when the citation contains one or more deprecated parameters. The function includes the
offending parameter name to the error message. Only one error message is emitted regardless of the number of deprecated
parameters in the citation.
]]
local page_in_deprecated_cat;		-- sticky flag so that the category is added only once

-- Queue the 'deprecated_params' error, naming the parameter passed in name, on z.message_tail.
-- Only the first call has any effect; later calls return without adding another message.
local function deprecated_parameter(name)
	if not page_in_deprecated_cat then
		page_in_deprecated_cat = true;				-- note that we've added this category
		table.insert( z.message_tail, { set_error( 'deprecated_params', {name}, true ) } );		-- add error message
	end
end
--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------

Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value.
This function will positive kern either single or double quotes:
	"'Unkerned title with leading and trailing single quote marks'"
	" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)
Double single quotes (italic or bold wikimarkup) are not kerned.

Call this function for chapter titles, for website titles, etc; not for book titles.

Returns str with the 'kern-left' and/or 'kern-right' presentation markup applied where a quote mark was found.

]]

local function kern_quotes (str)
	local cap='';
	local cap2='';

	cap, cap2 = str:match ("^([\"\'])([^\'].+)");		-- match leading double or single quote but not double single quotes
	if is_set (cap) then
		str = substitute (cfg.presentation['kern-left'], {cap, cap2});
	end

	cap, cap2 = str:match ("^(.+[^\'])([\"\'])$")		-- match trailing double or single quote but not double single quotes
	if is_set (cap) then
		str = substitute (cfg.presentation['kern-right'], {cap, cap2});
	end
	return str;
end
--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------

|script-title= holds title parameters that are not written in Latin based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should
not be italicized and may be written right-to-left.  The value supplied by |script-title= is concatenated onto Title after Title has been wrapped
in italic markup.

Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate rtl languages from the English left to right.

|script-title= provides a unique feature.  The value in |script-title= may be prefixed with a two-character ISO639-1 language code and a colon:
	|script-title=ja:*** *** (where * represents a Japanese character)
Spaces between the two-character code and the colon and the colon and the first script character are allowed:
	|script-title=ja : *** ***
	|script-title=ja: *** ***
	|script-title=ja :*** ***
Spaces preceding the prefix are allowed: |script-title = ja:*** ***

The prefix is checked for validity.  If it is a valid ISO639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can
know the language the tag contains.  This may help the browser render the script more correctly.  If the prefix is invalid, the lang attribute
is not added.  At this time there is no error message for this condition.

Supports |script-title= and |script-chapter=

Returns the wrapped script value, or an empty string when script_value holds a prefix but no script.

TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;

]]

local function format_script_value (script_value)
	local lang='';													-- initialize to empty string
	local name;
	if script_value:match('^%l%l%s*:') then							-- if first 3 non-space characters are script language prefix
		lang = script_value:match('^(%l%l)%s*:%s*%S.*');			-- get the language prefix or nil if there is no script
		if not is_set (lang) then
			return '';												-- script_value was just the prefix so return empty string
		end
																	-- if we get this far we have prefix and script
		name = mw.language.fetchLanguageName( lang, "en" );			-- get language name so that we can use it to categorize
		if is_set (name) then										-- is prefix a proper ISO 639-1 language code?
			script_value = script_value:gsub ('^%l%l%s*:%s*', '');	-- strip prefix from script
																	-- is prefix one of these language codes?
			if in_array (lang, {'ar', 'bg', 'bs', 'dv', 'el', 'fa', 'he', 'hy', 'ja', 'ka', 'ko', 'ku', 'mk', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'}) then
				add_prop_cat ('script_with_name', {name, lang})		-- categorize in language-specific categories
			else
				add_prop_cat ('script')								-- catchall for languages not in the list above
			end
			lang = ' lang="' .. lang .. '" ';						-- convert prefix into a lang attribute
		else
			lang = '';												-- invalid so set lang to empty string
		end
	end
	script_value = substitute (cfg.presentation['bdi'], {lang, script_value});	-- isolate in case script is rtl

	return script_value;
end
--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------

Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been
wrapped in <bdi> tags.

Returns title unchanged when script is unset or when format_script_value() returns an empty string for it.

]]

local function script_concatenate (title, script)
	if is_set (script) then
		script = format_script_value (script);			-- <bdi> tags, lang attribute, categorization, etc; returns empty string on error
		if is_set (script) then
			title = title .. ' ' .. script;				-- concatenate title and script title
		end
	end
	return title;
end
--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------

Applies additional message text to various parameter values. Supplied string is wrapped using a message_list
configuration taking one argument.  Supports lower case text for {{citation}} templates.  Additional text taken
from citation_config.messages - the reason this function is similar to but separate from wrap_style().

Parameters:
	key		– key into cfg.messages
	str		– value to insert into the message; an unset value returns ""
	lower	– when exactly true, the message text is lower-cased before str is inserted

]]

local function wrap_msg (key, str, lower)
	if not is_set( str ) then
		return "";
	end
	if true == lower then
		local msg;
		msg = cfg.messages[key]:lower();				-- set the message to lower case before
		return substitute( msg, str );					-- including template text
	else
		return substitute( cfg.messages[key], str );
	end
end
--[[-------------------------< I S _ A L I A S _ U S E D >-----------------------------------------------------
This function is used by select_one() to determine if one of a list of alias parameters is in the argument list
provided by the template.
Input:
args – pointer to the arguments table from calling template
alias – one of the list of possible aliases in the aliases lists from Module:Citation/CS1/Configuration
index – for enumerated parameters, identifies which one
enumerated – true/false flag used to choose how enumerated aliases are examined
value – value associated with an alias that has previously been selected; nil if not yet selected
selected – the alias that has previously been selected; nil if not yet selected
error_list – list of aliases that are duplicates of the alias already selected
Returns:
value – value associated with alias we selected or that was previously selected or nil if an alias not yet selected
selected – the alias we selected or the alias that was previously selected or nil if an alias not yet selected
]]
local function is_alias_used (args, alias, index, enumerated, value, selected, error_list)
if enumerated then -- is this a test for an enumerated parameters?
alias = alias:gsub ('#', index); -- replace '#' with the value in index
else
alias = alias:gsub ('#', ''); -- remove '#' if it exists
end end
return substitute( cfg.presentation[key], {str} ); if is_set(args[alias]) then -- alias is in the template's argument list
if value ~= nil and selected ~= alias then -- if we have already selected one of the aliases
local skip;
for _, v in ipairs(error_list) do -- spin through the error list to see if we've added this alias
if v == alias then
skip = true;
break; -- has been added so stop looking
end
end
if not skip then -- has not been added so
table.insert( error_list, alias ); -- add error alias to the error list
end
else
value = args[alias]; -- not yet selected an alias, so select this one
selected = alias;
end
end
return value, selected; -- return newly selected alias, or previously selected alias
endend
--[[--------------------------< W R A P _ M S G >----------------------------------------------------------------[[--------------------------< S E L E C T _ O N E >----------------------------------------------------------
Chooses one matching parameter from a list of parameters to consider. The list of parameters to consider is just
names. For parameters that may be enumerated, the position of the numerator in the parameter name is identified
by the '#' so |author-last1= and |author1-last= are represented as 'author-last#' and 'author#-last'.
Because enumerated parameter |<param>1= is an alias of |<param>= we must test for both possibilities.
Applies additional message text to various parameter values. Supplied string is wrapped using a message_listGenerates an error if more than one match is present.
configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken
from citation_config.messages - the reason this function is similar to but separate from wrap_style().
]]]]
function wrap_msg (key, str, lower)local function select_one( args, aliases_list, error_condition, index )
if not is_set( str ) then local value = nil; -- the value assigned to the selected parameter
return ""; local selected = ''; -- the name of the parameter we have chosen
local error_list = {};
if index ~= nil then index = tostring(index); end
for _, alias in ipairs( aliases_list ) do -- for each alias in the aliases list
if alias:match ('#') then -- if this alias can be enumerated
if '1' == index then -- when index is 1 test for enumerated and non-enumerated aliases
value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); -- first test for non-enumerated alias
end
value, selected = is_alias_used (args, alias, index, true, value, selected, error_list); -- test for enumerated alias
else
value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); --test for non-enumerated alias
end
end
if #error_list > 0 and 'none' ~= error_condition then -- for cases where this code is used outside of extract_names()
local error_str = "";
for _, k in ipairs( error_list ) do
if error_str ~= "" then error_str = error_str .. cfg.messages['parameter-separator'] end
error_str = error_str .. wrap_style ('parameter', k);
end
if #error_list > 1 then
error_str = error_str .. cfg.messages['parameter-final-separator'];
else
error_str = error_str .. cfg.messages['parameter-pair-separator'];
end
error_str = error_str .. wrap_style ('parameter', selected);
table.insert( z.message_tail, { set_error( error_condition, {error_str}, true ) } );
end end
if true == lower then
local msg; return value, selected;
msg = cfg.messages[key]:lower(); -- set the message to lower case before
str = substitute( msg, {str} ); -- including template text
return str;
else
return substitute( cfg.messages[key], {str} );
end
endend
--[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >----------------------------------------[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >--------------------------------------
Format the three chapter parameters: |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-Format the four chapter parameters: |script-chapter=, |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-
parameter (chapter_url_source used for error messages).parameter (chapter_url_source used for error messages).
]]]]
function format_chapter_title (chapter, transchapter, chapterurl, chapter_url_source)local function format_chapter_title (scriptchapter, chapter, transchapter, chapterurl, chapter_url_source, no_quotes)
local chapter_error = ''; local chapter_error = '';
if not is_set (chapter) then if not is_set (chapter) then
chapter = ''; -- just to be safe for concatenation chapter = ''; -- to be safe for concatenation
if is_set (transchapter) then else
chapter = wrap_style ('trans-quoted-title', transchapter); if false == no_quotes then
chapter_error = " " .. seterror ('trans_missing_chapter'); chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks
chapter = wrap_style ('quoted-title', chapter);
end end
if is_set (chapterurl) then end
chapter = externallink (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate
chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped
if is_set (transchapter) then
transchapter = wrap_style ('trans-quoted-title', transchapter);
if is_set (chapter) then
chapter = chapter .. ' ' .. transchapter;
else -- here when transchapter without chapter or script-chapter
chapter = transchapter; --
chapter_error = ' ' .. set_error ('trans_missing_title', {'chapter'});
end end
return chapter .. chapter_error; end
else -- here when chapter is set
chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks if is_set (chapterurl) then
chapter = wrap_style ('quoted-title', chapter); chapter = external_link (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate
if is_set (transchapter) then end
transchapter = wrap_style ('trans-quoted-title', transchapter);
chapter = chapter .. ' ' .. transchapter; return chapter .. chapter_error;
end
--[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------
This function searches a parameter's value for nonprintable or invisible characters. The search stops at the
first match.
This function will detect the visible replacement character when it is part of the wikisource.
Detects but ignores nowiki and math stripmarkers. Also detects other named stripmarkers (gallery, math, pre, ref)
and identifies them with a slightly different error message. See also coins_cleanup().
Detects but ignores the character pattern that results from the transclusion of {{'}} templates.
Output of this function is an error message that identifies the character or the Unicode group, or the stripmarker
that was detected along with its position (or, for multi-byte characters, the position of its first byte) in the
parameter value.
]]
local function has_invisible_chars (param, v)
local position = ''; -- position of invisible char or starting position of stripmarker
local dummy; -- end of matching string; not used but required to hold end position when a capture is returned
local capture; -- used by stripmarker detection to hold name of the stripmarker
local i=1;
local stripmarker, apostrophe;
while cfg.invisible_chars[i] do
local char=cfg.invisible_chars[i][1] -- the character or group name
local pattern=cfg.invisible_chars[i][2] -- the pattern used to find it
position, dummy, capture = mw.ustring.find (v, pattern) -- see if the parameter value contains characters that match the pattern
if position then
-- if 'nowiki' == capture or 'math' == capture or ('ref' == capture and 'quote' == param) then -- nowiki, math, or quote param and ref stripmarker (not an error condition)
if 'nowiki' == capture or 'math' == capture then -- nowiki, math stripmarker (not an error condition)
stripmarker = true; -- set a flag
elseif true == stripmarker and 'delete' == char then -- because stripmakers begin and end with the delete char, assume that we've found one end of a stripmarker
position = nil; -- unset
elseif 'apostrophe' == char then -- apostrophe template uses &zwj;, hair space and zero-width space
apostrophe = true;
elseif true == apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
position = nil; -- unset
else
local err_msg;
if capture then
err_msg = capture .. ' ' .. char;
else
err_msg = char .. ' ' .. 'character';
end
table.insert( z.message_tail, { set_error( 'invisible_char', {err_msg, wrap_style ('parameter', param), position}, true ) } ); -- add error message
return; -- and done with this parameter
end
end end
if is_set (chapterurl) then i=i+1; -- bump our index
chapter = externallink (chapterurl, chapter); -- adds bare_url_missing_title error if appropriate
end
end end
return chapter;
endend
--[[
Argument wrapper. This function provides support for argument --[[--------------------------< A R G U M E N T _ W R A P P E R >----------------------------------------------
mapping defined in the configuration file so that multiple names
can be transparently aliased to single internal variable.Argument wrapper. This function provides support for argument mapping defined in the configuration file so that
multiple names can be transparently aliased to single internal variable.
]]]]
function argument_wrapper( args )
local function argument_wrapper( args )
local origin = {}; local origin = {};
return setmetatable({ return setmetatable({
ORIGIN = function( self, k ) ORIGIN = function( self, k )
local dummy = self[k]; --force the variable to be loaded. local dummy = self[k]; --force the variable to be loaded.
return origin[k]; return origin[k];
end end
}, },
{ {
__index = function ( tbl, k ) __index = function ( tbl, k )
if origin[k] ~= nil then if origin[k] ~= nil then
return nil; return nil;
end end
local args, list, v = args, cfg.aliases[k]; local args, list, v = args, cfg.aliases[k];
if type( list ) == 'table' then if type( list ) == 'table' then
v, origin[k] = selectone( args, list, 'redundant_parameters' ); v, origin[k] = select_one( args, list, 'redundant_parameters' );
if origin[k] == nil then if origin[k] == nil then
origin[k] = ''; -- Empty string, not nil origin[k] = ''; -- Empty string, not nil
end end
elseif list ~= nil then elseif list ~= nil then
v, origin[k] = args[list], list; v, origin[k] = args[list], list;
else else
-- maybe let through instead of raising an error? -- maybe let through instead of raising an error?
-- v, origin[k] = args[k], k; -- v, origin[k] = args[k], k;
error( cfg.messages['unknown_argument_map'] ); error( cfg.messages['unknown_argument_map'] );
end end
-- Empty strings, not nil; -- Empty strings, not nil;
if v == nil then if v == nil then
v = cfg.defaults[k] or ''; v = cfg.defaults[k] or '';
origin[k] = ''; origin[k] = '';
end end
tbl = rawset( tbl, k, v ); tbl = rawset( tbl, k, v );
return v; return v;
end, end,
}); });
endend
--[[--[[--------------------------< V A L I D A T E >--------------------------------------------------------------
Looks for a parameter's name in the whitelist.Looks for a parameter's name in the whitelist.
Parameters in the whitelist can have three values:Parameters in the whitelist can have three values:
true - active, supported parameters true - active, supported parameters
false - deprecated, supported parameters false - deprecated, supported parameters
nil - unsupported parameters nil - unsupported parameters
]]]]
function validate( name )
local function validate( name )
local name = tostring( name ); local name = tostring( name );
local state = whitelist.basic_arguments[ name ]; local state = whitelist.basic_arguments[ name ];
-- Normal arguments -- Normal arguments
if true == state then return true; end -- valid actively supported parameter if true == state then return true; end -- valid actively supported parameter
if false == state then if false == state then
deprecated_parameter (name); -- parameter is deprecated but still supported deprecated_parameter (name); -- parameter is deprecated but still supported
return true; return true;
end end
-- Arguments with numbers in them -- Arguments with numbers in them
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last# name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#
state = whitelist.numbered_arguments[ name ]; state = whitelist.numbered_arguments[ name ];
if true == state then return true; end -- valid actively supported parameter if true == state then return true; end -- valid actively supported parameter
if false == state then if false == state then
deprecated_parameter (name); -- parameter is deprecated but still supported deprecated_parameter (name); -- parameter is deprecated but still supported
return true; return true;
end end
return false; -- Not supported because not found or name is set to nil return false; -- Not supported because not found or name is set to nil
endend
--[[--------------------------< E R R O R C O M M E N T >------------------------------------------------------

Wraps an error message with css markup.  The markup template comes from cfg.presentation: 'hidden-error' is
used when hidden is truthy (and that template is defined), 'visible-error' in every other case.

	content – the error message text handed to substitute()
	hidden – truthy to request the hidden-error markup

Returns whatever substitute() returns for the chosen template and content.

]]

function errorcomment( content, hidden )
	local wrapper = hidden and cfg.presentation['hidden-error'] or cfg.presentation['visible-error'];
	return substitute( wrapper, content );
end
--[[
Records an error condition and returns the matching error message.  Where the message ends up in the rendered
output is the caller's responsibility.

	error_id – key into cfg.error_conditions; an unknown key raises a Lua error
	arguments – values handed to substitute() to fill in the message template
	raw – when exactly true, the undecorated message and its hidden flag are returned
	prefix, suffix – optional text placed before / after the message; both default to the empty string

Side effects: adds the condition's category (when it has one) to z.error_categories and flags error_id in
z.error_ids.

Returns:
	'', false – for bare_url_missing_title and trans_missing_title once citation_missing_title has been flagged
	message, hidden – when raw is true
	the result of errorcomment() – otherwise
]]

function seterror( error_id, arguments, raw, prefix, suffix )
	local condition = cfg.error_conditions[ error_id ];
	prefix = prefix or "";
	suffix = suffix or "";

	if condition == nil then
		error( cfg.messages['undefined_error'] );
	end
	if is_set( condition.category ) then
		table.insert( z.error_categories, condition.category );
	end

	local message = substitute( condition.message, arguments );
	local help_link = " ([[" .. cfg.messages['help page link'] .. "#" .. condition.anchor
		.. "|" .. cfg.messages['help page label'] .. "]])";
	message = message .. help_link;										-- append the link to the help page

	z.error_ids[ error_id ] = true;										-- remember that this error has occurred

	local is_title_error = inArray( error_id, { 'bare_url_missing_title', 'trans_missing_title' } );
	if is_title_error and z.error_ids['citation_missing_title'] then
		return '', false;												-- the missing title has already been reported
	end

	message = prefix .. message .. suffix;

	if raw ~= true then
		return errorcomment( message, condition.hidden );
	end
	return message, condition.hidden;
end
-- Formats a wiki style external link for an identifier.
-- Output shape: [[link|label]]<separator>[<prefix><id><suffix> <id passed through mw.text.nowiki>]
-- options.id is run through mw.uri.encode() when options.encode is true or nil.
-- options.separator defaults to "&nbsp;" and options.suffix to the empty string.
function externallinkid(options)
	local encode = options.encode;
	local target = options.id;

	if encode == nil or encode == true then
		target = mw.uri.encode( target );
	end

	local separator = options.separator or "&nbsp;";
	local suffix = options.suffix or "";

	return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
		options.link, options.label, separator,
		options.prefix, target, suffix,
		mw.text.nowiki(options.id)
	);
end
-- Formats a wiki style internal link-- Formats a wiki style internal link
function internallinkid(options)local function internal_link_id(options)
return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]', return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
options.link, options.label, options.separator or "&nbsp;", options.link, options.label, options.separator or "&nbsp;",
options.prefix, options.id, options.suffix or "", options.prefix, options.id, options.suffix or "",
mw.text.nowiki(options.id) mw.text.nowiki(options.id)
); );
endend
-- Format an external link with error checking.
--	URL – target of the link; tested with checkurl()
--	label – link text; when not set the URL itself is displayed and a bare_url_missing_title error is added
--	source – name of the parameter that supplied the URL; used in the bare-url error message
-- Raises a Lua error when both label and source are not set.
-- Returns the bracketed external link followed by any error messages.
function externallink( URL, label, source )
	local errors = "";

	if not is_set( label ) then
		if not is_set( source ) then
			error( cfg.messages["bare_url_no_origin"] );
		end
		label = URL;													-- no label so display the url
		errors = seterror( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
	end

	if not checkurl( URL ) then
		errors = seterror( 'bad_url', {}, false, " " ) .. errors;		-- bad_url message goes ahead of any earlier message
	end

	return "[" .. URL .. " " .. safeforurl( label ) .. "]" .. errors;
end
--[[--------------------------< N O W R A P _ D A T E >----------------------------------------------------------[[--------------------------< N O W R A P _ D A T E >--------------------------------------------------------
When date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or isWhen date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or is
MMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYYMMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYY
DOES NOT yet support MMMM YYYY or any of the date ranges.DOES NOT yet support MMMM YYYY or any of the date ranges.
]]]]
function nowrap_date (date)local function nowrap_date (date)
local cap=''; local cap='';
local cap2=''; local cap2='';
if date:match("^%d%d%d%d%-%d%d%-%d%d$") then if date:match("^%d%d%d%d%-%d%d%-%d%d$") then
date = substitute (cfg.presentation['nowrap1'], date); date = substitute (cfg.presentation['nowrap1'], date);
elseif date:match("%a+%s*%d%d?,%s*%d%d%d%d") or date:match ("%d%d?%s*%a+%s*%d%d%d%d") then elseif date:match("^%a+%s*%d%d?,%s+%d%d%d%d$") or date:match ("^%d%d?%s*%a+%s+%d%d%d%d$") then
cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$"); cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$");
date = substitute (cfg.presentation['nowrap2'], {cap, cap2}); date = substitute (cfg.presentation['nowrap2'], {cap, cap2});
end end
return date; return date;
endend
--[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------

ISBN-10 and ISSN validator.  Computes a weighted sum over the first len characters of isxn_str, check digit
included: the character at position i is weighted (len + 1 - i) and 'X' counts as 10.  The number is valid when
that sum is a multiple of 11.  ISBN-13 is checked in check_isbn().

Before calling this function, the isbn/issn must be checked for length and stripped of dashes, spaces and other
non-isxn characters; a character that is neither a decimal digit nor 'X' raises a Lua arithmetic error.

Returns true when the checksum is correct; false otherwise.

]]

local function is_valid_isxn (isxn_str, len)
	local checksum = 0;
	local bytes = { isxn_str:byte(1, len) };							-- byte values: '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
	local byte_X = string.byte( "X" );

	for position = 1, #bytes do
		local digit;
		if bytes[position] == byte_X then
			digit = 10;													-- check digit X represents 10 decimal
		else
			digit = tonumber( string.char( bytes[position] ) );
		end
		checksum = checksum + digit * (len + 1 - position);				-- weights run from len down to 1
	end

	return checksum % 11 == 0;
end
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------

ISBN-13 and ISMN validator.  Sums the first 13 digits of isxn_str, check digit included, with digits at odd
positions weighted 1 and digits at even positions weighted 3.  The number is valid when the sum is a multiple
of 10.

Before calling this function, the isbn-13/ismn must be checked for length and stripped of dashes, spaces and
other non-isxn-13 characters.

Returns true when the checksum is correct; false otherwise.

]]

local function is_valid_isxn_13 (isxn_str)
	local checksum = 0;
	local weight = 1;													-- first digit has weight 1

	for _, byte in ipairs( { isxn_str:byte(1, 13) } ) do				-- byte values: '0' → 0x30 .. '9' → 0x39
		checksum = checksum + weight * tonumber( string.char(byte) );
		weight = 4 - weight;											-- alternate between 1 and 3
	end

	return checksum % 10 == 0;
end
--[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------

Determines whether an ISBN string is valid.

	isbn_str – the isbn as written in the template; may contain hyphens and spaces

The string is rejected when it contains anything other than digits, whitespace, hyphens, or the uppercase X;
when, after hyphens and spaces are removed, it is not 10 or 13 characters long; when a 10-character isbn has X
anywhere but the last position; when a 13-character isbn does not begin with 978 or 979; or when the check
digit does not agree with the calculated value.

Returns true when the isbn is valid; false otherwise.

]]

local function check_isbn( isbn_str )
	-- the hyphen is escaped because a bare '-' that follows a character class inside a set is not defined by the Lua manual
	if isbn_str:find( "[^%s%-0-9X]" ) then								-- fail if isbn_str contains anything but digits, whitespace, hyphens, or the uppercase X
		return false;
	end

	isbn_str = isbn_str:gsub( "[%- ]", "" );							-- remove hyphens and spaces
	local len = isbn_str:len();

	if len == 10 then
		if not isbn_str:match( "^%d*X?$" ) then							-- X is permitted only as the check digit
			return false;
		end
		return is_valid_isxn( isbn_str, 10 );
	elseif len == 13 then
		if not isbn_str:match( "^97[89]%d*$" ) then						-- isbn13 begins with 978 or 979; ismn begins with 979
			return false;
		end
		return is_valid_isxn_13( isbn_str );
	end

	return false;														-- wrong length
end
--[[--------------------------< C H E C K _ I S M N >------------------------------------------------------------

Determines whether an ISMN string is valid and formats it for display.  Similar to isbn-13, ismn is 13 digits
beginning 979-0-... and uses the same check digit calculations.
See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf section 2, pages 9–12.

	id – the ismn as written in the template

Returns the ISMN label wikilink, the handler's separator, and the ismn stripped of whitespace, hyphens, and
endashes; a bad_ismn error message is appended when the ismn does not validate.

]]

local function ismn (id)
	local handler = cfg.id_handlers['ISMN'];

	-- string.gsub works on bytes: the multi-byte endash is removed as a whole sequence in its own pass because,
	-- inside a character class, its three bytes would each match on their own and mutilate other multi-byte characters
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );						-- strip whitespace, hyphens, and endashes from the ismn

	local valid_ismn = 13 == id:len()									-- ismn must be 13 digits
		and nil ~= id:match( "^9790%d*$" )								-- and begin 9790
		and is_valid_isxn_13 (id);										-- and have a correct check digit

--	text = internal_link_id({link = handler.link, label = handler.label,	-- use this (or external version) when there is some place to link to
--		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	local text = "[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id;	-- because no place to link to yet

	if not valid_ismn then
		text = text .. ' ' .. set_error( 'bad_ismn' );					-- add an error message if the ismn is invalid
	end

	return text;
end
--[[--------------------------< I S S N >----------------------------------------------------------------------

Validate and format an issn.  This code fixes the case where an editor has included an ISSN in the citation but
has separated the two groups of four digits with a space.  When that condition occurred, the resulting link
looked like this:

	|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]  -- can't have spaces in an external link

This code now prevents that by inserting a hyphen at the issn midpoint.  It also validates the issn for length
and makes sure that the checkdigit agrees with the calculated value.  Incorrect length (8 digits), characters
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message.  A
valid issn is always displayed with a hyphen, even if it was given as a single group of 8 digits; an invalid
issn is displayed exactly as it was written.

	id – the issn as written in the template

Returns the formatted external link, followed by a bad_issn error message when the issn does not validate.

]]

local function issn(id)
	local issn_copy = id;												-- save a copy of unadulterated issn; use this version for display if issn does not validate
	local handler = cfg.id_handlers['ISSN'];

	-- string.gsub works on bytes: the multi-byte endash is removed as a whole sequence in its own pass because,
	-- inside a character class, its three bytes would each match on their own and silently delete other
	-- characters that happen to be built from the same bytes
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );						-- strip whitespace, hyphens, and endashes from the issn

	local valid_issn = 8 == id:len()									-- 8 characters long
		and nil ~= id:match( "^%d*X?$" )								-- containing only 0-9 or X in the last position
		and is_valid_isxn(id, 8);										-- with a correct check digit

	if valid_issn then
		id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 );		-- if valid, display correctly formatted version
	else
		id = issn_copy;													-- if not valid, show the invalid issn with error message
	end

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	if not valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn' )					-- add an error message if the issn is invalid
	end

	return text
end
--[[--------------------------< A M A Z O N >--------------------------------------------------------------------[[--------------------------< A M A Z O N >------------------------------------------------------------------
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alphaFormats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digitcharacters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
]]]]
function amazon(id, domain)local function amazon(id, domain)
local err_cat = "" local err_cat = ""
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
err_cat = ' ' .. seterror ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
else else
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
if checkisbn( id ) then -- see if asin value is isbn10 if check_isbn( id ) then -- see if asin value is isbn10
table.insert( z.maintenance_cats, "CS1 održavanje: ASIN koristi ISBN broj"); -- add to maint category add_maint_cat ('ASIN');
elseif not is_set (err_cat) then elseif not is_set (err_cat) then
err_cat = ' ' .. seterror ('bad_asin'); -- asin is not isbn10 err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
end end
elseif not id:match("^%u[%d%u]+$") then elseif not id:match("^%u[%d%u]+$") then
err_cat = ' ' .. seterror ('bad_asin'); -- asin doesn't begin with uppercase alpha err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
end end
end end
if not is_set(domain) then if not is_set(domain) then
domain = "com"; domain = "com";
elseif inArray (domain, {'jp', 'uk'}) then -- Japan, United Kingdom elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
domain = "co." .. domain; domain = "co." .. domain;
elseif inArray (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
domain = "com." .. domain; domain = "com." .. domain;
end end
local handler = cfg.id_handlers['ASIN']; local handler = cfg.id_handlers['ASIN'];
return externallinkid({link = handler.link, return external_link_id({link=handler.link,
label=handler.label , prefix="//www.amazon."..domain.."/dp/",id=id, label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
encode=handler.encode, separator = handler.separator}) .. err_cat; id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
endend
--[[--------------------------< A R X I V >----------------------------------------------------------------------[[--------------------------< A R X I V >--------------------------------------------------------------------
See: http://arxiv.org/help/arxiv_identifierSee: http://arxiv.org/help/arxiv_identifier
format and error check arXiv identifier. There are three valid forms of the identifier:format and error check arXiv identifier. There are three valid forms of the identifier:
the first form, valid only between date codes 9108 and 0703 is:the first form, valid only between date codes 9108 and 0703 is:
arXiv:<archive>.<class>/<date code><number><version> arXiv:<archive>.<class>/<date code><number><version>
where:where:
<archive> is a string of alpha characters - may be hyphenated; no other punctuation <archive> is a string of alpha characters - may be hyphenated; no other punctuation
<class> is a string of alpha characters - may be hyphenated; no other punctuation <class> is a string of alpha characters - may be hyphenated; no other punctuation
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
first digit of YY for this form can only 9 and 0 first digit of YY for this form can only 9 and 0
<number> is a three-digit number <number> is a three-digit number
<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented) <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
the second form, valid from April 2007 through December 2014 is:the second form, valid from April 2007 through December 2014 is:
arXiv:<date code>.<number><version> arXiv:<date code>.<number><version>
where:where:
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
<number> is a four-digit number <number> is a four-digit number
<version> is a 1 or more digit number preceded with a lowercase v; no spaces <version> is a 1 or more digit number preceded with a lowercase v; no spaces
the third form, valid from January 2015 is:the third form, valid from January 2015 is:
arXiv:<date code>.<number><version> arXiv:<date code>.<number><version>
where:where:
<date code> and <version> are as defined for 0704-1412 <date code> and <version> are as defined for 0704-1412
<number> is a five-digit number <number> is a five-digit number
]]]]
-- Formats an arXiv identifier link and validates the identifier against the three
-- arXiv formats (9108-0703, 0704-1412, 1501-).
--   id:    the arXiv identifier string
--   class: optional arXiv classification; when set, a link to //arxiv.org/archive/<class> is appended
-- Returns the rendered identifier text, followed by a bad_arxiv error message when
-- the identifier does not validate, followed by the class link (if any).
local function arxiv (id, class)
	local handler = cfg.id_handlers['ARXIV'];
	local err_cat = '';
	local valid = false;														-- presume invalid until one of the three formats validates
	local year, month;
	local text;

	if id:match ("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match ("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then	-- test for the 9108-0703 format w/ & w/o version
		year, month = id:match ("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
		year = tonumber (year);
		month = tonumber (month);
		valid = (90 < year or 8 > year) and										-- year must be 91-99 or 00-07
			(1 <= month and 12 >= month) and									-- month must be 01-12
			not (91 == year and 7 > month) and									-- this format starts in July 1991 ...
			not (7 == year and 3 < month);										-- ... and ends in March 2007
	elseif id:match ("^%d%d[01]%d%.%d%d%d%d$") or id:match ("^%d%d[01]%d%.%d%d%d%dv%d+$") then	-- test for the 0704-1412 format w/ & w/o version
		year, month = id:match ("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
		year = tonumber (year);
		month = tonumber (month);
		valid = (7 <= year and 14 >= year) and									-- year must be 07-14
			(1 <= month and 12 >= month) and									-- month must be 01-12
			not (7 == year and 4 > month);										-- this format starts in April 2007
	elseif id:match ("^%d%d[01]%d%.%d%d%d%d%d$") or id:match ("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then	-- test for the 1501- format w/ & w/o version
		year, month = id:match ("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
		year = tonumber (year);
		month = tonumber (month);
		valid = (15 <= year) and (1 <= month and 12 >= month);					-- year 15 or later (future years are not tested), month 01-12
	end																			-- no format matched: valid stays false

	if not valid then
		err_cat = ' ' .. set_error ('bad_arxiv');								-- set error message
	end

	text = external_link_id ({link = handler.link, label = handler.label,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;

	local class_link = '';														-- empty string for concatenation
	if is_set (class) then
		class_link = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]';	-- external link within square brackets, not wikilink
	end

	return text .. class_link;
end
--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------

lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
1. Remove all blanks.
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
3. If there is a hyphen in the string:
	a. Remove it.
	b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
		1. All these characters should be digits, and there should be six or less. (not done in this function)
		2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.

Returns a normalized lccn for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.

]]

local function normalize_lccn (lccn)
	lccn = lccn:gsub ("%s", "");												-- 1. strip whitespace (single assignment drops gsub's count)

	if lccn:find ('/', 1, true) then											-- plain find; '/' is a literal
		lccn = lccn:match ("(.-)/");											-- 2. remove forward slash and all characters to the right of it
	end

	local prefix, suffix = lccn:match ("(.+)%-(.+)");							-- 3.a remove hyphen by splitting the string into prefix and suffix

	if nil ~= suffix then														-- if there was a hyphen
		suffix = string.rep ("0", 6 - suffix:len()) .. suffix;					-- 3.b.2 left fill the suffix with 0s if suffix length less than 6
		lccn = prefix .. suffix;												-- reassemble the lccn
	end

	return lccn;
end
--[[--------------------------< L C C N >----------------------------------------------------------------------

Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long. The length of the LCCN dictates the character type of the first 1-3 characters; the
rightmost eight are always digits. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/

length = 8 then all digits
length = 9 then lccn[1] is lower case alpha
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
length = 12 then lccn[1] and lccn[2] are both lower case alpha

The link is rendered from the lccn as given; the normalized form is used only for validation.

]]

local function lccn(lccn)
	local handler = cfg.id_handlers['LCCN'];
	local err_cat = '';															-- presume that LCCN is valid
	local id = normalize_lccn (lccn);											-- canonical form (no whitespace, hyphens, forward slashes)
	local len = id:len();														-- get the length of the lccn
	local valid = false;														-- any length other than 8-12 is invalid

	if 8 == len then
		valid = nil == id:match ("[^%d]");										-- must be all digits
	elseif 9 == len then														-- LCCN should be adddddddd
		valid = nil ~= id:match ("%l%d%d%d%d%d%d%d%d");
	elseif 10 == len then														-- LCCN should be aadddddddd or dddddddddd
		valid = nil == id:match ("[^%d]") or nil ~= id:match ("^%l%l%d%d%d%d%d%d%d%d");
	elseif 11 == len then														-- LCCN should be aaadddddddd or adddddddddd
		valid = nil ~= (id:match ("^%l%l%l%d%d%d%d%d%d%d%d") or id:match ("^%l%d%d%d%d%d%d%d%d%d%d"));
	elseif 12 == len then														-- LCCN should be aadddddddddd
		valid = nil ~= id:match ("^%l%l%d%d%d%d%d%d%d%d%d%d");
	end

	if not valid or nil ~= lccn:find ('%s') then								-- bad form or wrong length, or the lccn as given contains whitespace
		err_cat = ' ' .. set_error ('bad_lccn');								-- set an error message
	end

	return external_link_id ({link = handler.link, label = handler.label,
			prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
--[[--------------------------< P M I D >----------------------------------------------------------------------

Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up.  This code checks the PMID to see that it
contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued.

]]

local function pmid(id)
	local test_limit = 30000000;												-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local err_cat = '';															-- presume that PMID is valid

	if id:match ("[^%d]") then													-- if PMID has anything but digits
		err_cat = ' ' .. set_error ('bad_pmid');								-- set an error message
	else																		-- PMID is only digits (or empty)
		local id_num = tonumber (id);											-- convert id to a number for range testing; nil when id is empty
		if nil == id_num or 1 > id_num or test_limit < id_num then				-- empty, or outside test limit boundaries
			err_cat = ' ' .. set_error ('bad_pmid');							-- set an error message
		end
	end

	return external_link_id ({link = handler.link, label = handler.label,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------

Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date.  If embargo date is
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
|embargo= was not set in this cite.  An expired embargo also adds the 'embargo' maintenance category.  When either date
cannot be formatted, an empty string is returned.

]]

local function is_embargoed (embargo)
	if is_set (embargo) then
		local lang = mw.getContentLanguage();
		local good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo);	-- 'U' format: dates are compared as numbers below
		local good2, todays_date = pcall (lang.formatDate, lang, 'U');

		if good1 and good2 then													-- if embargo date and today's date are good dates
			if tonumber (embargo_date) >= tonumber (todays_date) then			-- is embargo date in the future?
				return embargo;													-- still embargoed
			end
			add_maint_cat ('embargo');
			return '';															-- unset because embargo has expired
		end
	end
	return '';																	-- |embargo= not set (or not a usable date): return empty string
end
--[[--------------------------< P M C >------------------------------------------------------------------------

Format a PMC, do simple error checking, and check for embargoed articles.

The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
be linked to the article.  If the embargo date is today or in the past, or if it is empty or omitted, then the
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.

PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link.  Function is_embargoed ()
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.

PMCs are sequential numbers beginning at 1 and counting up.  This code checks the PMC to see that it contains only digits and is less
than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.

]]

local function pmc(id, embargo)
	local test_limit = 5000000;													-- update this value as PMCs approach
	local handler = cfg.id_handlers['PMC'];
	local err_cat = '';															-- presume that PMC is valid
	local text;

	if id:match ("[^%d]") then													-- if PMC has anything but digits
		err_cat = ' ' .. set_error ('bad_pmc');									-- set an error message
	else																		-- PMC is only digits (or empty)
		local id_num = tonumber (id);											-- convert id to a number for range testing; nil when id is empty
		if nil == id_num or 1 > id_num or test_limit < id_num then				-- empty, or outside test limit boundaries
			err_cat = ' ' .. set_error ('bad_pmc');								-- set an error message
		end
	end

	if is_set (embargo) then													-- is PMC still embargoed?
		text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat;	-- still embargoed so no external link
	else
		text = external_link_id ({link = handler.link, label = handler.label,	-- no embargo date or embargo has expired, ok to link to article
				prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
	end
	return text;
end
-- Formats a DOI and checks for DOI errors.

-- DOI names contain two parts: prefix and suffix separated by a forward slash.
--  Prefix: directory indicator '10.' followed by a registrant code
--  Suffix: character string of any length chosen by the registrant

-- This function checks a DOI name for: prefix/suffix.  If the doi name contains spaces or endashes,
-- or, if it ends with a period or a comma, this function will emit a bad_doi error message.

-- DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
-- and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely if ever used in doi names.

-- When inactive is set the DOI is rendered without an external link, the page is added to an
-- inactive-DOI error category, and an "inactive" annotation is appended to the returned text.

local function doi(id, inactive)
	local handler = cfg.id_handlers['DOI'];
	local cat = "";
	local inactive_note = "";													-- annotation appended when the doi is marked inactive
	local text;

	if is_set (inactive) then
		local inactive_year = inactive:match ("%d%d%d%d");						-- try to get the year portion from the inactive date
		text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;	-- inactive: label is wikilinked, the doi itself is not linked
		if inactive_year then
			table.insert (z.error_categories, "Pages with DOIs inactive since " .. inactive_year);
		else
			table.insert (z.error_categories, "Pages with inactive DOIs");		-- when inactive doesn't contain a recognizable year
		end
		inactive_note = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")";
	else
		text = external_link_id ({link = handler.link, label = handler.label,
				prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode});
	end

	if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then						-- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
		cat = ' ' .. set_error ('bad_doi');
	end
	return text .. inactive_note .. cat;
end
--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------

Formats an OpenLibrary link, and checks for associated errors.

A valid identifier is one or more digits followed by 'A' (author), 'M' (book), or 'W' (work); the trailing
letter selects the path appended to the handler's prefix.  Any other identifier is linked with the plain
'OL' path and a bad_ol error message is appended.

]]

local function openlibrary(id)
	local paths = {																-- path segment for each identifier type
		A = 'authors/OL',
		M = 'books/OL',
		W = 'works/OL',
		};
	local code = id:match ("^%d+([AMW])$");										-- only digits followed by 'A', 'M', or 'W'; nil otherwise
	local handler = cfg.id_handlers['OL'];

	local text = external_link_id ({link = handler.link, label = handler.label,
			prefix = handler.prefix .. (paths[code] or 'OL'),
			id = id, separator = handler.separator, encode = handler.encode});

	if not code then															-- identifier did not have the expected form
		text = text .. ' ' .. set_error ('bad_ol');
	end
	return text;
end
--[[
Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but has separated the two groups of four
digits with a space. When that condition occurred, the resulting link looked like this:
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length and makes sure that the checkdigit agrees
with the calculated value. Incorrect length (8 digits), characters other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn
error message. The issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
]]
function issn(id)
	local handler = cfg.id_handlers['ISSN'];
	local stripped = id:gsub( "[%s-–]", "" );									-- the issn with spaces, hyphens, and endashes removed
	local display_id = id;														-- show the issn as given unless it validates
	local valid_issn = false;													-- wrong length or improper character unless shown otherwise

	if 8 == stripped:len() and nil ~= stripped:match( "^%d*X?$" ) then			-- 8 characters long, containing only 0-9 or X in the last position
		valid_issn = is_valid_isxn(stripped, 8);								-- validate the check digit
	end

	if true == valid_issn then
		display_id = string.sub( stripped, 1, 4 ) .. "-" .. string.sub( stripped, 5 );	-- if valid, display correctly formatted version
	end

	local text = externallinkid({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=display_id,separator=handler.separator, encode=handler.encode})

	if false == valid_issn then
		return text .. ' ' .. seterror( 'bad_issn' )							-- add an error message if the issn is invalid
	end
	return text
end
--[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------

Validate and format a usenet message id.  Simple error checking, looks for 'id-left@id-right' not enclosed in
'<' and/or '>' angle brackets.

]]

local function message_id (id)
	local handler = cfg.id_handlers['USENETID'];

	local text = external_link_id ({link = handler.link, label = handler.label,	-- local: must not leak into the global table
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode});

	if not id:match ('^.+@.+$') or not id:match ('^[^<].*[^>]$') then			-- doesn't have '@' with text on both sides, or first character is '<', or last character is '>'
		text = text .. ' ' .. set_error ('bad_message_id');						-- add an error message if the message id is invalid
	end

	return text;
end
--[[--------------------------< S E T _ T I T L E T Y P E >----------------------------------------------------

This function sets default title types (equivalent to the citation including |type=<default value>) for those templates that have defaults.
Also handles the special case where it is desirable to omit the title type from the rendered citation (|type=none).

]]

local function set_titletype (cite_class, title_type)
	if not is_set (title_type) then
		return cfg.title_types [cite_class] or '';								-- set template's default title type; else empty string for concatenation
	end

	if "none" == title_type then
		return "";																-- if |type=none then type parameter not displayed
	end

	return title_type;															-- if |type= has been set to any other value use that value
end
--[[
Determines whether a URL string is valid

At present the only check is whether the string appears to
be prefixed with a URI scheme.  It is not determined whether
the URI scheme is valid or whether the URL is otherwise well
formed.

Returns true when url_str is protocol-relative (begins with '//') or when a colon
appears before the first forward slash; false otherwise.
]]
function checkurl( url_str )
	if "//" == url_str:sub(1,2) then											-- protocol-relative
		return true;
	end
	return nil ~= url_str:match( "^[^/]*:" );									-- URL scheme: a colon with no slash in front of it
end
--[[--------------------------< C L E A N _ I S B N >----------------------------------------------------------

Removes irrelevant text and dashes from ISBN number
Similar to that used for Special:BookSources

Every character other than a hyphen, the digits 0-9, and uppercase X is removed.  Only the cleaned
string is returned.

]]

local function clean_isbn( isbn_str )
	return (isbn_str:gsub( "[^-0-9X]", "" ));									-- parentheses discard gsub's second return value (the replacement count)
end
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a string where all of lua's magic characters have been escaped.  This is important because functions like
string.gsub() treat their pattern and replace strings as patterns, not literal strings.

]]

local function escape_lua_magic_chars (argument)
	-- one pass: prefix each magic character ( ^ $ ( ) % . [ ] * + - ? ) with '%';
	-- %0 in the replacement is the matched character, and the parentheses drop gsub's count
	return (argument:gsub ("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"));
end
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
markup a title have, as a result, provided valid markup.  When they don't, some single apostrophes are left behind.

argument: the text to clean; returned unchanged when it is nil or an empty string
returns: argument with every run of five, four, three, or two consecutive apostrophes removed

]]

function strip_apostrophe_markup (argument)
	if not is_set (argument) then return argument; end

	-- Longest markup first: bold italic (5), italic start and end without content (4), bold (3), italic (2).
	-- One pass per length is enough: removing k-apostrophe groups leaves fewer than k apostrophes in any
	-- run, and runs cannot merge because the characters that separate them are never removed.
	local markup = {"'''''", "''''", "'''", "''"};
	for _, apostrophes in ipairs (markup) do
		argument = argument:gsub (apostrophes, "");								-- remove all instances of it
	end
	return argument;															-- done
end
--[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs).

Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with
strings of %27%27...

title: the plain title; may be nil or empty
script: the script title, optionally carrying a two-lowercase-letter language prefix followed by a colon; may be nil or empty
returns: title and script joined by a single space when both are non-empty; otherwise whichever one is
	non-empty, or an empty string when neither is

]]

function make_coins_title (title, script)
	if is_set (title) then
		title = strip_apostrophe_markup (title);								-- strip any apostrophe markup
	else
		title = '';																-- if not set, make sure title is an empty string
	end

	if is_set (script) then
		script = script:gsub ('^%l%l%s*:%s*', '');								-- remove language prefix if present (script value may now be empty string)
		script = strip_apostrophe_markup (script);								-- strip any apostrophe markup
	else
		script = '';															-- if not set, make sure script is an empty string
	end

	if is_set (title) and is_set (script) then
		script = ' ' .. script;													-- add a space before we concatenate
	end
	return title .. script;														-- return the concatenation
end
--[[--------------------------< G E T _ C O I N S _ P A G E S >--------------------------------------------------[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------
Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
]]]]
function get_coins_pages (pages)
local function get_coins_pages (pages)
local pattern; local pattern;
if not is_set (pages) then return pages; end -- if no page numbers then we're done if not is_set (pages) then return pages; end -- if no page numbers then we're done
while true do while true do
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
if nil == pattern then break; end -- no more urls if nil == pattern then break; end -- no more urls
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
end end
pages = pages:gsub("[%[%]]", ""); -- remove the brackets pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?