-1096 Removals
+2544 Additions

Created by:srdjanm

11
-- Module-wide accumulators. Functions in this file append to these tables with
-- table.insert (or index them by error id) while a citation is processed.
local z = {
	error_categories = {},		-- categories for citations that contain errors
	error_ids = {},				-- error ids that have been raised, keyed by id
	message_tail = {},			-- error messages collected for later output
	maintenance_cats = {},		-- categories for citations that are not erroneous but could use a little work
	properties_cats = {},		-- categories based on properties of the citation, e.g. language of the source
}
99
10-- Whether variable is set or not10--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
11]]
-- Forward declarations: both names are nil here and are expected to be assigned
-- later with functions from Module:Citation/CS1/Date_validation.
local dates, year_date_check

-- Configuration tables; per the original comment these are defined in Module:Citation/CS1/Configuration.
local cfg = {}
-- Tables of valid template parameter names; per the original comment these are defined in Module:Citation/CS1/Whitelist.
local whitelist = {}
16
17--[[--------------------------< I S _ S E T >------------------------------------------------------------------
18
19Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string.
20This function is global because it is called from both this module and from Date validation
21
22]]
-- Reports whether var holds a usable value, i.e. anything other than nil or the
-- empty string. Note that false and 0 therefore count as 'set'.
-- Intentionally global: the header comment in this file says Date validation calls it too.
function is_set( var )
	return var ~= nil and var ~= '';
end
1426
15-- First set variable or nil if none27--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------
16function first_set(...)28
17 local list = {...};29Locates and returns the first set value in a table of values where the order established in the table,
18 for _, var in pairs(list) do30left-to-right (or top-to-bottom), is the order in which the values are evaluated. Returns nil if none are set.
19 if is_set( var ) then31
20 return var;32This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs. With the pairs
33version the order of evaluation could not be guaranteed. With the ipairs version, a nil value would terminate
34the for-loop before it reached the actual end of the list.
35
36]]
37
-- Returns the first member of list, scanning indexes 1..count in order, for which
-- is_set() is true. Returns nothing (nil) when no member in that range is set.
-- count is passed explicitly so that nil holes in list do not end the scan early.
local function first_set (list, count)
	for index = 1, count do
		local candidate = list[index];
		if is_set( candidate ) then
			return candidate;					-- first set member wins
		end
	end
end
2447
25-- Whether needle is in haystack48--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------
26function inArray( needle, haystack )49
50Whether needle is in haystack
51
52]]
53
-- Searches the sequence haystack for needle.
-- Returns the 1-based index of the first element equal to needle, or false when
-- needle is nil or is not found. ipairs stops at the first nil element, so
-- anything after a hole in haystack is not examined.
local function in_array( needle, haystack )
	if needle ~= nil then
		for index, candidate in ipairs( haystack ) do
			if candidate == needle then
				return index;
			end
		end
	end
	return false;
end
3765
38--[[66--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
39Categorize and emit an error message when the citation contains one or more deprecated parameters. Because deprecated parameters (currently |month=,67
40|coauthor=, and |coauthors=) aren't related to each other and because these parameters may be concatenated into the variables used by |date= and |author#= (and aliases)68Populates numbered arguments in a message string using an argument table.
41details of which parameter caused the error message are not provided. Only one error message is emitted regardless of the number of deprecated parameters in the citation.69
70]]
71
-- Fills the numbered arguments of msg from args using mw.message.newRawMessage
-- and returns the plain-text result. When args is nil or false, msg is returned
-- unchanged; msg is also the fallback if the rendered result is falsy.
local function substitute( msg, args )
	if not args then
		return msg;
	end
	local rendered = mw.message.newRawMessage( msg, args ):plain();
	return rendered or msg;
end
75
76--[[--------------------------< E R R O R _ C O M M E N T >----------------------------------------------------
77
78Wraps error messages with css markup according to the state of hidden.
79
80]]
-- Wraps content in the 'hidden-error' or 'visible-error' presentation template
-- from cfg.presentation, chosen by the truthiness of hidden. If hidden is truthy
-- but the 'hidden-error' template is missing, the visible template is used.
local function error_comment( content, hidden )
	local template;
	if hidden then
		template = cfg.presentation['hidden-error'];
	end
	if not template then
		template = cfg.presentation['visible-error'];
	end
	return substitute( template, content );
end
84
85--[[--------------------------< S E T _ E R R O R >--------------------------------------------------------------
86
87Sets an error condition and returns the appropriate error message. The actual placement of the error message in the output is
88the responsibility of the calling function.
89
90]]
-- Records an error condition and builds its message; placing the message in the
-- output is left to the caller.
--   error_id   key into cfg.error_conditions; an unknown key raises a Lua error
--   arguments  values substituted into the condition's message text
--   raw        when exactly true, return the unwrapped message plus the hidden flag
--   prefix, suffix  optional text placed around the message (default "")
-- Returns:
--   '', false               for 'bare_url_missing_title' / 'trans_missing_title'
--                           once 'citation_missing_title' has been recorded
--   message, hidden         when raw == true
--   error_comment(message)  otherwise
-- Side effects: may append to z.error_categories; always marks z.error_ids[error_id].
local function set_error( error_id, arguments, raw, prefix, suffix )
	local error_state = cfg.error_conditions[ error_id ];

	if not prefix then prefix = ""; end
	if not suffix then suffix = ""; end

	if error_state == nil then
		error( cfg.messages['undefined_error'] );			-- does not return
	end
	if is_set( error_state.category ) then
		table.insert( z.error_categories, error_state.category );
	end

	local message = substitute( error_state.message, arguments );
	local help_link = " ([[" .. cfg.messages['help page link'] ..
		"#" .. error_state.anchor .. "|" ..
		cfg.messages['help page label'] .. "]])";
	message = message .. help_link;

	-- The id is recorded (and the category above already added) even when the
	-- message text itself is suppressed below.
	z.error_ids[ error_id ] = true;
	local depends_on_title = error_id == 'bare_url_missing_title' or error_id == 'trans_missing_title';
	if depends_on_title and z.error_ids['citation_missing_title'] then
		return '', false;
	end

	message = prefix .. message .. suffix;

	if raw == true then
		return message, error_state.hidden;
	end

	return error_comment( message, error_state.hidden );
end
123
124--[[--------------------------< A D D _ M A I N T _ C A T >------------------------------------------------------
125
126Adds a category to z.maintenance_cats using names from the configuration file with additional text if any.
127To prevent duplication, the added_maint_cats table lists the categories by key that have been added to z.maintenance_cats.
128
129]]
130
-- Keys of the maintenance categories already added, so each is added only once.
local added_maint_cats = {}

-- Appends the maintenance category named by cfg.maint_cats[key] (with arguments
-- substituted into it) to z.maintenance_cats the first time key is seen;
-- later calls with the same key do nothing.
local function add_maint_cat (key, arguments)
	if added_maint_cats [key] then
		return;												-- already added
	end
	added_maint_cats [key] = true;
	table.insert( z.maintenance_cats, substitute (cfg.maint_cats [key], arguments));
end
138
139--[[--------------------------< A D D _ P R O P _ C A T >--------------------------------------------------------
140
141Adds a category to z.properties_cats using names from the configuration file with additional text if any.
142
143]]
144
-- Keys of the property categories already added, so each is added only once.
local added_prop_cats = {}

-- Appends the property category named by cfg.prop_cats[key] (with arguments
-- substituted into it) to z.properties_cats the first time key is seen;
-- later calls with the same key do nothing.
local function add_prop_cat (key, arguments)
	if added_prop_cats [key] then
		return;												-- already added
	end
	added_prop_cats [key] = true;
	table.insert( z.properties_cats, substitute (cfg.prop_cats [key], arguments));
end
152
153--[[--------------------------< A D D _ V A N C _ E R R O R >----------------------------------------------------
154
155Adds a single Vancouver system error message to the template's output regardless of how many errors actually exist.
156To prevent duplication, added_vanc_errs is nil until an error message is emitted.
157
158]]
159
-- nil until the first Vancouver error has been emitted, then true.
local added_vanc_errs;

-- Appends one raw 'vancouver' error (message and hidden flag, as returned by
-- set_error) to z.message_tail on the first call; later calls do nothing.
local function add_vanc_error ()
	if added_vanc_errs then
		return;												-- already reported
	end
	added_vanc_errs = true;
	table.insert( z.message_tail, { set_error( 'vancouver', {}, true ) } );
end
167
168
169--[[--------------------------< I S _ S C H E M E >------------------------------------------------------------
170
171does this thing that purports to be a uri scheme seem to be a valid scheme? The scheme is checked to see if it
172is in agreement with http://tools.ietf.org/html/std66#section-3.1 which says:
173 Scheme names consist of a sequence of characters beginning with a
174 letter and followed by any combination of letters, digits, plus
175 ("+"), period ("."), or hyphen ("-").
176
177returns true if it does, else false
178
179]]
180
-- Tests whether scheme starts with something shaped like a uri scheme: a letter
-- followed by any mix of letters, digits, '+', '.', or '-', then a colon.
-- Returns the matched text (truthy) on success and nil when there is no match;
-- a nil or false scheme is handed back unchanged. Callers should rely on
-- truthiness only, since the result is not a strict boolean.
local function is_scheme (scheme)
	if not scheme then
		return scheme;
	end
	return scheme:match ('^%a[%a%d%+%.%-]*:');
end
184
185
186--[=[-------------------------< I S _ D O M A I N _ N A M E >--------------------------------------------------
187
188Does this thing that purports to be a domain name seem to be a valid domain name?
189
190Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5
191BNF defined here: https://tools.ietf.org/html/rfc4234
192Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15;
193 see also [[Single-letter second-level domain]]
194list of tlds: https://www.iana.org/domains/root/db
195
196rfc952 (modified by rfc 1123) requires the first and last character of a hostname to be a letter or a digit. Between
197the first and last characters the name may use letters, digits, and the hyphen.
198
199Also allowed are IPv4 addresses. IPv6 not supported
200
201domain is expected to be stripped of any path so that the last character is the last character of the tld. tld
202is two or more alpha characters. Any preceding '//' (from splitting a url with a scheme) will be stripped
203here. Perhaps not necessary but retained in case it is necessary for IPv4 dot decimal.
204
205There are several tests:
206 the first character of the whole domain name including subdomains must be a letter or a digit
207 single-letter/digit second-level domains in the .org TLD
208 q, x, and z SL domains in the .com TLD
209 i and q SL domains in the .net TLD
210 single-letter SL domains in the ccTLDs (where the ccTLD is two letters)
211 two-character SL domains in gTLDs (where the gTLD is two or more letters)
212 three-plus-character SL domains in gTLDs (where the gTLD is two or more letters)
213 IPv4 dot-decimal address format; TLD not allowed
214
215returns true if domain appears to be a proper name and tld or IPv4 address, else false
216
217]=]
218
-- Tests whether domain looks like a valid host name plus TLD, or an IPv4
-- dot-decimal address. A leading '//' is ignored. Returns true or false;
-- a nil/false domain yields false.
local function is_domain_name (domain)
	if not domain then
		return false;
	end

	local host = domain:gsub ('^//', '');					-- drop a leading '//' once, up front

	if not host:match ('^[%a%d]') then						-- must start with a letter or digit
		return false;
	end

	-- Accepted shapes, tried in order; the first pattern that matches wins.
	local accepted_shapes = {
		'%f[%a%d][%a%d]%.org$',								-- one character .org hostname
		'%f[%a][qxz]%.com$',								-- assigned one character .com hostnames
		'%f[%a][iq]%.net$',									-- assigned one character .net hostnames
		'%f[%a%d][%a%d]%.%a%a$',							-- one character hostname and two-letter cctld
		'%f[%a%d][%a%d][%a%d]%.%a%a+$',						-- two character hostname and tld
		'%f[%a%d][%a%d][%a%d%-]+[%a%d]%.%a%a+$',			-- three or more character hostname.hostname or hostname.tld
		'^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',			-- IPv4 address (not anchored at the end)
	};

	for _, shape in ipairs (accepted_shapes) do
		if host:match (shape) then
			return true;
		end
	end
	return false;
end
248
249
250--[[--------------------------< I S _ U R L >------------------------------------------------------------------
251
252returns true if the scheme and domain parts of a url appear to be a valid url; else false.
253
254This function is the last step in the validation process. This function is separate because there are cases that
255are not covered by split_url(), for example is_parameter_ext_wikilink() which is looking for bracketted external
256wikilinks.
257
42]]258]]
43function deprecated_parameter(name)259
-- Final validation step for a url that has been split into scheme and domain.
-- With a scheme: the result is truthy only when both is_scheme(scheme) and
-- is_domain_name(domain) pass. Without one (protocol relative url): only the
-- domain is checked.
local function is_url (scheme, domain)
	if not is_set (scheme) then
		return is_domain_name (domain);						-- protocol relative: nothing but the domain to check
	end
	return is_scheme (scheme) and is_domain_name (domain);
end
267
268
269--[[--------------------------< S P L I T _ U R L >------------------------------------------------------------
270
271Split a url into a scheme, authority indicator, and domain.
272If protocol relative url, return nil scheme and domain else return nil for both scheme and domain.
273
274When not protocol relative, get scheme, authority indicator, and domain. If there is an authority indicator (one
275or more '/' characters following the scheme's colon), make sure that there are only 2.
276
277]]
278
-- Splits url_str into scheme and domain and returns them as two values.
--   '//host...'              -> nil, host           (protocol relative)
--   'scheme:' [slashes] host -> scheme, host        (no slashes, or exactly '//')
--   'scheme:' followed by 1 or 3+ slashes -> scheme, nil (caller reports an error)
--   anything else            -> nil, nil
-- Path information following a letter is stripped before splitting.
local function split_url (url_str)
	local scheme, domain;

	-- the capture keeps the letter before the '/', so the '//' after a scheme is not touched
	local trimmed = url_str:gsub ('(%a)/.*', '%1');

	local relative_domain = trimmed:match ('^//(%S*)');
	if relative_domain then									-- protocol relative url
		domain = relative_domain;
	else
		local authority;
		scheme, authority, domain = trimmed:match ('(%S-:)(/*)(%S+)');
		if scheme then										-- scheme, optional authority indicator, and domain found
			local leftover = authority:gsub ('//', '', 1);	-- remove one pair of '/'
			if is_set (leftover) then						-- 1 or 3+ '/' where the authority indicator should be
				domain = nil;
			end
		end
	end

	return scheme, domain;
end
49296
50-- Populates numbered arguments in a message string using an argument table.297
51function substitute( msg, args )298--[[--------------------------< L I N K _ P A R A M _ O K >---------------------------------------------------
52 return args and mw.message.newRawMessage( msg, args ):plain() or msg;299
300checks the content of |title-link=, |series-link=, |author-link= etc for properly formatted content: no wikilinks, no urls
301
302Link parameters are to hold the title of a wikipedia article so none of the WP:TITLESPECIALCHARACTERS are allowed:
303 # < > [ ] | { } _
304except the underscore which is used as a space in wiki urls and # which is used for section links
305
306returns false when the value contains any of these characters.
307
308When there are no illegal characters, this function returns TRUE if value DOES NOT appear to be a valid url (the
309|<param>-link= parameter is ok); else false when value appears to be a valid url (the |<param>-link= parameter is NOT ok).
310
311]]
312
-- Validates the value of a |<param>-link= parameter.
-- Returns false when value contains any of < > [ ] | { }. Otherwise returns true
-- when value does NOT look like a url (parameter is ok) and false when it does.
local function link_param_ok (value)
	if value:find ('[<>%[%]|{}]') then						-- prohibited characters present
		return false;
	end
	-- split_url returns scheme, domain; both are handed straight to is_url
	return not is_url (split_url (value));
end
322
323
324--[[--------------------------< C H E C K _ U R L >------------------------------------------------------------
325
326Determines whether a URL string appears to be valid.
327
328First we test for space characters. If any are found, return false. Then split the url into scheme and domain
329portions, or for protocol relative (//example.com) urls, just the domain. Use is_url() to validate the two
330portions of the url. If both are valid, or for protocol relative if domain is valid, return true, else false.
331
332]]
333
-- Decides whether url_str appears to be a valid url.
-- An empty value or one containing whitespace is rejected outright; otherwise the
-- value is split into scheme and domain and the verdict of is_url() is returned.
local function check_url( url_str )
	if not url_str:match ("^%S+$") then						-- empty, or has whitespace somewhere
		return false;
	end
	local scheme, domain = split_url (url_str);
	return is_url (scheme, domain);
end
343
344
345--[=[-------------------------< I S _ P A R A M E T E R _ E X T _ W I K I L I N K >----------------------------
346
347Return true if a parameter value has a string that begins and ends with square brackets [ and ] and the first
348non-space characters following the opening bracket appear to be a url. The test will also find external wikilinks
349that use protocol relative urls. Also finds bare urls.
350
351The frontier pattern prevents a match on interwiki links which are similar to scheme:path urls. The tests that
352find bracketed urls are required because the parameters that call this test (currently |title=, |chapter=, |work=,
353and |publisher=) may have wikilinks and there are articles or redirects like '//Hus' so, while uncommon, |title=[[//Hus]]
354is possible as might be [[en://Hus]].
355
356]=]
357
-- Looks in value for an external wikilink or bare url and validates it with is_url().
-- Four shapes are tried in this order, and the first that matches is used:
--   1. bracketed link with scheme:        [xxxx://yyyyy.zzz]
--   2. bracketed protocol relative link:  [//yyyyy.zzz]
--   3. bare url with scheme (may have surrounding plain text)
--   4. bare protocol relative url
-- Returns false when none match; otherwise the result of is_url (scheme, domain).
local function is_parameter_ext_wikilink (value)
	-- strip path information (the capture prevents false replacement of '//')
	local text = value:gsub ('([^%s/])/[%a%d].*', '%1');

	-- every capture below is non-empty on success, so a nil result means 'no match'
	local scheme, domain = text:match ('%f[%[]%[(%a%S*:)(%S+).*%]');
	if not scheme then
		domain = text:match ('%f[%[]%[//(%S*%.%S+).*%]');
	end
	if not (scheme or domain) then
		scheme, domain = text:match ('(%a%S*:)(%S+)');
	end
	if not (scheme or domain) then
		domain = text:match ('//(%S*%.%S+)');
	end
	if not (scheme or domain) then
		return false;										-- nothing that is obviously a url
	end

	return is_url (scheme, domain);
end
377
378
379--[[-------------------------< C H E C K _ F O R _ U R L >-----------------------------------------------------
380
381loop through a list of parameters and their values. Look at the value and if it has an external link, emit an error message.
382
383]]
384
-- Scans parameter_list (parameter name -> value) for values that contain an
-- external link. If any are found, one 'param_has_ext_link' error naming every
-- offending parameter is appended to z.message_tail. pairs() is used, so the
-- order in which the names are listed is unspecified.
local function check_for_url (parameter_list)
	local offenders = {};
	for name, value in pairs (parameter_list) do
		if is_parameter_ext_wikilink (value) then
			offenders[#offenders + 1] = "&#124;" .. name .. "=";
		end
	end
	if #offenders > 0 then
		local error_message = table.concat (offenders, ", ");
		table.insert( z.message_tail, { set_error( 'param_has_ext_link', {error_message}, true ) } );
	end
end
399
400
401--[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------
402
403Protects a string that will be wrapped in wiki italic markup '' ... ''
404
405Note: We cannot use <i> for italics, as the expected behavior for italics specified by ''...'' in the title is that
406they will be inverted (i.e. unitalicized) in the resulting references. In addition, <i> and '' tend to interact
407poorly under Mediawiki's HTML tidy.
408
409]]
410
-- Protects a string that will be wrapped in wiki italic markup ('' ... '').
-- An unset str (nil or '') is returned unchanged. Otherwise an empty <span /> is
-- placed next to a leading and/or trailing apostrophe so it cannot merge with the
-- italic markup, and newlines are replaced by spaces because they break italics.
-- Returns exactly one value: the protected string.
local function safe_for_italics( str )
	if not is_set(str) then
		return str;
	end

	if str:sub(1,1) == "'" then str = "<span />" .. str; end
	if str:sub(-1,-1) == "'" then str = str .. "<span />"; end

	-- The parentheses drop gsub's second result (the replacement count), which
	-- would otherwise leak out as an extra return value.
	return (str:gsub( '\n', ' ' ));
end
422
423--[[--------------------------< S A F E _ F O R _ U R L >------------------------------------------------------
424
425Escape sequences for content that will be used for URL descriptions
426
427]]
428
-- Escapes text that will be used as the label of an external link.
-- '[' and ']' become HTML numeric entities and newlines become spaces. If str
-- contains a wikilink ([[...]]), a raw 'wikilink_in_url' error is appended to
-- z.message_tail first. Returns exactly one value: the escaped string.
local function safe_for_url( str )
	if str:match( "%[%[.-%]%]" ) ~= nil then
		table.insert( z.message_tail, { set_error( 'wikilink_in_url', {}, true ) } );
	end

	-- Assigning to a single local drops gsub's second result (the replacement
	-- count), which would otherwise leak out as an extra return value.
	local escaped = str:gsub( '[%[%]\n]', {
		['['] = '&#91;',
		[']'] = '&#93;',
		['\n'] = ' ' } );
	return escaped;
end
439
440--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------
441
442Applies styling to various parameters. Supplied string is wrapped using a message_list configuration taking one
443argument; protects italic styled parameters. Additional text taken from citation_config.presentation - the reason
444this function is similar to but separate from wrap_msg().
445
446]]
447
-- Wraps str in the presentation template cfg.presentation[key].
-- Returns "" when str is not set. For the 'italic-title' and 'trans-italic-title'
-- keys, str is first passed through safe_for_italics().
local function wrap_style (key, str)
	if not is_set( str ) then
		return "";
	end
	if key == 'italic-title' or key == 'trans-italic-title' then
		str = safe_for_italics( str );
	end
	return substitute( cfg.presentation[key], {str} );
end
457
458--[[--------------------------< E X T E R N A L _ L I N K >----------------------------------------------------
459
460Format an external link with error checking
461
462]]
463
-- Formats an external link '[URL label]' and appends any error messages.
--   URL     target of the link
--   label   link text; when not set, URL is used as the label and a
--           'bare_url_missing_title' error is added
--   source  parameter name quoted in error messages; when both label and source
--           are not set a Lua error is raised
-- A URL that fails check_url() adds a 'bad_url' error ahead of any other error text.
local function external_link( URL, label, source )
	local error_str = "";

	if not is_set( label ) then
		label = URL;
		if not is_set( source ) then
			error( cfg.messages["bare_url_no_origin"] );	-- does not return
		end
		error_str = set_error( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
	end

	if not check_url( URL ) then
		local bad_url_message = set_error( 'bad_url', {wrap_style ('parameter', source)}, false, " " );
		error_str = bad_url_message .. error_str;
	end

	local safe_label = safe_for_url( label );
	return "[" .. URL .. " " .. safe_label .. "]" .. error_str;
end
479
480--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
481
482Formats a wiki style external link
483
484]]
485
-- Formats an identifier as '[[link|label]]<separator>[<prefix><id><suffix> <id>]'.
-- Fields read from options:
--   id         the identifier; shown via mw.text.nowiki and used in the url
--   encode     the id is passed through mw.uri.encode when this is true or nil
--   link, label  target and text of the leading wikilink
--   separator  text between the wikilink and the external link (default "&nbsp;")
--   prefix, suffix  url text placed around the id (suffix defaults to "")
local function external_link_id(options)
	local id_for_url = options.id;
	local encode = options.encode;
	if encode == nil or encode == true then
		id_for_url = mw.uri.encode( id_for_url );
	end

	local separator = options.separator or "&nbsp;";
	local suffix = options.suffix or "";
	local display_id = mw.text.nowiki(options.id);

	return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
		options.link, options.label, separator,
		options.prefix, id_for_url, suffix,
		display_id
	);
end
497
498--[[--------------------------< D E P R E C A T E D _ P A R A M E T E R >--------------------------------------
499
500Categorize and emit an error message when the citation contains one or more deprecated parameters. The function includes the
501offending parameter name to the error message. Only one error message is emitted regardless of the number of deprecated
502parameters in the citation.
503
504]]
505
-- Sticky flag: nil until the first deprecated parameter has been reported, then true.
local page_in_deprecated_cat;

-- Appends one raw 'deprecated_params' error naming the parameter `name` to
-- z.message_tail on the first call; later calls do nothing, so only the first
-- deprecated parameter encountered is named.
local function deprecated_parameter(name)
	if page_in_deprecated_cat then
		return;												-- already reported
	end
	page_in_deprecated_cat = true;
	table.insert( z.message_tail, { set_error( 'deprecated_params', {name}, true ) } );
end
54513
55--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------514--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------
56515
57Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value.516Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value.
58This function will positive kern either single or double quotes:517This function will positive kern either single or double quotes:
59 "'Unkerned title with leading and trailing single quote marks'"518 "'Unkerned title with leading and trailing single quote marks'"
60 " 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)519 " 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)
61Double single quotes (italic or bold wikimarkup) are not kerned.520Double single quotes (italic or bold wikimarkup) are not kerned.
62521
63Call this function for chapter titles, for website titles, etc; not for book titles.522Call this function for chapter titles, for website titles, etc; not for book titles.
64523
65]]524]]
66525
-- Applies the 'kern-left' / 'kern-right' presentation templates when str begins
-- or ends with a single or double quote mark, so that the mark does not collide
-- with the quote marks the module adds. Doubled single quotes (wiki italic or
-- bold markup) at either end are not kerned. Returns the possibly-wrapped string.
local function kern_quotes (str)
	-- leading double or single quote, but not doubled single quotes
	local lead_quote, remainder = str:match ("^([\"\'])([^\'].+)");
	if is_set (lead_quote) then
		str = substitute (cfg.presentation['kern-left'], {lead_quote, remainder});
	end

	-- trailing double or single quote, but not doubled single quotes
	local body, trail_quote = str:match ("^(.+[^\'])([\"\'])$");
	if is_set (body) then
		str = substitute (cfg.presentation['kern-right'], {body, trail_quote});
	end

	return str;
end
82541
83--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------542--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------
84543
85|script-title= holds title parameters that are not written in Latin based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should544|script-title= holds title parameters that are not written in Latin based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should
86not be italicized and may be written right-to-left. The value supplied by |script-title= is concatenated onto Title after Title has been wrapped545not be italicized and may be written right-to-left. The value supplied by |script-title= is concatenated onto Title after Title has been wrapped
87in italic markup.546in italic markup.
88547
89Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate rtl languages from the English left to right.548Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate rtl languages from the English left to right.
90549
91|script-title= provides a unique feature. The value in |script-title= may be prefixed with a two-character ISO639-1 language code and a colon:550|script-title= provides a unique feature. The value in |script-title= may be prefixed with a two-character ISO639-1 language code and a colon:
92 |script-title=ja:*** *** (where * represents a Japanese character)551 |script-title=ja:*** *** (where * represents a Japanese character)
93Spaces between the two-character code and the colon and the colon and the first script character are allowed:552Spaces between the two-character code and the colon and the colon and the first script character are allowed:
94 |script-title=ja : *** ***553 |script-title=ja : *** ***
95 |script-title=ja: *** ***554 |script-title=ja: *** ***
96 |script-title=ja :*** ***555 |script-title=ja :*** ***
97Spaces preceding the prefix are allowed: |script-title = ja:*** ***556Spaces preceding the prefix are allowed: |script-title = ja:*** ***
98557
99The prefix is checked for validity. If it is a valid ISO639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can558The prefix is checked for validity. If it is a valid ISO639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can
100know the language the tag contains. This may help the browser render the script more correctly. If the prefix is invalid, the lang attribute559know the language the tag contains. This may help the browser render the script more correctly. If the prefix is invalid, the lang attribute
101is not added. At this time there is no error message for this condition.560is not added. At this time there is no error message for this condition.
102561
103At this writing, only |script-title= is supported. It is anticipated that additional parameters will be created to use this function.562Supports |script-title= and |script-chapter=
104563
105TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;564TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;
106]]565]]
107566
-- Wraps a |script-<param>= value in the 'bdi' presentation template.
-- When the value starts with a two-lowercase-letter prefix and a colon:
--   * a prefix with no script text after it returns '';
--   * a prefix for which mw.language.fetchLanguageName returns a name is stripped
--     from the value, a property category is added, and a lang="xx" attribute is
--     passed to the template;
--   * an unrecognized prefix is left in the value and no lang attribute is added.
local function format_script_value (script_value)
	local lang_attribute = '';								-- stays empty unless the prefix is recognized

	if script_value:match('^%l%l%s*:') then					-- looks like a language prefix
		local code = script_value:match('^(%l%l)%s*:%s*%S.*');	-- nil when nothing follows the prefix
		if not is_set (code) then
			return '';										-- just the prefix, no script
		end

		local name = mw.language.fetchLanguageName( code, "en" );	-- language name, used for categorization
		if is_set (name) then
			script_value = script_value:gsub ('^%l%l%s*:%s*', '');	-- strip prefix from script
			-- languages that have their own category; everything else shares the catchall
			if in_array (code, {'ar', 'bg', 'bs', 'dv', 'el', 'fa', 'he', 'hy', 'ja', 'ka', 'ko', 'ku', 'mk', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'}) then
				add_prop_cat ('script_with_name', {name, code});
			else
				add_prop_cat ('script');
			end
			lang_attribute = ' lang="' .. code .. '" ';
		end
	end

	-- isolate in <bdi> in case the script is rtl
	return substitute (cfg.presentation['bdi'], {lang_attribute, script_value});
end
135594
136--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------595--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------
137596
138Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been 597Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been
139wrapped in <bdi> tags.598wrapped in <bdi> tags.
140]]599]]
141600
142function script_concatenate (title, script)601local function script_concatenate (title, script)
143 if is_set (script) then602 if is_set (script) then
144 script = format_script_value (script); -- <bdi> tags, lang atribute, categorization, etc; returns empty string on error603 script = format_script_value (script); -- <bdi> tags, lang atribute, categorization, etc; returns empty string on error
145 if is_set (script) then604 if is_set (script) then
146 title = title .. ' ' .. script; -- concatenate title and script title605 title = title .. ' ' .. script; -- concatenate title and script title
147 end606 end
148 end607 end
149 return title;608 return title;
150end609end
151610
152611
153--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------612--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------
154613
155Applies styling to various parameters. Supplied string is wrapped using a message_list configuration taking one614Applies additional message text to various parameter values. Supplied string is wrapped using a message_list
156argument; protects italic styled parameters. Additional text taken from citation_config.presentation - the reason615configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken
157this function is similar to but separate from wrap_msg().616from citation_config.messages - the reason this function is similar to but separate from wrap_style().
158617
159]]618]]
160619
161function wrap_style (key, str)620local function wrap_msg (key, str, lower)
162 if not is_set( str ) then621 if not is_set( str ) then
163 return "";622 return "";
164 elseif inArray( key, { 'italic-title', 'trans-italic-title' } ) then623 end
165 str = safeforitalics( str );624 if true == lower then
625 local msg;
626 msg = cfg.messages[key]:lower(); -- set the message to lower case before
627 return substitute( msg, str ); -- including template text
628 else
629 return substitute( cfg.messages[key], str );
630 end
631end
632
633
634--[[-------------------------< I S _ A L I A S _ U S E D >-----------------------------------------------------
635
636This function is used by select_one() to determine if one of a list of alias parameters is in the argument list
637provided by the template.
638
639Input:
640 args – pointer to the arguments table from calling template
641 alias – one of the list of possible aliases in the aliases lists from Module:Citation/CS1/Configuration
642 index – for enumerated parameters, identifies which one
643 enumerated – true/false flag used choose how enumerated aliases are examined
644 value – value associated with an alias that has previously been selected; nil if not yet selected
645 selected – the alias that has previously been selected; nil if not yet selected
646 error_list – list of aliases that are duplicates of the alias already selected
647
648Returns:
649 value – value associated with alias we selected or that was previously selected or nil if an alias not yet selected
650 selected – the alias we selected or the alias that was previously selected or nil if an alias not yet selected
651
652]]
653
654local function is_alias_used (args, alias, index, enumerated, value, selected, error_list)
655 if enumerated then -- is this a test for an enumerated parameters?
656 alias = alias:gsub ('#', index); -- replace '#' with the value in index
657 else
658 alias = alias:gsub ('#', ''); -- remove '#' if it exists
166 end659 end
167660
168 return substitute( cfg.presentation[key], {str} );661 if is_set(args[alias]) then -- alias is in the template's argument list
662 if value ~= nil and selected ~= alias then -- if we have already selected one of the aliases
663 local skip;
664 for _, v in ipairs(error_list) do -- spin through the error list to see if we've added this alias
665 if v == alias then
666 skip = true;
667 break; -- has been added so stop looking
668 end
669 end
670 if not skip then -- has not been added so
671 table.insert( error_list, alias ); -- add error alias to the error list
672 end
673 else
674 value = args[alias]; -- not yet selected an alias, so select this one
675 selected = alias;
676 end
677 end
678 return value, selected; -- return newly selected alias, or previously selected alias
169end679end
170680
171681
172--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------682--[[--------------------------< S E L E C T _ O N E >----------------------------------------------------------
683
684Chooses one matching parameter from a list of parameters to consider. The list of parameters to consider is just
685names. For parameters that may be enumerated, the position of the numerator in the parameter name is identified
686by the '#' so |author-last1= and |author1-last= are represented as 'author-last#' and 'author#-last'.
687
688Because enumerated parameter |<param>1= is an alias of |<param>= we must test for both possibilities.
689
173690
174Applies additional message text to various parameter values. Supplied string is wrapped using a message_list691Generates an error if more than one match is present.
175configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken
176from citation_config.messages - the reason this function is similar to but separate from wrap_style().
177692
178]]693]]
179694
180function wrap_msg (key, str, lower)695local function select_one( args, aliases_list, error_condition, index )
181 if not is_set( str ) then696 local value = nil; -- the value assigned to the selected parameter
182 return "";697 local selected = ''; -- the name of the parameter we have chosen
698 local error_list = {};
699
700 if index ~= nil then index = tostring(index); end
701
702 for _, alias in ipairs( aliases_list ) do -- for each alias in the aliases list
703 if alias:match ('#') then -- if this alias can be enumerated
704 if '1' == index then -- when index is 1 test for enumerated and non-enumerated aliases
705 value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); -- first test for non-enumerated alias
706 end
707 value, selected = is_alias_used (args, alias, index, true, value, selected, error_list); -- test for enumerated alias
708 else
709 value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); --test for non-enumerated alias
710 end
711 end
712
713 if #error_list > 0 and 'none' ~= error_condition then -- for cases where this code is used outside of extract_names()
714 local error_str = "";
715 for _, k in ipairs( error_list ) do
716 if error_str ~= "" then error_str = error_str .. cfg.messages['parameter-separator'] end
717 error_str = error_str .. wrap_style ('parameter', k);
718 end
719 if #error_list > 1 then
720 error_str = error_str .. cfg.messages['parameter-final-separator'];
721 else
722 error_str = error_str .. cfg.messages['parameter-pair-separator'];
723 end
724 error_str = error_str .. wrap_style ('parameter', selected);
725 table.insert( z.message_tail, { set_error( error_condition, {error_str}, true ) } );
183 end726 end
184 if true == lower then727
185 local msg;728 return value, selected;
186 msg = cfg.messages[key]:lower(); -- set the message to lower case before
187 str = substitute( msg, {str} ); -- including template text
188 return str;
189 else
190 return substitute( cfg.messages[key], {str} );
191 end
192end729end
730
193731
194--[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >--------------------------------------732--[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >--------------------------------------
195733
196Format the three chapter parameters: |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-734Format the four chapter parameters: |script-chapter=, |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-
197parameter (chapter_url_source used for error messages).735parameter (chapter_url_source used for error messages).
198736
199]]737]]
200738
201function format_chapter_title (chapter, transchapter, chapterurl, chapter_url_source)739local function format_chapter_title (scriptchapter, chapter, transchapter, chapterurl, chapter_url_source, no_quotes)
202 local chapter_error = '';740 local chapter_error = '';
203 741
204 if not is_set (chapter) then742 if not is_set (chapter) then
205 chapter = ''; -- just to be safe for concatenation743 chapter = ''; -- to be safe for concatenation
206 if is_set (transchapter) then744 else
207 chapter = wrap_style ('trans-quoted-title', transchapter);745 if false == no_quotes then
208 chapter_error = " " .. seterror ('trans_missing_chapter');746 chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks
747 chapter = wrap_style ('quoted-title', chapter);
209 end748 end
210 if is_set (chapterurl) then749 end
211 chapter = externallink (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate750
751 chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped
752
753 if is_set (transchapter) then
754 transchapter = wrap_style ('trans-quoted-title', transchapter);
755 if is_set (chapter) then
756 chapter = chapter .. ' ' .. transchapter;
757 else -- here when transchapter without chapter or script-chapter
758 chapter = transchapter; --
759 chapter_error = ' ' .. set_error ('trans_missing_title', {'chapter'});
212 end760 end
213 return chapter .. chapter_error;761 end
214 else -- here when chapter is set762
215 chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks763 if is_set (chapterurl) then
216 chapter = wrap_style ('quoted-title', chapter);764 chapter = external_link (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate
217 if is_set (transchapter) then765 end
218 transchapter = wrap_style ('trans-quoted-title', transchapter);766
219 chapter = chapter .. ' ' .. transchapter;767 return chapter .. chapter_error;
768end
769
770--[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------
771
772This function searches a parameter's value for nonprintable or invisible characters. The search stops at the
773first match.
774
775This function will detect the visible replacement character when it is part of the wikisource.
776
777Detects but ignores nowiki and math stripmarkers. Also detects other named stripmarkers (gallery, math, pre, ref)
778and identifies them with a slightly different error message. See also coins_cleanup().
779
780Detects but ignores the character pattern that results from the transclusion of {{'}} templates.
781
782Output of this function is an error message that identifies the character or the Unicode group, or the stripmarker
783that was detected along with its position (or, for multi-byte characters, the position of its first byte) in the
784parameter value.
785
786]]
787
788local function has_invisible_chars (param, v)
789 local position = ''; -- position of invisible char or starting position of stripmarker
790 local dummy; -- end of matching string; not used but required to hold end position when a capture is returned
791 local capture; -- used by stripmarker detection to hold name of the stripmarker
792 local i=1;
793 local stripmarker, apostrophe;
794
795 while cfg.invisible_chars[i] do
796 local char=cfg.invisible_chars[i][1] -- the character or group name
797 local pattern=cfg.invisible_chars[i][2] -- the pattern used to find it
798 position, dummy, capture = mw.ustring.find (v, pattern) -- see if the parameter value contains characters that match the pattern
799
800 if position then
801-- if 'nowiki' == capture or 'math' == capture or ('ref' == capture and 'quote' == param) then -- nowiki, math, or quote param and ref stripmarker (not an error condition)
802 if 'nowiki' == capture or 'math' == capture then -- nowiki, math stripmarker (not an error condition)
803 stripmarker = true; -- set a flag
804 elseif true == stripmarker and 'delete' == char then -- because stripmakers begin and end with the delete char, assume that we've found one end of a stripmarker
805 position = nil; -- unset
806 elseif 'apostrophe' == char then -- apostrophe template uses &zwj;, hair space and zero-width space
807 apostrophe = true;
808 elseif true == apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
809 position = nil; -- unset
810 else
811 local err_msg;
812 if capture then
813 err_msg = capture .. ' ' .. char;
814 else
815 err_msg = char .. ' ' .. 'character';
816 end
817
818 table.insert( z.message_tail, { set_error( 'invisible_char', {err_msg, wrap_style ('parameter', param), position}, true ) } ); -- add error message
819 return; -- and done with this parameter
820 end
220 end821 end
221 if is_set (chapterurl) then822 i=i+1; -- bump our index
222 chapter = externallink (chapterurl, chapter); -- adds bare_url_missing_title error if appropriate
223 end
224 end823 end
225 return chapter;
226end824end
227825
228--[[826
229Argument wrapper. This function provides support for argument 827--[[--------------------------< A R G U M E N T _ W R A P P E R >----------------------------------------------
230mapping defined in the configuration file so that multiple names828
231can be transparently aliased to single internal variable.829Argument wrapper. This function provides support for argument mapping defined in the configuration file so that
830multiple names can be transparently aliased to single internal variable.
831
232]]832]]
233function argument_wrapper( args )833
834local function argument_wrapper( args )
234 local origin = {};835 local origin = {};
235 836
236 return setmetatable({837 return setmetatable({
237 ORIGIN = function( self, k )838 ORIGIN = function( self, k )
238 local dummy = self[k]; --force the variable to be loaded.839 local dummy = self[k]; --force the variable to be loaded.
239 return origin[k];840 return origin[k];
240 end841 end
241 },842 },
242 {843 {
243 __index = function ( tbl, k )844 __index = function ( tbl, k )
244 if origin[k] ~= nil then845 if origin[k] ~= nil then
245 return nil;846 return nil;
246 end847 end
247 848
248 local args, list, v = args, cfg.aliases[k];849 local args, list, v = args, cfg.aliases[k];
249 850
250 if type( list ) == 'table' then851 if type( list ) == 'table' then
251 v, origin[k] = selectone( args, list, 'redundant_parameters' );852 v, origin[k] = select_one( args, list, 'redundant_parameters' );
252 if origin[k] == nil then853 if origin[k] == nil then
253 origin[k] = ''; -- Empty string, not nil854 origin[k] = ''; -- Empty string, not nil
254 end855 end
255 elseif list ~= nil then856 elseif list ~= nil then
256 v, origin[k] = args[list], list;857 v, origin[k] = args[list], list;
257 else858 else
258 -- maybe let through instead of raising an error?859 -- maybe let through instead of raising an error?
259 -- v, origin[k] = args[k], k;860 -- v, origin[k] = args[k], k;
260 error( cfg.messages['unknown_argument_map'] );861 error( cfg.messages['unknown_argument_map'] );
261 end862 end
262 863
263 -- Empty strings, not nil;864 -- Empty strings, not nil;
264 if v == nil then865 if v == nil then
265 v = cfg.defaults[k] or '';866 v = cfg.defaults[k] or '';
266 origin[k] = '';867 origin[k] = '';
267 end868 end
268 869
269 tbl = rawset( tbl, k, v );870 tbl = rawset( tbl, k, v );
270 return v;871 return v;
271 end,872 end,
272 });873 });
273end874end
274875
275--[[876--[[--------------------------< V A L I D A T E >--------------------------------------------------------------
276Looks for a parameter's name in the whitelist.877Looks for a parameter's name in the whitelist.
277878
278Parameters in the whitelist can have three values:879Parameters in the whitelist can have three values:
279 true - active, supported parameters880 true - active, supported parameters
280 false - deprecated, supported parameters881 false - deprecated, supported parameters
281 nil - unsupported parameters882 nil - unsupported parameters
883
282]]884]]
283function validate( name )885
886local function validate( name )
284 local name = tostring( name );887 local name = tostring( name );
285 local state = whitelist.basic_arguments[ name ];888 local state = whitelist.basic_arguments[ name ];
286 889
287 -- Normal arguments890 -- Normal arguments
288 if true == state then return true; end -- valid actively supported parameter891 if true == state then return true; end -- valid actively supported parameter
289 if false == state then892 if false == state then
290 deprecated_parameter (name); -- parameter is deprecated but still supported893 deprecated_parameter (name); -- parameter is deprecated but still supported
291 return true;894 return true;
292 end895 end
293 896
294 -- Arguments with numbers in them897 -- Arguments with numbers in them
295 name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#898 name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#
296 state = whitelist.numbered_arguments[ name ];899 state = whitelist.numbered_arguments[ name ];
297 if true == state then return true; end -- valid actively supported parameter900 if true == state then return true; end -- valid actively supported parameter
298 if false == state then901 if false == state then
299 deprecated_parameter (name); -- parameter is deprecated but still supported902 deprecated_parameter (name); -- parameter is deprecated but still supported
300 return true;903 return true;
301 end904 end
302 905
303 return false; -- Not supported because not found or name is set to nil906 return false; -- Not supported because not found or name is set to nil
304end907end
305908
306--[[--------------------------< E R R O R C O M M E N T >------------------------------------------------------
307
308Wraps error messages with css markup according to the state of hidden.
309
310]]
311function errorcomment( content, hidden )
312 return substitute( hidden and cfg.presentation['hidden-error'] or cfg.presentation['visible-error'], content );
313end
314
315--[[
316Sets an error condition and returns the appropriate error message. The actual placement
317of the error message in the output is the responsibility of the calling function.
318]]
319function seterror( error_id, arguments, raw, prefix, suffix )
320 local error_state = cfg.error_conditions[ error_id ];
321
322 prefix = prefix or "";
323 suffix = suffix or "";
324
325 if error_state == nil then
326 error( cfg.messages['undefined_error'] );
327 elseif is_set( error_state.category ) then
328 table.insert( z.error_categories, error_state.category );
329 end
330
331 local message = substitute( error_state.message, arguments );
332
333 message = message .. " ([[" .. cfg.messages['help page link'] ..
334 "#" .. error_state.anchor .. "|" ..
335 cfg.messages['help page label'] .. "]])";
336
337 z.error_ids[ error_id ] = true;
338 if inArray( error_id, { 'bare_url_missing_title', 'trans_missing_title' } )
339 and z.error_ids['citation_missing_title'] then
340 return '', false;
341 end
342
343 message = table.concat({ prefix, message, suffix });
344
345 if raw == true then
346 return message, error_state.hidden;
347 end
348
349 return errorcomment( message, error_state.hidden );
350end
351
352-- Formats a wiki style external link
353function externallinkid(options)
354 local url_string = options.id;
355 if options.encode == true or options.encode == nil then
356 url_string = mw.uri.encode( url_string );
357 end
358 return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
359 options.link, options.label, options.separator or "&nbsp;",
360 options.prefix, url_string, options.suffix or "",
361 mw.text.nowiki(options.id)
362 );
363end
364909
365-- Formats a wiki style internal link910-- Formats a wiki style internal link
366function internallinkid(options)911local function internal_link_id(options)
367 return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',912 return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
368 options.link, options.label, options.separator or "&nbsp;",913 options.link, options.label, options.separator or "&nbsp;",
369 options.prefix, options.id, options.suffix or "",914 options.prefix, options.id, options.suffix or "",
370 mw.text.nowiki(options.id)915 mw.text.nowiki(options.id)
371 );916 );
372end917end
373918
374-- Format an external link with error checking
375function externallink( URL, label, source )
376 local error_str = "";
377 if not is_set( label ) then
378 label = URL;
379 if is_set( source ) then
380 error_str = seterror( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
381 else
382 error( cfg.messages["bare_url_no_origin"] );
383 end
384 end
385 if not checkurl( URL ) then
386 error_str = seterror( 'bad_url', {}, false, " " ) .. error_str;
387 end
388 return table.concat({ "[", URL, " ", safeforurl( label ), "]", error_str });
389end
390919
391--[[--------------------------< N O W R A P _ D A T E >--------------------------------------------------------920--[[--------------------------< N O W R A P _ D A T E >--------------------------------------------------------
392921
393When date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or is922When date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or is
394MMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYY923MMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYY
395924
396DOES NOT yet support MMMM YYYY or any of the date ranges.925DOES NOT yet support MMMM YYYY or any of the date ranges.
397926
398]]927]]
399928
400function nowrap_date (date)929local function nowrap_date (date)
401 local cap='';930 local cap='';
402 local cap2='';931 local cap2='';
403932
404 if date:match("^%d%d%d%d%-%d%d%-%d%d$") then933 if date:match("^%d%d%d%d%-%d%d%-%d%d$") then
405 date = substitute (cfg.presentation['nowrap1'], date);934 date = substitute (cfg.presentation['nowrap1'], date);
406 935
407 elseif date:match("%a+%s*%d%d?,%s*%d%d%d%d") or date:match ("%d%d?%s*%a+%s*%d%d%d%d") then936 elseif date:match("^%a+%s*%d%d?,%s+%d%d%d%d$") or date:match ("^%d%d?%s*%a+%s+%d%d%d%d$") then
408 cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$");937 cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$");
409 date = substitute (cfg.presentation['nowrap2'], {cap, cap2});938 date = substitute (cfg.presentation['nowrap2'], {cap, cap2});
410 end939 end
411 940
412 return date;941 return date;
413end942end
414943
944--[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------
945
946ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. ISBN-13 is checked in check_isbn().
947If the number is valid the result will be 0. Before calling this function, issbn/issn must be checked for length and stripped of dashes,
948spaces and other non-isxn characters.
949
950]]
951
952local function is_valid_isxn (isxn_str, len)
953 local temp = 0;
954 isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
955 len = len+1; -- adjust to be a loop counter
956 for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
957 if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58)
958 temp = temp + 10*( len - i ); -- it represents 10 decimal
959 else
960 temp = temp + tonumber( string.char(v) )*(len-i);
961 end
962 end
963 return temp % 11 == 0; -- returns true if calculation result is zero
964end
965
966
967--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------
968
969ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
970If the number is valid, the result will be 0. Before calling this function, isbn-13/ismn must be checked for length
971and stripped of dashes, spaces and other non-isxn-13 characters.
972
973]]
974
975local function is_valid_isxn_13 (isxn_str)
976 local temp=0;
977
978 isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39
979 for i, v in ipairs( isxn_str ) do
980 temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
981 end
982 return temp % 10 == 0; -- sum modulo 10 is zero when isbn-13/ismn is correct
983end
984
985--[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------
986
987Determines whether an ISBN string is valid
988
989]]
990
991local function check_isbn( isbn_str )
992 if nil ~= isbn_str:match("[^%s-0-9X]") then return false; end -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
993 isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces
994 local len = isbn_str:len();
995
996 if len ~= 10 and len ~= 13 then
997 return false;
998 end
999
1000 if len == 10 then
1001 if isbn_str:match( "^%d*X?$" ) == nil then return false; end
1002 return is_valid_isxn(isbn_str, 10);
1003 else
1004 local temp = 0;
1005 if isbn_str:match( "^97[89]%d*$" ) == nil then return false; end -- isbn13 begins with 978 or 979; ismn begins with 979
1006 return is_valid_isxn_13 (isbn_str);
1007 end
1008end
1009
1010--[[--------------------------< C H E C K _ I S M N >------------------------------------------------------------
1011
1012Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits begining 979-0-... and uses the
1013same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
1014section 2, pages 9–12.
1015
1016]]
1017
1018local function ismn (id)
1019 local handler = cfg.id_handlers['ISMN'];
1020 local text;
1021 local valid_ismn = true;
1022
1023 id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn
1024
1025 if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790
1026 valid_ismn = false;
1027 else
1028 valid_ismn=is_valid_isxn_13 (id); -- validate ismn
1029 end
1030
1031-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to
1032-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
1033
1034 text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id; -- because no place to link to yet
1035
1036 if false == valid_ismn then
1037 text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the issn is invalid
1038 end
1039
1040 return text;
1041end
1042
1043--[[--------------------------< I S S N >----------------------------------------------------------------------
1044
1045Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but has separated the two groups of four
1046digits with a space. When that condition occurred, the resulting link looked like this:
1047
1048 |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
1049
1050This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length and makes sure that the checkdigit agrees
1051with the calculated value. Incorrect length (8 digits), characters other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn
1052error message. The issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
1053
1054]]
1055
1056local function issn(id)
1057 local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
1058 local handler = cfg.id_handlers['ISSN'];
1059 local text;
1060 local valid_issn = true;
1061
1062 id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn
1063
1064 if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
1065 valid_issn=false; -- wrong length or improper character
1066 else
1067 valid_issn=is_valid_isxn(id, 8); -- validate issn
1068 end
1069
1070 if true == valid_issn then
1071 id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
1072 else
1073 id = issn_copy; -- if not valid, use the show the invalid issn with error message
1074 end
1075
1076 text = external_link_id({link = handler.link, label = handler.label,
1077 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
1078
1079 if false == valid_issn then
1080 text = text .. ' ' .. set_error( 'bad_issn' ) -- add an error message if the issn is invalid
1081 end
1082
1083 return text
1084end
1085
415--[[--------------------------< A M A Z O N >------------------------------------------------------------------1086--[[--------------------------< A M A Z O N >------------------------------------------------------------------
4161087
417Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha1088Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
418characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit1089characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
419isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.1090isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
420Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.1091Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
4211092
422]]1093]]
4231094
424function amazon(id, domain)1095local function amazon(id, domain)
425 local err_cat = ""1096 local err_cat = ""
4261097
427 if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then1098 if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
428 err_cat = ' ' .. seterror ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters1099 err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
429 else1100 else
430 if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)1101 if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
431 if checkisbn( id ) then -- see if asin value is isbn101102 if check_isbn( id ) then -- see if asin value is isbn10
432 table.insert( z.maintenance_cats, "CS1 održavanje: ASIN koristi ISBN broj"); -- add to maint category1103 add_maint_cat ('ASIN');
433 elseif not is_set (err_cat) then1104 elseif not is_set (err_cat) then
434 err_cat = ' ' .. seterror ('bad_asin'); -- asin is not isbn101105 err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
435 end1106 end
436 elseif not id:match("^%u[%d%u]+$") then1107 elseif not id:match("^%u[%d%u]+$") then
437 err_cat = ' ' .. seterror ('bad_asin'); -- asin doesn't begin with uppercase alpha1108 err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
438 end1109 end
439 end1110 end
440 if not is_set(domain) then 1111 if not is_set(domain) then
441 domain = "com";1112 domain = "com";
442 elseif inArray (domain, {'jp', 'uk'}) then -- Japan, United Kingdom1113 elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
443 domain = "co." .. domain;1114 domain = "co." .. domain;
444 elseif inArray (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico1115 elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
445 domain = "com." .. domain;1116 domain = "com." .. domain;
446 end1117 end
447 local handler = cfg.id_handlers['ASIN'];1118 local handler = cfg.id_handlers['ASIN'];
448 return externallinkid({link = handler.link,1119 return external_link_id({link=handler.link,
449 label=handler.label , prefix="//www.amazon."..domain.."/dp/",id=id,1120 label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
450 encode=handler.encode, separator = handler.separator}) .. err_cat;1121 id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
451end1122end
4521123
453--[[--------------------------< A R X I V >--------------------------------------------------------------------1124--[[--------------------------< A R X I V >--------------------------------------------------------------------
4541125
455See: http://arxiv.org/help/arxiv_identifier1126See: http://arxiv.org/help/arxiv_identifier
4561127
457format and error check arXiv identifier. There are three valid forms of the identifier:1128format and error check arXiv identifier. There are three valid forms of the identifier:
458the first form, valid only between date codes 9108 and 0703 is:1129the first form, valid only between date codes 9108 and 0703 is:
459 arXiv:<archive>.<class>/<date code><number><version>1130 arXiv:<archive>.<class>/<date code><number><version>
460where:1131where:
461 <archive> is a string of alpha characters - may be hyphenated; no other punctuation1132 <archive> is a string of alpha characters - may be hyphenated; no other punctuation
462 <class> is a string of alpha characters - may be hyphenated; no other punctuation1133 <class> is a string of alpha characters - may be hyphenated; no other punctuation
463 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 011134 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
464 first digit of YY for this form can only 9 and 01135 first digit of YY for this form can only 9 and 0
465 <number> is a three-digit number1136 <number> is a three-digit number
466 <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)1137 <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
467 1138
468the second form, valid from April 2007 through December 2014 is:1139the second form, valid from April 2007 through December 2014 is:
469 arXiv:<date code>.<number><version>1140 arXiv:<date code>.<number><version>
470where:1141where:
471 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 011142 <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
472 <number> is a four-digit number1143 <number> is a four-digit number
473 <version> is a 1 or more digit number preceded with a lowercase v; no spaces1144 <version> is a 1 or more digit number preceded with a lowercase v; no spaces
4741145
475the third form, valid from January 2015 is:1146the third form, valid from January 2015 is:
476 arXiv:<date code>.<number><version>1147 arXiv:<date code>.<number><version>
477where:1148where:
478 <date code> and <version> are as defined for 0704-14121149 <date code> and <version> are as defined for 0704-1412
479 <number> is a five-digit number1150 <number> is a five-digit number
480]]1151]]
4811152
482function arxiv (id)1153local function arxiv (id, class)
483 local handler = cfg.id_handlers['ARXIV'];1154 local handler = cfg.id_handlers['ARXIV'];
484 local year, month, version;1155 local year, month, version;
485 local err_cat = ""1156 local err_cat = '';
1157 local text;
486 1158
487 if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version1159 if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version
488 year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");1160 year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
489 year = tonumber(year);1161 year = tonumber(year);
490 month = tonumber(month);1162 month = tonumber(month);
491 if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month1163 if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month
492 ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?1164 ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?
493 err_cat = ' ' .. seterror( 'bad_arxiv' ); -- set error message1165 err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
494 end1166 end
495 elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version1167 elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version
496 year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");1168 year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
497 year = tonumber(year);1169 year = tonumber(year);
498 month = tonumber(month);1170 month = tonumber(month);
499 if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)1171 if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
500 ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?1172 ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
501 err_cat = ' ' .. seterror( 'bad_arxiv' ); -- set error message1173 err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
502 end1174 end
503 elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version1175 elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
504 year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");1176 year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
505 year = tonumber(year);1177 year = tonumber(year);
506 month = tonumber(month);1178 month = tonumber(month);
507 if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)1179 if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)
508 err_cat = ' ' .. seterror( 'bad_arxiv' ); -- set error message1180 err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
509 end1181 end
510 else1182 else
511 err_cat = ' ' .. seterror( 'bad_arxiv' ); -- arXiv id doesn't match any format1183 err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv id doesn't match any format
512 end1184 end
5131185
514 return externallinkid({link = handler.link, label = handler.label,1186 text = external_link_id({link = handler.link, label = handler.label,
515 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;1187 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
1188
1189 if is_set (class) then
1190 class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink
1191 else
1192 class = ''; -- empty string for concatenation
1193 end
1194
1195 return text .. class;
516end1196end
517
5181197
519--[[1198--[[
520lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)1199lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
5211. Remove all blanks.12001. Remove all blanks.
5222. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.12012. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
5233. If there is a hyphen in the string:12023. If there is a hyphen in the string:
524 a. Remove it.1203 a. Remove it.
525 b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):1204 b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
526 1. All these characters should be digits, and there should be six or less. (not done in this function)1205 1. All these characters should be digits, and there should be six or less. (not done in this function)
527 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.1206 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
5281207
529Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.1208Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.
530]]1209]]
5311210
532function normalize_lccn (lccn)1211local function normalize_lccn (lccn)
533 lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace1212 lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
5341213
535 if nil ~= string.find (lccn,'/') then1214 if nil ~= string.find (lccn,'/') then
536 lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it1215 lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
537 end1216 end
5381217
539 local prefix1218 local prefix
540 local suffix1219 local suffix
541 prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix1220 prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
5421221
543 if nil ~= suffix then -- if there was a hyphen1222 if nil ~= suffix then -- if there was a hyphen
544 suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 61223 suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
545 lccn=prefix..suffix; -- reassemble the lccn1224 lccn=prefix..suffix; -- reassemble the lccn
546 end1225 end
547 1226
548 return lccn;1227 return lccn;
549 end1228 end
5501229
551--[[1230--[[
552Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of the LCCN dictates the character type of the first 1-3 characters; the1231Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of the LCCN dictates the character type of the first 1-3 characters; the
553rightmost eight are always digits. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/1232rightmost eight are always digits. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
5541233
555length = 8 then all digits1234length = 8 then all digits
556length = 9 then lccn[1] is lower case alpha1235length = 9 then lccn[1] is lower case alpha
557length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits1236length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
558length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits1237length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
559length = 12 then lccn[1] and lccn[2] are both lower case alpha1238length = 12 then lccn[1] and lccn[2] are both lower case alpha
5601239
561]]1240]]
562function lccn(lccn)1241
1242local function lccn(lccn)
563 local handler = cfg.id_handlers['LCCN'];1243 local handler = cfg.id_handlers['LCCN'];
564 local err_cat = ''; -- presume that LCCN is valid1244 local err_cat = ''; -- presume that LCCN is valid
565 local id = lccn; -- local copy of the lccn1245 local id = lccn; -- local copy of the lccn
5661246
567 id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)1247 id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
568 local len = id:len(); -- get the length of the lccn1248 local len = id:len(); -- get the length of the lccn
5691249
570 if 8 == len then1250 if 8 == len then
571 if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)1251 if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
572 err_cat = ' ' .. seterror( 'bad_lccn' ); -- set an error message1252 err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
573 end1253 end
574 elseif 9 == len then -- LCCN should be adddddddd1254 elseif 9 == len then -- LCCN should be adddddddd
575 if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?1255 if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
576 err_cat = ' ' .. seterror( 'bad_lccn' ); -- set an error message1256 err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
577 end1257 end
578 elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd1258 elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
579 if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...1259 if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
580 if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern1260 if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
581 err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message1261 err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
582 end1262 end
583 end1263 end
584 elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd1264 elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
585 if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns1265 if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
586 err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message1266 err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
587 end1267 end
588 elseif 12 == len then -- LCCN should be aadddddddddd1268 elseif 12 == len then -- LCCN should be aadddddddddd
589 if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern1269 if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
590 err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message1270 err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
591 end1271 end
592 else1272 else
593 err_cat = ' ' .. seterror( 'bad_lccn' ); -- wrong length, set an error message1273 err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message
594 end1274 end
5951275
596 if not is_set (err_cat) and nil ~= lccn:find ('%s') then1276 if not is_set (err_cat) and nil ~= lccn:find ('%s') then
597 err_cat = ' ' .. seterror( 'bad_lccn' ); -- lccn contains a space, set an error message1277 err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message
598 end1278 end
5991279
600 return externallinkid({link = handler.link, label = handler.label,1280 return external_link_id({link = handler.link, label = handler.label,
601 prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;1281 prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
602end1282end
6031283
604--[[1284--[[
605Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This code checks the PMID to see that it1285Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This code checks the PMID to see that it
606contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued.1286contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued.
607]]1287]]
608function pmid(id)1288
1289local function pmid(id)
609 local test_limit = 30000000; -- update this value as PMIDs approach1290 local test_limit = 30000000; -- update this value as PMIDs approach
610 local handler = cfg.id_handlers['PMID'];1291 local handler = cfg.id_handlers['PMID'];
611 local err_cat = ''; -- presume that PMID is valid1292 local err_cat = ''; -- presume that PMID is valid
612 1293
613 if id:match("[^%d]") then -- if PMID has anything but digits1294 if id:match("[^%d]") then -- if PMID has anything but digits
614 err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message1295 err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
615 else -- PMID is only digits1296 else -- PMID is only digits
616 local id_num = tonumber(id); -- convert id to a number for range testing1297 local id_num = tonumber(id); -- convert id to a number for range testing
617 if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries1298 if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
618 err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message1299 err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
619 end1300 end
620 end1301 end
621 1302
622 return externallinkid({link = handler.link, label = handler.label,1303 return external_link_id({link = handler.link, label = handler.label,
623 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;1304 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
624end1305end
6251306
626--[[1307--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
1308
627Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is1309Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is
628in the future, returns true; otherwise, returns false because the embargo has expired or |embargo= not set in this cite.1310in the future, returns the content of |embargo=; otherwise, returns and empty string because the embargo has expired or because
1311|embargo= was not set in this cite.
1312
629]]1313]]
630function is_embargoed(embargo)1314
631 if is_set(embargo) then1315local function is_embargoed (embargo)
1316 if is_set (embargo) then
632 local lang = mw.getContentLanguage();1317 local lang = mw.getContentLanguage();
633 local good1, embargo_date, good2, todays_date;1318 local good1, embargo_date, good2, todays_date;
634 good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );1319 good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
635 good2, todays_date = pcall( lang.formatDate, lang, 'U' );1320 good2, todays_date = pcall( lang.formatDate, lang, 'U' );
636 1321
637 if good1 and good2 and tonumber( embargo_date ) >= tonumber( todays_date ) then --is embargo date is in the future?1322 if good1 and good2 then -- if embargo date and today's date are good dates
638 return true; -- still embargoed1323 if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?
1324 return embargo; -- still embargoed
1325 else
1326 add_maint_cat ('embargo')
1327 return ''; -- unset because embargo has expired
1328 end
639 end1329 end
640 end1330 end
641 return false; -- embargo expired or |embargo= not set1331 return ''; -- |embargo= not set return empty string
642end1332end
6431333
644--[[1334--[[--------------------------< P M C >------------------------------------------------------------------------
1335
645Format a PMC, do simple error checking, and check for embargoed articles.1336Format a PMC, do simple error checking, and check for embargoed articles.
6461337
647The embargo parameter takes a date for a value. If the embargo date is in the future1338The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
648the PMC identifier will not be linked to the article. If the embargo specifies a date in the past, or if it is empty or omitted, then1339be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the
649the PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.1340PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
1341
1342PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
1343has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed ()
1344returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
6501345
651PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less1346PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less
652than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.1347than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
1348
653]]1349]]
654function pmc(id, embargo)1350
1351local function pmc(id, embargo)
655 local test_limit = 5000000; -- update this value as PMCs approach1352 local test_limit = 5000000; -- update this value as PMCs approach
656 local handler = cfg.id_handlers['PMC'];1353 local handler = cfg.id_handlers['PMC'];
657 local err_cat = ''; -- presume that PMC is valid1354 local err_cat = ''; -- presume that PMC is valid
658 1355
659 local text;1356 local text;
6601357
661 if id:match("[^%d]") then -- if PMC has anything but digits1358 if id:match("[^%d]") then -- if PMC has anything but digits
662 err_cat = ' ' .. seterror( 'bad_pmc' ); -- set an error message1359 err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
663 else -- PMC is only digits1360 else -- PMC is only digits
664 local id_num = tonumber(id); -- convert id to a number for range testing1361 local id_num = tonumber(id); -- convert id to a number for range testing
665 if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries1362 if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries
666 err_cat = ' ' .. seterror( 'bad_pmc' ); -- set an error message1363 err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
667 end1364 end
668 end1365 end
669 1366
670 if is_embargoed(embargo) then1367 if is_set (embargo) then -- is PMC is still embargoed?
671 text="[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat; --still embargoed so no external link1368 text="[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat; -- still embargoed so no external link
672 else1369 else
673 text = externallinkid({link = handler.link, label = handler.label, --no embargo date, ok to link to article1370 text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article
674 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;1371 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
675 end1372 end
676 return text;1373 return text;
677end1374end
6781375
679-- Formats a DOI and checks for DOI errors.1376-- Formats a DOI and checks for DOI errors.
6801377
681-- DOI names contain two parts: prefix and suffix separated by a forward slash.1378-- DOI names contain two parts: prefix and suffix separated by a forward slash.
682-- Prefix: directory indicator '10.' followed by a registrant code1379-- Prefix: directory indicator '10.' followed by a registrant code
683-- Suffix: character string of any length chosen by the registrant1380-- Suffix: character string of any length chosen by the registrant
6841381
685-- This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes,1382-- This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes,
686-- or, if it ends with a period or a comma, this function will emit a bad_doi error message.1383-- or, if it ends with a period or a comma, this function will emit a bad_doi error message.
6871384
688-- DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,1385-- DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
689-- and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely if ever used in doi names.1386-- and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely if ever used in doi names.
6901387
691function doi(id, inactive)1388local function doi(id, inactive)
692 local cat = ""1389 local cat = ""
693 local handler = cfg.id_handlers['DOI'];1390 local handler = cfg.id_handlers['DOI'];
694 1391
695 local text;1392 local text;
696 if is_set(inactive) then1393 if is_set(inactive) then
697 local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date1394 local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
698 text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;1395 text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;
699 if is_set(inactive_year) then1396 if is_set(inactive_year) then
700 table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );1397 table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
701 else1398 else
702 table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year1399 table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
703 end1400 end
704 inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" 1401 inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
705 else 1402 else
706 text = externallinkid({link = handler.link, label = handler.label,1403 text = external_link_id({link = handler.link, label = handler.label,
707 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})1404 prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
708 inactive = "" 1405 inactive = ""
709 end1406 end
7101407
711 if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma1408 if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
712 cat = ' ' .. seterror( 'bad_doi' );1409 cat = ' ' .. set_error( 'bad_doi' );
713 end1410 end
714 return text .. inactive .. cat 1411 return text .. inactive .. cat
715end1412end
7161413
717-- Formats an OpenLibrary link, and checks for associated errors.1414
718function openlibrary(id)1415--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
719 local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'1416
1417Formats an OpenLibrary link, and checks for associated errors.
1418
1419]]
1420local function openlibrary(id)
1421 local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
720 local handler = cfg.id_handlers['OL'];1422 local handler = cfg.id_handlers['OL'];
7211423
722 if ( code == "A" ) then1424 if ( code == "A" ) then
723 return externallinkid({link=handler.link, label=handler.label,1425 return external_link_id({link=handler.link, label=handler.label,
724 prefix="http://openlibrary.org/authors/OL",id=id, separator=handler.separator,1426 prefix=handler.prefix .. 'authors/OL',
725 encode = handler.encode})1427 id=id, separator=handler.separator, encode = handler.encode})
726 elseif ( code == "M" ) then1428 elseif ( code == "M" ) then
727 return externallinkid({link=handler.link, label=handler.label,1429 return external_link_id({link=handler.link, label=handler.label,
728 prefix="http://openlibrary.org/books/OL",id=id, separator=handler.separator,1430 prefix=handler.prefix .. 'books/OL',
729 encode = handler.encode})1431 id=id, separator=handler.separator, encode = handler.encode})
730 elseif ( code == "W" ) then1432 elseif ( code == "W" ) then
731 return externallinkid({link=handler.link, label=handler.label,1433 return external_link_id({link=handler.link, label=handler.label,
732 prefix= "http://openlibrary.org/works/OL",id=id, separator=handler.separator,1434 prefix=handler.prefix .. 'works/OL',
733 encode = handler.encode})1435 id=id, separator=handler.separator, encode = handler.encode})
734 else1436 else
735 return externallinkid({link=handler.link, label=handler.label,1437 return external_link_id({link=handler.link, label=handler.label,
736 prefix= "http://openlibrary.org/OL",id=id, separator=handler.separator,1438 prefix=handler.prefix .. 'OL',
737 encode = handler.encode}) .. 1439 id=id, separator=handler.separator, encode = handler.encode}) .. ' ' .. set_error( 'bad_ol' );
738 ' ' .. seterror( 'bad_ol' );
739 end1440 end
740end1441end
7411442
--[[--------------------------< I S S N >----------------------------------------------------------------------

Validates and formats an ISSN.

Spaces, hyphens, and endashes are stripped from the supplied value before validation.  The stripped value must be
exactly eight characters long, made up of digits with an optional X in the last position, and must pass the
is_valid_isxn() checksum test.

When the value validates, it is rendered as two groups of four characters joined by a hyphen so that the external
link never contains a space.  When it does not validate, the value is rendered exactly as the editor supplied it
and a bad_issn error message is appended.

]]
function issn(id)
	local handler = cfg.id_handlers['ISSN'];
	local display_id = id;														-- as supplied by the editor; shown unchanged when validation fails
	local stripped = id:gsub( "[%s-–]", "" );									-- strip spaces, hyphens, and endashes from the issn
	local valid_issn = false;													-- wrong length or improper character unless proven otherwise

	if 8 == stripped:len() and nil ~= stripped:match( "^%d*X?$" ) then			-- 8 characters long, only 0-9 with X allowed in the last position
		valid_issn = is_valid_isxn(stripped, 8);								-- checksum test
	end

	if true == valid_issn then
		display_id = stripped:sub( 1, 4 ) .. "-" .. stripped:sub( 5 );			-- valid: always display with a hyphen at the midpoint
	end

	local text = externallinkid({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=display_id,separator=handler.separator, encode=handler.encode});

	if false == valid_issn then
		return text .. ' ' .. seterror( 'bad_issn' );							-- add an error message if the issn is invalid
	end

	return text;
end
7811443
--[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------

Validate and format a usenet message id.  Simple error checking: the id must have the form 'id-left@id-right' and
must not be enclosed in '<' and/or '>' angle brackets.

The formatted external link is always returned; when the id fails either check a bad_message_id error message is
appended to it.

]]

local function message_id (id)
	local handler = cfg.id_handlers['USENETID'];

	-- text must be declared local; a bare assignment here would create (or overwrite) a global named 'text'
	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then				-- no '@' with text on both sides, or first character is '<', or last character is '>'
		text = text .. ' ' .. set_error( 'bad_message_id' )						-- add an error message if the message id is invalid
	end

	return text
end
8011463
802
803
--[[--------------------------< S E T _ T I T L E T Y P E >----------------------------------------------------

Chooses the title type annotation for a citation.

An explicit |type= value always wins; the special value 'none' suppresses the annotation by returning an empty
string.  When |type= is not set, the default for the template is looked up in cfg.title_types using cite_class as
the key; templates without a default get an empty string so that the result can always be concatenated.

]]

local function set_titletype (cite_class, title_type)
	if not is_set (title_type) then
		return cfg.title_types [cite_class] or '';								-- template's default title type; else empty string for concatenation
	end

	if "none" == title_type then
		return "";																-- |type=none: type parameter not displayed
	end

	return title_type;															-- any other |type= value is used as is
end
840
--[[--------------------------< C H E C K U R L >--------------------------------------------------------------

Reports whether a string looks like a URL.

The test is deliberately shallow: the string is accepted when it is protocol-relative (begins with '//') or when a
colon appears before the first slash, which is taken to be the end of a URI scheme.  Neither the scheme itself nor
the remainder of the URL is validated.

]]
function checkurl( url_str )
	if "//" == url_str:sub(1,2) then											-- protocol-relative url
		return true;
	end

	return nil ~= url_str:match( "^[^/]*:" );									-- something that looks like a scheme
end
8531488
--[[--------------------------< C L E A N _ I S B N >----------------------------------------------------------

Removes irrelevant text from an ISBN: every character other than the digits 0-9, the hyphen, and the uppercase
letter X is stripped.  Hyphens are kept.  Similar to the cleaning used for Special:BookSources.

Returns only the cleaned string; the enclosing parentheses discard the substitution count that string.gsub()
returns as a second value so that the count cannot leak into a caller's argument list.

]]

local function clean_isbn( isbn_str )
	return (isbn_str:gsub( "[^-0-9X]", "" ));
end
8591492
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a copy of argument in which every lua pattern magic character is preceded by '%'.  Use this before handing
arbitrary text to functions such as string.gsub(), which interpret their pattern argument as a pattern rather
than as a literal string.

The percent sign is escaped in a first pass of its own so that the '%' characters inserted by the second pass are
not themselves escaped again.

]]

local function escape_lua_magic_chars (argument)
	local percent_escaped = argument:gsub("%%", "%%%%");						-- replace % with %%
	local fully_escaped = percent_escaped:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");	-- then all other lua magic pattern characters
	return fully_escaped;
end
8701503
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strips wiki bold and italic markup from argument so that the markup does not contaminate COinS metadata.

Runs of apostrophes are removed longest first: five (bold italic), then four (empty italic), then three (bold),
then two (italic).  After each removal the search starts over with the longest run, and it stops when no run of
two or more apostrophes remains.  The markup is presumed to be valid; when it is not, stray single apostrophes can
be left behind.

An argument that is not set is returned unchanged.

]]

local function strip_apostrophe_markup (argument)
	if not is_set (argument) then return argument; end

	local runs = {																-- longest first; order matters
		"%'%'%'%'%'",															-- bold italic (5)
		"%'%'%'%'",																-- italic start and end without content (4)
		"%'%'%'",																-- bold (3)
		"%'%'",																	-- italic (2)
		};
	local found;

	repeat
		found = false;
		for _, run in ipairs (runs) do
			if argument:match (run) then
				argument = argument:gsub (run, "");								-- remove all instances of this run
				found = true;
				break;															-- start over with the longest run
			end
		end
	until not found;

	return argument;															-- done
end
8961530
--[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pair).

Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with
strings of %27%27...  A leading two-letter language prefix ('xx:') is removed from the script value.  When both
values are present after cleaning they are joined with a single space; a value that is not set contributes an
empty string.

]]

local function make_coins_title (title, script)
	local function cleaned (value, is_script)
		if not is_set (value) then
			return '';															-- not set: contribute an empty string
		end
		if is_script then
			value = value:gsub ('^%l%l%s*:%s*', '');							-- remove language prefix if present (value may now be empty string)
		end
		return strip_apostrophe_markup (value);									-- strip any apostrophe markup
	end

	local title_part = cleaned (title, false);
	local script_part = cleaned (script, true);
	local separator = '';

	if is_set (title_part) and is_set (script_part) then
		separator = ' ';														-- add a space between the two parts
	end

	return title_part .. separator .. script_part;								-- return the concatenation
end
9221556
--[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------

Extracts page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.

For each external link of the form '[url label]' the url and the white space that follows it are removed, leaving
the label.  Remaining square brackets are then removed, endashes become hyphens, and every named html entity
(&ndash; and the like) is also replaced with a hyphen.  Numeric entities are not handled.

A value that is not set is returned unchanged.

]]

local function get_coins_pages (pages)
	if not is_set (pages) then return pages; end								-- if no page numbers then we're done

	local link_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]";						-- captures the url and following space(s) of an external link
	local url_text = pages:match (link_pattern);

	while nil ~= url_text do													-- until there are no more urls
		pages = pages:gsub (escape_lua_magic_chars (url_text), "");				-- url_text is a literal so escape it; remove as many instances as possible
		url_text = pages:match (link_pattern);
	end

	pages = pages:gsub("[%[%]]", "");											-- remove the brackets
	pages = pages:gsub("–", "-" );												-- replace endashes with hyphens
	pages = pages:gsub("&%w+;", "-" );											-- and replace named html entities (&ndash; etc.) with hyphens
	return pages;
end
9431578
--[[--------------------------< I S _ V A L I D _ I S X N >----------------------------------------------------

ISBN-10 and ISSN validator.  Calculates a weighted checksum across all of the isbn/issn characters including the
check digit: the first character is weighted len, the next len-1, and so on down to 1 for the check digit.  An X
counts as 10.  The number is valid when the checksum is evenly divisible by 11.

Before calling this function, the isbn/issn must be checked for length and stripped of dashes, spaces and other
non-isxn characters.  ISBN-13 is not handled here.

]]
function is_valid_isxn (isxn_str, len)
	local char_codes = { isxn_str:byte(1, len) };								-- make a table of bytes
	local top_weight = len + 1;													-- weight of position i is top_weight - i
	local checksum = 0;

	for position, char_code in ipairs( char_codes ) do
		local value;
		if string.byte( "X" ) == char_code then
			value = 10;															-- check digit X represents 10 decimal
		else
			value = tonumber( string.char(char_code) );
		end
		checksum = checksum + value * ( top_weight - position );
	end

	return 0 == checksum % 11;													-- true if calculation result is zero
end
962
--[[--------------------------< C H E C K I S B N >------------------------------------------------------------

Determines whether an ISBN string is valid.

The string may contain only digits, hyphens, white space, and the uppercase letter X.  After hyphens and spaces
are removed it must be 10 or 13 characters long:
	10 characters: digits with an optional X in the last position; checksum tested by is_valid_isxn()
	13 characters: digits only, beginning with 978 or 979; digits are weighted alternately 1 and 3 and the sum
		must be evenly divisible by 10

Returns true when the isbn is valid; false otherwise.

]]
function checkisbn( isbn_str )
	if isbn_str:match("[^%s-0-9X]") ~= nil then									-- fail if isbn_str contains anything but digits, hyphens, white space, or the uppercase X
		return false;
	end

	local stripped = isbn_str:gsub( "-", "" ):gsub( " ", "" );					-- remove hyphens and spaces
	local len = stripped:len();

	if 10 == len then
		if nil == stripped:match( "^%d*X?$" ) then
			return false;
		end
		return is_valid_isxn(stripped, 10);
	end

	if 13 == len then
		if nil == stripped:match( "^97[89]%d*$" ) then							-- isbn13 begins with 978 or 979
			return false;
		end
		local checksum = 0;
		for i = 1, len do
			checksum = checksum + (3 - 2*(i % 2)) * tonumber( stripped:sub(i, i) );	-- odd positions weigh 1, even positions weigh 3
		end
		return checksum % 10 == 0;
	end

	return false;																-- wrong length
end
986
-- Gets the display text for a wikilink: both the piped form (target|label) and the
-- plain form (label) give label.  Every wikilink in str is replaced by its display text
-- with leading and trailing white space trimmed; text outside of wikilinks is untouched.
local function remove_wiki_link( str )
	local function display_text (link)
		local label = link:gsub( "^[^|]*|(.*)$", "%1" );						-- drop the link target and first pipe when there is one
		local trimmed = label:gsub("^%s*(.-)%s*$", "%1");						-- trim surrounding white space
		return trimmed;
	end

	local unlinked = str:gsub( "%[%[([^%[%]]*)%]%]", display_text );
	return unlinked;
end
993
-- Escape sequences for content that will be used for URL descriptions.
--
-- Square brackets are replaced with their numeric html entities and newlines with spaces so
-- that the text cannot terminate or break the external link markup that will enclose it.
-- When str contains a wikilink, a wikilink_in_url error message is added to z.message_tail;
-- the text is still escaped and returned.
--
-- Returns only the escaped string: the enclosing parentheses discard the substitution count
-- that string.gsub() returns as a second value so that it cannot leak into a caller's
-- argument list.
function safeforurl( str )
	if str:match( "%[%[.-%]%]" ) ~= nil then 
		table.insert( z.message_tail, { seterror( 'wikilink_in_url', {}, true ) } );
	end

	return (str:gsub( '[%[%]\n]', {	
		['['] = '&#91;',
		[']'] = '&#93;',
		['\n'] = ' ' } ));
end
10051585
-- Converts every hyphen in str to an endash.
--
-- str is returned unchanged when it is not set or when it contains any of the characters
-- [ ] { } < > because those suggest markup in which a hyphen may be significant.
--
-- Always returns exactly one value: the enclosing parentheses discard the substitution count
-- that string.gsub() returns as a second value, so both return paths have the same arity.
local function hyphen_to_dash( str )
	if not is_set(str) or str:match( "[%[%]{}<>]" ) ~= nil then
		return str;
	end 
	return (str:gsub( '-', '–' ));
end
10131593
-- Protects a string that will be wrapped in wiki italic markup '' ... ''
--
-- Note: We cannot use <i> for italics, as the expected behavior for italics specified by
-- ''...'' in the title is that they will be inverted (i.e. unitalicized) in the resulting
-- references.  In addition, <i> and '' tend to interact poorly under Mediawiki's HTML tidy.
--
-- An empty <span /> is placed before a leading apostrophe and after a trailing apostrophe,
-- and newlines are replaced with spaces because they break italics.  A str that is not set
-- is returned unchanged.
--
-- Always returns exactly one value: the enclosing parentheses discard the substitution count
-- that string.gsub() returns as a second value, so both return paths have the same arity.
function safeforitalics( str )
	if not is_set(str) then
		return str;
	end

	if str:sub(1,1) == "'" then str = "<span />" .. str; end
	if str:sub(-1,-1) == "'" then str = str .. "<span />"; end

	return (str:gsub( '\n', ' ' ));												-- remove newlines as they break italics
end
1031
1032--[[--------------------------< S A F E J O I N >--------------------------------------------------------------
10331595
1034Joins a sequence of strings together while checking for duplicate separation characters.1596Joins a sequence of strings together while checking for duplicate separation characters.
10351597
1036]]1598]]
1037function safejoin( tbl, duplicate_char )1599
1600local function safe_join( tbl, duplicate_char )
1038 --[[1601 --[[
1039 Note: we use string functions here, rather than ustring functions.1602 Note: we use string functions here, rather than ustring functions.
1040 1603
1041 This has considerably faster performance and should work correctly as 1604 This has considerably faster performance and should work correctly as
1042 long as the duplicate_char is strict ASCII. The strings1605 long as the duplicate_char is strict ASCII. The strings
1043 in tbl may be ASCII or UTF8.1606 in tbl may be ASCII or UTF8.
1044 ]]1607 ]]
1045 1608
1046 local str = ''; -- the output string1609 local str = ''; -- the output string
1047 local comp = ''; -- what does 'comp' mean?1610 local comp = ''; -- what does 'comp' mean?
1048 local end_chr = '';1611 local end_chr = '';
1049 local trim;1612