Module:String

From Center for Integrated Circuits and Devices Research (CIDR)
Revision as of 10:30, 3 November 2019 by wp>RexxS (Changed protection level for "Module:String": Cascade-protected from main page, so no point in enabling TE ([Edit=Require administrator access] (indefinite) [Move=Require administrator access] (indefinite)))
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:String/doc

  1 --[[
  2 
  3 This module is intended to provide access to basic string functions.
  4 
  5 Most of the functions provided here can be invoked with named parameters,
  6 unnamed parameters, or a mixture.  If named parameters are used, Mediawiki will
  7 automatically remove any leading or trailing whitespace from the parameter.
  8 Depending on the intended use, it may be advantageous to either preserve or
  9 remove such whitespace.
 10 
 11 Global options
 12     ignore_errors: If set to 'true' or 1, any error condition will result in
 13         an empty string being returned rather than an error message.
 14 
 15     error_category: If an error occurs, specifies the name of a category to
 16         include with the error message.  The default category is
 17         [Category:Errors reported by Module String].
 18 
 19     no_category: If set to 'true' or 1, no category will be added if an error
 20         is generated.
 21 
 22 Unit tests for this module are available at Module:String/tests.
 23 ]]
 24 
 25 local str = {} -- table that collects every function defined below; returned at the end of the file
 26 
 27 --[[
 28 len
 29 
 30 This function returns the length of the target string.
 31 
 32 Usage:
 33 {{#invoke:String|len|target_string|}}
 34 OR
 35 {{#invoke:String|len|s=target_string}}
 36 
 37 Parameters
 38     s: The string whose length to report
 39 
 40 If invoked using named parameters, Mediawiki will automatically remove any leading or
 41 trailing whitespace from the target string.
 42 ]]
 43 function str.len( frame )
 44 	local new_args = str._getParameters( frame.args, {'s'} )
 45 	local s = new_args['s'] or ''
 46 	return mw.ustring.len( s )
 47 end
 48 
 49 --[[
 50 sub
 51 
 52 This function returns a substring of the target string at specified indices.
 53 
 54 Usage:
 55 {{#invoke:String|sub|target_string|start_index|end_index}}
 56 OR
 57 {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}
 58 
 59 Parameters
 60     s: The string to return a subset of
 61     i: The first index of the substring to return, defaults to 1.
 62     j: The last index of the string to return, defaults to the last character.
 63 
 64 The first character of the string is assigned an index of 1.  If either i or j
 65 is a negative value, it is interpreted the same as selecting a character by
 66 counting from the end of the string.  Hence, a value of -1 is the same as
 67 selecting the last character of the string.
 68 
 69 If the requested indices are out of range for the given string, an error is
 70 reported.
 71 ]]
 72 function str.sub( frame )
 73 	local new_args = str._getParameters( frame.args, { 's', 'i', 'j' } )
 74 	local s = new_args['s'] or ''
 75 	local i = tonumber( new_args['i'] ) or 1
 76 	local j = tonumber( new_args['j'] ) or -1
 77 
 78 	local len = mw.ustring.len( s )
 79 
 80 	-- Convert negatives for range checking
 81 	if i < 0 then
 82 		i = len + i + 1
 83 	end
 84 	if j < 0 then
 85 		j = len + j + 1
 86 	end
 87 
 88 	if i > len or j > len or i < 1 or j < 1 then
 89 		return str._error( 'String subset index out of range' )
 90 	end
 91 	if j < i then
 92 		return str._error( 'String subset indices out of order' )
 93 	end
 94 
 95 	return mw.ustring.sub( s, i, j )
 96 end
 97 
 98 --[[
 99 This function implements the features of {{str sub old}} and is kept in order
100 to maintain these older templates.
101 ]]
-- Legacy substring helper driven by a zero-based offset and a length.
-- Reads the named arguments i (offset, default 0), len (optional length)
-- and s (the string) directly from frame.args.  When len is absent the
-- end position passed to mw.ustring.sub is nil.
function str.sublength( frame )
	local args = frame.args
	local offset = tonumber( args.i ) or 0
	local count = tonumber( args.len )

	local last
	if count then
		last = offset + count
	end

	return mw.ustring.sub( args.s, offset + 1, last )
end
107 
108 --[[
109 match
110 
111 This function returns a substring from the source string that matches a
112 specified pattern.
113 
114 Usage:
115 {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}
116 OR
117 {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index
118     |match=match_number|plain=plain_flag|nomatch=nomatch_output}}
119 
120 Parameters
121     s: The string to search
122     pattern: The pattern or string to find within the string
123     start: The index within the source string to start the search.  The first
124         character of the string has index 1.  Defaults to 1.
125     match: In some cases it may be possible to make multiple matches on a single
126         string.  This specifies which match to return, where the first match is
127         match= 1.  If a negative number is specified then a match is returned
128         counting from the last match.  Hence match = -1 is the same as requesting
129         the last match.  Defaults to 1.
130     plain: A flag indicating that the pattern should be understood as plain
131         text.  Defaults to false.
132     nomatch: If no match is found, output the "nomatch" value rather than an error.
133 
134 If invoked using named parameters, Mediawiki will automatically remove any leading or
135 trailing whitespace from each string.  In some circumstances this is desirable, in
136 other cases one may want to preserve the whitespace.
137 
138 If the match_number or start_index are out of range for the string being queried, then
139 this function generates an error.  An error is also generated if no match is found.
140 If one adds the parameter ignore_errors=true, then the error will be suppressed and
141 an empty string will be returned on any failure.
142 
143 For information on constructing Lua patterns, a form of [regular expression], see:
144 
145 * http://www.lua.org/manual/5.1/manual.html#5.4.1
146 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
147 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
148 
149 ]]
150 -- This sub-routine is exported for use in other modules
-- Core matcher used by str.match.
--   s            source string to search
--   pattern      pattern to look for (escaped first when plain_flag is set)
--   start        position at which the search begins; defaults to 1.  A
--                negative value is passed on to mw.ustring, which counts
--                it from the end of the string.
--   match_index  which match to return: 1 is the first match, negative
--                values count back from the last match; defaults to 1
--   plain_flag   when truthy, pattern is treated as literal text
--   nomatch      value returned instead of an error when nothing matches
-- Only the first value produced by each match is kept.  Returns that value,
-- the nomatch value, or the output of str._error.
function str._match( s, pattern, start, match_index, plain_flag, nomatch )
	if s == '' then
		return str._error( 'Target string is empty' )
	end
	if pattern == '' then
		return str._error( 'Pattern string is empty' )
	end

	start = tonumber( start ) or 1
	if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
		return str._error( 'Requested start is out of range' )
	end

	-- Normalise the same way str.match does, so that callers from other
	-- modules may omit the index or pass it as a numeric string without
	-- triggering a comparison error further down.
	match_index = math.floor( tonumber( match_index ) or 1 )
	if match_index == 0 then
		return str._error( 'Match index is out of range' )
	end

	if plain_flag then
		pattern = str._escapePattern( pattern )
	end

	local result
	if match_index == 1 then
		-- Find first match is simple case
		result = mw.ustring.match( s, pattern, start )
	else
		-- gmatch has no start argument, so drop the skipped prefix first.
		-- The test is "~= 1" rather than "> 1" so that a negative start
		-- (already validated above) is honoured here exactly as it is in
		-- the first-match branch instead of being silently ignored.
		if start ~= 1 then
			s = mw.ustring.sub( s, start )
		end

		local iterator = mw.ustring.gmatch( s, pattern )
		if match_index > 0 then
			-- Forward search: count matches down until the requested one
			for w in iterator do
				match_index = match_index - 1
				if match_index == 0 then
					result = w
					break
				end
			end
		else
			-- Reverse search: collect every match, then index from the end
			local matches = {}
			for w in iterator do
				matches[#matches + 1] = w
			end
			result = matches[#matches + 1 + match_index]
		end
	end

	if result == nil then
		if nomatch == nil then
			return str._error( 'Match not found' )
		end
		return nomatch
	end
	return result
end
211 -- This is the entry point for #invoke:String|match
-- Collects the arguments s, pattern, start, match, plain and nomatch (named
-- or positional), applies their defaults and hands them to str._match.
function str.match( frame )
	local params = str._getParameters(
		frame.args,
		{ 's', 'pattern', 'start', 'match', 'plain', 'nomatch' }
	)

	local source = params.s or ''
	local pattern = params.pattern or ''
	local start = tonumber( params.start ) or 1
	local plain_flag = str._getBoolean( params.plain or false )
	-- The match number is rounded down to a whole number before use
	local match_index = math.floor( tonumber( params.match ) or 1 )

	return str._match( source, pattern, start, match_index, plain_flag, params.nomatch )
end
223 
224 --[[
225 pos
226 
227 This function returns a single character from the target string at position pos.
228 
229 Usage:
230 {{#invoke:String|pos|target_string|index_value}}
231 OR
232 {{#invoke:String|pos|target=target_string|pos=index_value}}
233 
234 Parameters
235     target: The string to search
236     pos: The index for the character to return
237 
238 If invoked using named parameters, Mediawiki will automatically remove any leading or
239 trailing whitespace from the target string.  In some circumstances this is desirable, in
240 other cases one may want to preserve the whitespace.
241 
242 The first character has an index value of 1.
243 
244 If one requests a negative value, this function will select a character by counting backwards
245 from the end of the string.  In other words pos = -1 is the same as asking for the last character.
246 
247 A requested value of zero, or a value greater than the length of the string returns an error.
248 ]]
-- Entry point for #invoke:String|pos.
-- Returns the single character of `target` found at index `pos`.  A missing
-- or non-numeric pos is treated as 0, which is reported as out of range.
function str.pos( frame )
	local params = str._getParameters( frame.args, { 'target', 'pos' } )
	local text = params.target or ''
	local index = tonumber( params.pos ) or 0

	if index == 0 or math.abs( index ) > mw.ustring.len( text ) then
		return str._error( 'String index out of range' )
	else
		return mw.ustring.sub( text, index, index )
	end
end
260 
261 --[[
262 str_find
263 
264 This function duplicates the behavior of {{str_find}}, including all of its quirks.
265 This is provided in order to support existing templates, but is NOT RECOMMENDED for
266 new code and templates.  New code is recommended to use the "find" function instead.
267 
268 Returns the first index in "source" that is a match to "target".  Indexing is 1-based,
269 and the function returns -1 if the "target" string is not present in "source".
270 
271 Important Note: If the "target" string is empty / missing, this function returns a
272 value of "1", which is generally unexpected behavior, and must be accounted for
273 separately.
274 ]]
-- Plain-text search of `target` inside `source`.
-- Returns 1 when target is empty or missing, the index reported by
-- mw.ustring.find when target is found, and -1 otherwise.
function str.str_find( frame )
	local params = str._getParameters( frame.args, { 'source', 'target' } )
	local haystack = params.source or ''
	local needle = params.target or ''

	if needle == '' then
		return 1
	end

	-- Fourth argument true: the needle is matched literally, not as a pattern
	local found = mw.ustring.find( haystack, needle, 1, true )
	return found or -1
end
291 
292 --[[
293 find
294 
295 This function allows one to search for a target string or pattern within another
296 string.
297 
298 Usage:
299 {{#invoke:String|find|source_str|target_string|start_index|plain_flag}}
300 OR
301 {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}
302 
303 Parameters
304     source: The string to search
305     target: The string or pattern to find within source
306     start: The index within the source string to start the search, defaults to 1
307     plain: Boolean flag indicating that target should be understood as plain
308         text and not as a Lua style regular expression, defaults to true
309 
310 If invoked using named parameters, Mediawiki will automatically remove any leading or
311 trailing whitespace from the parameter.  In some circumstances this is desirable, in
312 other cases one may want to preserve the whitespace.
313 
314 This function returns the first index >= "start" where "target" can be found
315 within "source".  Indices are 1-based.  If "target" is not found, then this
316 function returns 0.  If either "source" or "target" are missing / empty, this
317 function also returns 0.
318 
319 This function should be safe for UTF-8 strings.
320 ]]
-- Entry point for #invoke:String|find.
-- Arguments (named or positional, in this order): source, target, start, plain.
-- Returns the index reported by mw.ustring.find, or 0 when nothing is found
-- or when either source or target is empty.
function str.find( frame )
	local params = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
	local haystack = params.source or ''
	local needle = params.target or ''
	local from = tonumber( params.start ) or 1
	local plain_arg = params.plain or true

	if haystack == '' or needle == '' then
		return 0
	end

	local literal = str._getBoolean( plain_arg )
	local found = mw.ustring.find( haystack, needle, from, literal )
	return found or 0
end
341 
342 --[[
343 replace
344 
345 This function allows one to replace a target string or pattern within another
346 string.
347 
348 Usage:
349 {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
350 OR
351 {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string|
352    count=replacement_count|plain=plain_flag}}
353 
354 Parameters
355     source: The string to search
356     pattern: The string or pattern to find within source
357     replace: The replacement text
358     count: The number of occurrences to replace, defaults to all.
359     plain: Boolean flag indicating that pattern should be understood as plain
360         text and not as a Lua style regular expression, defaults to true
361 ]]
-- Entry point for #invoke:String|replace.
-- Arguments (named or positional, in this order): source, pattern, replace,
-- count, plain.  Returns source with matches of pattern replaced; source is
-- returned untouched when either source or pattern is empty.
function str.replace( frame )
	local params = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
	local text = params.source or ''
	local pattern = params.pattern or ''
	local replacement = params.replace or ''
	local limit = tonumber( params.count )
	local plain_arg = params.plain or true

	if text == '' or pattern == '' then
		return text
	end

	if str._getBoolean( plain_arg ) then
		pattern = str._escapePattern( pattern )
		-- In plain mode "%" in the replacement must not start a %1-style
		-- sequence, so every "%" is doubled.
		replacement = mw.ustring.gsub( replacement, "%%", "%%%%" )
	end

	-- The count is only passed along when the caller supplied one.  The
	-- parentheses keep just the resulting string, not gsub's match count.
	if limit == nil then
		return ( mw.ustring.gsub( text, pattern, replacement ) )
	end
	return ( mw.ustring.gsub( text, pattern, replacement, limit ) )
end
390 
391 --[[
392     simple function to pipe string.rep to templates.
393 ]]
-- Repeats the first unnamed argument the number of times given by the
-- second unnamed argument, using string.rep.  Returns the output of
-- str._error when the second argument is not a number.
function str.rep( frame )
	local args = frame.args
	local repetitions = tonumber( args[2] )

	if repetitions then
		return string.rep( args[1] or '', repetitions )
	end

	return str._error( 'function rep expects a number as second parameter, received "' .. ( args[2] or '' ) .. '"' )
end
401 
402 --[[
403 escapePattern
404 
405 This function escapes special characters from a Lua string pattern. See [1]
406 for details on how patterns work.
407 
408 [1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
409 
410 Usage:
411 {{#invoke:String|escapePattern|pattern_string}}
412 
413 Parameters
414     pattern_string: The pattern string to escape.
415 ]]
-- Entry point for #invoke:String|escapePattern.
-- Passes the first unnamed argument through str._escapePattern and returns
-- the escaped string; reports an error when that argument is absent.
function str.escapePattern( frame )
	local pattern_str = frame.args[1]

	if pattern_str then
		-- Parentheses keep only the first value returned by the helper
		return ( str._escapePattern( pattern_str ) )
	end

	return str._error( 'No pattern string specified' )
end
424 
425 --[[
426 count
427 This function counts the number of occurrences of one string in another.
428 ]]
-- Entry point for #invoke:String|count.
-- Arguments (named or positional, in this order): source, pattern, plain.
-- Returns the number of substitutions mw.ustring.gsub performs when every
-- match of pattern in source is replaced by the empty string.
function str.count(frame)
	local params = str._getParameters(frame.args, {'source', 'pattern', 'plain'})
	local haystack = params.source or ''
	local needle = params.pattern or ''

	if str._getBoolean(params.plain or true) then
		needle = str._escapePattern(needle)
	end

	-- gsub returns (new string, number of substitutions); keep the second
	local occurrences = select(2, mw.ustring.gsub(haystack, needle, ''))
	return occurrences
end
440 
441 --[[
442 endswith
443 This function determines whether a string ends with another string.
444 ]]
-- Entry point for #invoke:String|endswith.
-- Arguments (named or positional, in this order): source, pattern.
-- Returns "yes" when source ends with pattern (compared as literal text)
-- and the empty string otherwise.
function str.endswith(frame)
	local params = str._getParameters(frame.args, {'source', 'pattern'})
	local text = params.source or ''
	local suffix = params.pattern or ''

	-- All strings end with the empty string.
	if suffix == '' then
		return "yes"
	end

	-- Compare the last len(suffix) characters of the text with the suffix
	local tail = mw.ustring.sub(text, -mw.ustring.len(suffix), -1)
	return tail == suffix and "yes" or ""
end
459 
460 --[[
461 join
462 
463 Join all non empty arguments together; the first argument is the separator.
464 Usage:
465 {{#invoke:String|join|sep|one|two|three}}
466 ]]
-- Entry point for #invoke:String|join.
-- The first unnamed argument is the separator; every later unnamed argument
-- that is not the empty string is joined using that separator.
function str.join(frame)
	local separator
	local pieces = {}

	for _, value in ipairs( frame.args ) do
		if not separator then
			-- Nothing stored yet: this value becomes the separator
			separator = value
		elseif value ~= '' then
			pieces[#pieces + 1] = value
		end
	end

	return table.concat( pieces, separator or '' )
end
481 
482 --[[
483 Helper function that populates the argument list given that user may need to use a mix of
484 named and unnamed parameters.  This is relevant because named parameters are not
485 identical to unnamed parameters due to string trimming, and when dealing with strings
486 we sometimes want to either preserve or remove that whitespace depending on the application.
487 ]]
-- Builds a table keyed by the names in arg_list.
-- For each name, the value stored under that name in frame_args is used if
-- present; otherwise the next not-yet-consumed positional entry of
-- frame_args is taken.  Positional entries are consumed in order, and only
-- by names that had no named value.
function str._getParameters( frame_args, arg_list )
	local resolved = {}
	local next_positional = 1

	for _, name in ipairs( arg_list ) do
		local value = frame_args[name]
		if value == nil then
			value = frame_args[next_positional]
			next_positional = next_positional + 1
		end
		resolved[name] = value
	end

	return resolved
end
504 
505 --[[
506 Helper function to handle error messages.
507 ]]
-- Formats an error message for output.
-- Reads error_category, ignore_errors and no_category from the arguments of
-- the current frame.  Returns '' when ignore_errors is set; otherwise the
-- message wrapped in an error-styled <strong> element, preceded by a
-- category link unless the category is empty or no_category is set.
function str._error( error_str )
	local args = mw.getCurrentFrame().args
	local category = args.error_category or 'Errors reported by Module String'
	local suppress = args.ignore_errors or false
	local skip_category = args.no_category or false

	if str._getBoolean( suppress ) then
		return ''
	end

	local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
	if category == '' or str._getBoolean( skip_category ) then
		return message
	end

	return '[[Category:' .. category .. ']]' .. message
end
525 
526 --[[
527 Helper Function to interpret boolean strings
528 ]]
-- Converts a string or boolean into a boolean.
-- Booleans are returned unchanged.  For strings, 'false', 'no', '0' and ''
-- (compared after lower-casing) give false and anything else gives true.
-- Any other type raises an error.
function str._getBoolean( boolean_str )
	local kind = type( boolean_str )

	if kind == 'boolean' then
		return boolean_str
	end
	if kind ~= 'string' then
		error( 'No boolean value found' )
	end

	local falsy = { ['false'] = true, ['no'] = true, ['0'] = true, [''] = true }
	return not falsy[ boolean_str:lower() ]
end
547 
548 --[[
549 Helper function that escapes all pattern characters so that they will be treated
550 as plain text.
551 ]]
-- Prefixes each of the pattern characters ( ) . % + - * ? [ ^ $ ] in
-- pattern_str with "%" so the result matches pattern_str as literal text.
-- Returns exactly one value, the escaped string.
function str._escapePattern( pattern_str )
	-- gsub returns the new string AND the number of substitutions.  The
	-- outer parentheses drop the count; without them, a caller writing
	-- f( s, str._escapePattern( p ) ) would pass the count to f as an extra
	-- trailing argument (for example as the start index of a find call).
	return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
555 
556 return str