Module:String: Difference between revisions
Jump to navigation
Jump to search
(Created page with "--[[ This module is intended to provide access to basic string functions. Most of the functions provided here can be invoked with named parameters, unnamed parameters, or a mixture. If named parameters are used, Mediawiki will automatically remove any leading or trailing whitespace from the parameter. Depending on the intended use, it may be advantageous to either preserve or remove such whitespace. Global options ignore_errors: If set to 'true' or 1, any error c...") |
m (1 revision imported) |
(No difference)
|
Latest revision as of 23:03, 4 September 2022
Documentation for this module may be created at Module:String/doc
1 --[[
2
3 This module is intended to provide access to basic string functions.
4
5 Most of the functions provided here can be invoked with named parameters,
6 unnamed parameters, or a mixture. If named parameters are used, Mediawiki will
7 automatically remove any leading or trailing whitespace from the parameter.
8 Depending on the intended use, it may be advantageous to either preserve or
9 remove such whitespace.
10
11 Global options
12 ignore_errors: If set to 'true' or 1, any error condition will result in
13 an empty string being returned rather than an error message.
14
15 error_category: If an error occurs, specifies the name of a category to
16 include with the error message. The default category is
17 [Category:Errors reported by Module String].
18
19 no_category: If set to 'true' or 1, no category will be added if an error
20 is generated.
21
22 Unit tests for this module are available at Module:String/tests.
23 ]]
24
-- Module table: every function defined below is attached to it.
local str = {}
26
27 --[[
28 len
29
30 This function returns the length of the target string.
31
32 Usage:
33 {{#invoke:String|len|target_string|}}
34 OR
35 {{#invoke:String|len|s=target_string}}
36
37 Parameters
38 s: The string whose length to report
39
40 If invoked using named parameters, Mediawiki will automatically remove any leading or
41 trailing whitespace from the target string.
42 ]]
-- Entry point for {{#invoke:String|len}}.
-- Resolves the "s" argument (named or first positional) and returns its
-- length as reported by mw.ustring.len. A missing argument counts as ''.
function str.len( frame )
	local args = str._getParameters( frame.args, { 's' } )
	local target = args.s
	if not target then
		target = ''
	end
	return mw.ustring.len( target )
end
48
49 --[[
50 sub
51
52 This function returns a substring of the target string at specified indices.
53
54 Usage:
55 {{#invoke:String|sub|target_string|start_index|end_index}}
56 OR
57 {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}
58
59 Parameters
60 s: The string to return a subset of
61 i: The first index of the substring to return, defaults to 1.
62 j: The last index of the string to return, defaults to the last character.
63
64 The first character of the string is assigned an index of 1. If either i or j
65 is a negative value, it is interpreted the same as selecting a character by
66 counting from the end of the string. Hence, a value of -1 is the same as
67 selecting the last character of the string.
68
69 If the requested indices are out of range for the given string, an error is
70 reported.
71 ]]
-- Entry point for {{#invoke:String|sub}}.
-- Arguments (named or positional, in this order): s, i, j.
--   s: source string (defaults to '')
--   i: first index (defaults to 1)
--   j: last index (defaults to -1, i.e. the last character)
-- Returns the requested substring, or the result of str._error when the
-- indices fall outside the string or are out of order.
function str.sub( frame )
	local args = str._getParameters( frame.args, { 's', 'i', 'j' } )
	local text = args.s or ''
	local first = tonumber( args.i ) or 1
	local last = tonumber( args.j ) or -1
	local length = mw.ustring.len( text )

	-- Negative indices count backwards from the end of the string; turn
	-- them into absolute positions so the range checks below are uniform.
	local function absolute( index )
		if index < 0 then
			return length + index + 1
		end
		return index
	end
	first = absolute( first )
	last = absolute( last )

	if first < 1 or last < 1 or first > length or last > length then
		return str._error( 'String subset index out of range' )
	end
	if last < first then
		return str._error( 'String subset indices out of order' )
	end

	return mw.ustring.sub( text, first, last )
end
97
98 --[[
99 This function implements the features of {{str sub old}} and is kept in order
100 to maintain these older templates.
101 ]]
-- Legacy entry point backing {{str sub old}}.
-- Named arguments only:
--   s:   source string (a missing value is treated as '')
--   i:   zero-based offset of the first character to return (defaults to 0)
--   len: number of characters to return; when absent or non-numeric the
--        substring runs to the end of the string
function str.sublength( frame )
	-- Default to '' like the other entry points so that a missing "s" yields
	-- an empty result rather than a Lua error inside mw.ustring.sub.
	local s = frame.args.s or ''
	local i = tonumber( frame.args.i ) or 0
	local len = tonumber( frame.args.len )
	-- "len and ( i + len )" evaluates to nil when len is absent, which makes
	-- mw.ustring.sub use its default end position.
	return mw.ustring.sub( s, i + 1, len and ( i + len ) )
end
107
108 --[[
109 match
110
111 This function returns a substring from the source string that matches a
112 specified pattern.
113
114 Usage:
115 {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch}}
116 OR
117 {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index
118 |match=match_number|plain=plain_flag|nomatch=nomatch_output}}
119
120 Parameters
121 s: The string to search
122 pattern: The pattern or string to find within the string
123 start: The index within the source string to start the search. The first
124 character of the string has index 1. Defaults to 1.
125 match: In some cases it may be possible to make multiple matches on a single
126 string. This specifies which match to return, where the first match is
127 match= 1. If a negative number is specified then a match is returned
128 counting from the last match. Hence match = -1 is the same as requesting
129 the last match. Defaults to 1.
130 plain: A flag indicating that the pattern should be understood as plain
131 text. Defaults to false.
132 nomatch: If no match is found, output the "nomatch" value rather than an error.
133
134 If invoked using named parameters, Mediawiki will automatically remove any leading or
135 trailing whitespace from each string. In some circumstances this is desirable, in
136 other cases one may want to preserve the whitespace.
137
138 If the match_number or start_index are out of range for the string being queried, then
139 this function generates an error. An error is also generated if no match is found.
140 If one adds the parameter ignore_errors=true, then the error will be suppressed and
141 an empty string will be returned on any failure.
142
143 For information on constructing Lua patterns, a form of [regular expression], see:
144
145 * https://www.lua.org/manual/5.1/manual.html#5.4.1
146 * https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
147 * https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
148
149 ]]
150 -- This sub-routine is exported for use in other modules
--[[
Core implementation of "match".

Parameters
    s: The string to search
    pattern: The pattern (or plain text, see "plain") to look for
    start: Index at which the search starts; defaults to 1. Negative values
        count from the end of the string.
    match_index: Which match to return; 1 is the first match, -1 the last.
        Defaults to 1.
    plain: When truthy, the pattern is escaped and matched as plain text
    nomatch: Value returned instead of an error when nothing matches

Returns the matched text, "nomatch" when given and nothing matched, or the
result of str._error for invalid input or a missing match.
]]
function str._match( s, pattern, start, match_index, plain, nomatch )
	if s == '' then
		return str._error( 'Target string is empty' )
	end
	if pattern == '' then
		return str._error( 'Pattern string is empty' )
	end
	start = tonumber( start ) or 1
	if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
		return str._error( 'Requested start is out of range' )
	end
	-- Direct callers from other modules may omit match_index; default it the
	-- same way the #invoke entry point does.
	match_index = math.floor( tonumber( match_index ) or 1 )
	if match_index == 0 then
		return str._error( 'Match index is out of range' )
	end
	-- Test the "plain" parameter itself. The earlier code tested an undefined
	-- global, so plain-text matching was never applied.
	if plain then
		pattern = str._escapePattern( pattern )
	end

	local result
	if match_index == 1 then
		-- Find first match is simple case
		result = mw.ustring.match( s, pattern, start )
	else
		-- gmatch has no start argument, so trim the string first. Any start
		-- other than 1 (including negative, from-the-end values) needs this.
		if start ~= 1 then
			s = mw.ustring.sub( s, start )
		end

		local iterator = mw.ustring.gmatch( s, pattern )
		if match_index > 0 then
			-- Forward search: count matches down until the requested one
			for w in iterator do
				match_index = match_index - 1
				if match_index == 0 then
					result = w
					break
				end
			end
		else
			-- Reverse search: collect every match, then index from the end
			local result_table = {}
			local count = 1
			for w in iterator do
				result_table[count] = w
				count = count + 1
			end

			result = result_table[ count + match_index ]
		end
	end

	if result == nil then
		if nomatch == nil then
			return str._error( 'Match not found' )
		else
			return nomatch
		end
	else
		return result
	end
end

-- This is the entry point for #invoke:String|match
-- Arguments (named or positional, in this order):
-- s, pattern, start, match, plain, nomatch.
function str.match( frame )
	local new_args = str._getParameters( frame.args, { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' } )
	local s = new_args['s'] or ''
	local start = tonumber( new_args['start'] ) or 1
	local plain_flag = str._getBoolean( new_args['plain'] or false )
	local pattern = new_args['pattern'] or ''
	local match_index = math.floor( tonumber( new_args['match'] ) or 1 )
	local nomatch = new_args['nomatch']

	-- Pass the parsed flag. The earlier code passed an undefined global, so
	-- the "plain" argument was silently dropped.
	return str._match( s, pattern, start, match_index, plain_flag, nomatch )
end
223
224 --[[
225 pos
226
227 This function returns a single character from the target string at position pos.
228
229 Usage:
230 {{#invoke:String|pos|target_string|index_value}}
231 OR
232 {{#invoke:String|pos|target=target_string|pos=index_value}}
233
234 Parameters
235 target: The string to search
236 pos: The index for the character to return
237
238 If invoked using named parameters, Mediawiki will automatically remove any leading or
239 trailing whitespace from the target string. In some circumstances this is desirable, in
240 other cases one may want to preserve the whitespace.
241
242 The first character has an index value of 1.
243
244 If one requests a negative value, this function will select a character by counting backwards
245 from the end of the string. In other words pos = -1 is the same as asking for the last character.
246
247 A requested value of zero, or a value greater than the length of the string returns an error.
248 ]]
-- Entry point for {{#invoke:String|pos}}.
-- Arguments (named or positional, in this order): target, pos.
-- Returns the single character of "target" at index "pos"; negative indices
-- count from the end. An index of zero (also the fallback for a missing or
-- non-numeric "pos") or one beyond the string length yields str._error.
function str.pos( frame )
	local args = str._getParameters( frame.args, { 'target', 'pos' } )
	local text = args.target or ''
	local index = tonumber( args.pos ) or 0

	if index == 0 then
		return str._error( 'String index out of range' )
	end
	if math.abs( index ) > mw.ustring.len( text ) then
		return str._error( 'String index out of range' )
	end

	return mw.ustring.sub( text, index, index )
end
260
261 --[[
262 str_find
263
264 This function duplicates the behavior of {{str_find}}, including all of its quirks.
265 This is provided in order to support existing templates, but is NOT RECOMMENDED for
266 new code and templates. New code is recommended to use the "find" function instead.
267
268 Returns the first index in "source" that is a match to "target". Indexing is 1-based,
269 and the function returns -1 if the "target" string is not present in "source".
270
271 Important Note: If the "target" string is empty / missing, this function returns a
272 value of "1", which is generally unexpected behavior, and must be accounted for
273 separately.
274 ]]
-- Entry point for {{#invoke:String|str_find}} (legacy behaviour).
-- Arguments (named or positional, in this order): source, target.
-- Returns 1 when "target" is empty or missing, the 1-based index of the
-- first plain-text occurrence of "target" in "source" otherwise, and -1
-- when there is no occurrence.
function str.str_find( frame )
	local args = str._getParameters( frame.args, { 'source', 'target' } )
	local haystack = args.source or ''
	local needle = args.target or ''

	if needle == '' then
		return 1
	end

	-- Fourth argument "true" requests a plain-text (non-pattern) search.
	local found = mw.ustring.find( haystack, needle, 1, true )
	return found or -1
end
291
292 --[[
293 find
294
295 This function allows one to search for a target string or pattern within another
296 string.
297
298 Usage:
299 {{#invoke:String|find|source_str|target_string|start_index|plain_flag}}
300 OR
301 {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}
302
303 Parameters
304 source: The string to search
305 target: The string or pattern to find within source
306 start: The index within the source string to start the search, defaults to 1
307 plain: Boolean flag indicating that target should be understood as plain
308 text and not as a Lua style regular expression, defaults to true
309
310 If invoked using named parameters, Mediawiki will automatically remove any leading or
311 trailing whitespace from the parameter. In some circumstances this is desirable, in
312 other cases one may want to preserve the whitespace.
313
314 This function returns the first index >= "start" where "target" can be found
315 within "source". Indices are 1-based. If "target" is not found, then this
316 function returns 0. If either "source" or "target" are missing / empty, this
317 function also returns 0.
318
319 This function should be safe for UTF-8 strings.
320 ]]
-- Entry point for {{#invoke:String|find}}.
-- Arguments (named or positional, in this order): source, target, start, plain.
--   start defaults to 1; plain defaults to true.
-- Returns the index reported by mw.ustring.find for the first match, or 0
-- when nothing matches or when either "source" or "target" is empty.
function str.find( frame )
	local args = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
	local haystack = args.source or ''
	local needle = args.target or ''

	if haystack == '' or needle == '' then
		return 0
	end

	local init = tonumber( args.start ) or 1
	-- Argument values arrive as strings, so "or true" only supplies the
	-- default when the argument is absent.
	local plain = str._getBoolean( args.plain or true )

	local position = mw.ustring.find( haystack, needle, init, plain )
	return position or 0
end
341
342 --[[
343 replace
344
345 This function allows one to replace a target string or pattern within another
346 string.
347
348 Usage:
349 {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
350 OR
351 {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string|
352 count=replacement_count|plain=plain_flag}}
353
354 Parameters
355 source: The string to search
356 pattern: The string or pattern to find within source
357 replace: The replacement text
358 count: The number of occurrences to replace, defaults to all.
359 plain: Boolean flag indicating that pattern should be understood as plain
360 text and not as a Lua style regular expression, defaults to true
361 ]]
-- Entry point for {{#invoke:String|replace}}.
-- Arguments (named or positional, in this order):
-- source, pattern, replace, count, plain.
--   count: maximum number of replacements; all when absent or non-numeric
--   plain: treat pattern and replacement as plain text; defaults to true
-- Returns the source string with replacements applied. The source is
-- returned unchanged when it or the pattern is empty.
function str.replace( frame )
	local args = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
	local text = args.source or ''
	local pattern = args.pattern or ''

	if text == '' or pattern == '' then
		return text
	end

	local replacement = args.replace or ''
	local limit = tonumber( args.count )

	if str._getBoolean( args.plain or true ) then
		pattern = str._escapePattern( pattern )
		-- Only "%" is special in a replacement string, so doubling it is
		-- enough. The parentheses keep just the string result of gsub.
		replacement = ( mw.ustring.gsub( replacement, "%%", "%%%%" ) )
	end

	-- Assign to a local so only the replaced string (not the substitution
	-- count) is returned.
	local replaced
	if limit == nil then
		replaced = mw.ustring.gsub( text, pattern, replacement )
	else
		replaced = mw.ustring.gsub( text, pattern, replacement, limit )
	end

	return replaced
end
390
391 --[[
392 simple function to pipe string.rep to templates.
393 ]]
394
-- Entry point for {{#invoke:String|rep}}.
-- Positional arguments: 1 = string to repeat (defaults to ''),
-- 2 = number of repetitions.
-- Returns the repeated string, or str._error when the second argument is
-- not a number.
function str.rep( frame )
	local raw_count = frame.args[2]
	local times = tonumber( raw_count )

	if times then
		return string.rep( frame.args[1] or '', times )
	end

	return str._error( 'function rep expects a number as second parameter, received "' .. ( raw_count or '' ) .. '"' )
end
402
403 --[[
404 escapePattern
405
406 This function escapes special characters from a Lua string pattern. See [1]
407 for details on how patterns work.
408
409 [1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
410
411 Usage:
412 {{#invoke:String|escapePattern|pattern_string}}
413
414 Parameters
415 pattern_string: The pattern string to escape.
416 ]]
-- Entry point for {{#invoke:String|escapePattern}}.
-- Takes the first positional argument and returns it with pattern
-- characters escaped by str._escapePattern, or str._error when the
-- argument is missing.
function str.escapePattern( frame )
	local raw = frame.args[1]

	if raw then
		-- Go through a local so exactly one value is returned.
		local escaped = str._escapePattern( raw )
		return escaped
	end

	return str._error( 'No pattern string specified' )
end
425
426 --[[
427 Helper function that populates the argument list given that user may need to use a mix of
428 named and unnamed parameters. This is relevant because named parameters are not
429 identical to unnamed parameters due to string trimming, and when dealing with strings
430 we sometimes want to either preserve or remove that whitespace depending on the application.
431 ]]
-- Builds a table keyed by the names in arg_list.
-- For each name, in order, the named entry of frame_args is used when it is
-- present. Otherwise the next unused positional entry is consumed, and the
-- positional counter advances even when that entry is nil.
function str._getParameters( frame_args, arg_list )
	local resolved = {}
	local next_positional = 1

	for _, name in ipairs( arg_list ) do
		local value = frame_args[name]
		if value == nil then
			value = frame_args[next_positional]
			next_positional = next_positional + 1
		end
		resolved[name] = value
	end

	return resolved
end
448
449 --[[
450 Helper function to handle error messages.
451 ]]
-- Formats an error message for output.
-- Reads three options from the current frame's arguments:
--   ignore_errors:  when true, '' is returned instead of a message
--   error_category: category to attach; defaults to
--                   'Errors reported by Module String'
--   no_category:    when true, no category link is added
-- Returns the message wrapped in <strong class="error">, prefixed by a
-- category link unless the category is empty or suppressed.
function str._error( error_str )
	local args = mw.getCurrentFrame().args

	if str._getBoolean( args.ignore_errors or false ) then
		return ''
	end

	local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
	local category = args.error_category or 'Errors reported by Module String'

	if category == '' or str._getBoolean( args.no_category or false ) then
		return message
	end

	return '[[Category:' .. category .. ']]' .. message
end
469
470 --[[
471 Helper Function to interpret boolean strings
472 ]]
-- Converts a flag value to a boolean.
--   boolean: returned unchanged
--   string:  'false', 'no', '0' and '' (case-insensitive) give false;
--            any other string gives true
--   other:   raises the error 'No boolean value found'
function str._getBoolean( boolean_str )
	local kind = type( boolean_str )

	if kind == 'boolean' then
		return boolean_str
	end
	if kind ~= 'string' then
		error( 'No boolean value found' )
	end

	local lowered = boolean_str:lower()
	local is_false = lowered == 'false' or lowered == 'no'
		or lowered == '0' or lowered == ''
	return not is_false
end
491
492 --[[
493 Helper function that escapes all pattern characters so that they will be treated
494 as plain text.
495 ]]
-- Escapes every Lua pattern magic character in pattern_str with "%" so the
-- string matches literally when used as a pattern.
-- Returns exactly one value, the escaped string.
function str._escapePattern( pattern_str )
	-- gsub returns (string, substitution_count). Assigning to a single local
	-- drops the count, so it cannot leak into a caller's argument list, for
	-- example as the "init" argument of a find call.
	local escaped = mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" )
	return escaped
end
499
-- Hand the table of string functions back as the module's value.
return str