-- Module:Str find word

--- STABLE: 16-11-2021 20:30
--- done 25-10-2022: case=default false, A=/=a. (was: true, A=a)
-- todo: solve escape-comma (& get rid of ""-quote setting; just regular)
-- todo: sep = len 1 max?; return string has comma-space, not just comma
-- ? not %escape then?
-- todo: literal quotes = default? case=false = default (speed)
-- option: keep order
-- option: loop list
-- option: merge aliases

-- Module header: strict mode, dependencies and module-level state.
require('strict')

-- Export table returned at the end of the module.
local p = {}

-- Dependencies (other wiki modules).
local getArgs = require('Module:Arguments').getArgs
local str = require('Module:String')
local yesno = require('Module:Yesno')

-- Separator used when |sep= is absent (see setSep).
local defaultSep = ','
-- Cap used by buildWordTable when reading words from one list.
local iMaxWords = 16
-- Warning text set by buildWordTable when the cap is hit; nil when no warning.
local warningIMaxWordsReached = nil
-- Counter of literal ("quoted") words read; passed on to the report.
local xpLitWordCount = 0
-- The /report submodule; stays nil until initReport loads it.
local report -- to be initialised when explain is needed

--- Initialise the /report subpage.
-- Loads 'Module:Str find word/report' into the module-level `report`
-- variable. Only invoked when 'explain' is asked for, so the report code is
-- not loaded on regular calls.
local function initReport()
	report = require('Module:Str find word/report')
end

--- Decode character entities and normalise whitespace.
-- Turns "&#x0041;" into "A" etc. (via mw.text.decode) as early as possible,
-- then reduces every run of whitespace to one plain space.
-- @param s string to clean up
-- @return the cleaned string (exactly one return value)
local function decodeUnicode(s)
	-- The parameter is deliberately not called `str`: that name is the
	-- module-level Module:String import.
	local decoded = mw.text.decode(s)
	-- gsub also returns the replacement count; assigning to a single local
	-- drops it so callers never receive a stray second value.
	local collapsed = mw.ustring.gsub(decoded, '%s+', ' ')
	return collapsed
end

--- %-escape a word (character string) so it can be embedded in a Lua string
-- pattern and be matched as-is.
-- The escaping itself is delegated to Module:String's _escapePattern.
-- @param word string to escape
-- @return the escaped string
local function escape_word(word)
	local escaped = str._escapePattern(word)
	return escaped
end

--- Read and parse a word list; return the words as a simple array.
-- The word list can be: source, and-words-to-check, or-words-to-check.
--   step 1: when case-insensitive, turn the string into lowercase
--   step 2: read & remove literals ("..")  -- currently disabled
--   step 3: read separator-delimited words
--   step 4: when booleans are requested, turn boolean words into
--           'true'/'false' (Module:Yesno rules); other words stay untouched
-- All returned words are trimmed; empty words are skipped.
-- Side effects: sets the module-level warningIMaxWordsReached when more than
-- iMaxWords fields are present, and sends dev messages to the report when
-- explain is on.
-- @param tArgs     parsed argument table (uses .case, .sep, .explain, .booleans)
-- @param sWordlist the raw word list string
-- @return table (array) of words
local function buildWordTable(tArgs, sWordlist)
	local wordTable = {}
	if sWordlist == nil or sWordlist == '' then
		return wordTable
	end

	-- Step 1: case-insensitive => compare everything in lowercase.
	if yesno(tArgs.case, true) == false then
		sWordlist = string.lower(sWordlist)
	end

	-- Step 2: read "literals", then remove them from the string
	-- (replaced by a single comma; idle & keeps word separation).
	-- Disabled on purpose; the intended switch is:
	--- if yesno(tArgs.literals, false) then
	if false then
		local _, sCount
		_, sCount = mw.ustring.gsub(sWordlist, '"', '')
		if sCount > 1 then
			local litWord = ''
			local i, j
			while sCount > 1 do -- could do here: only when even?
				i = string.find(sWordlist, '%"', 1, false)
				j = string.find(sWordlist, '%"', i+1, false)
				litWord = mw.text.trim(string.sub(sWordlist, i+1, j-1))
				if #litWord > 0 then -- not an empty string or spaces only
					xpLitWordCount = xpLitWordCount + 1
					table.insert(wordTable, litWord)
				end
				-- remove from source, and do next gsub search:
				sWordlist = string.gsub(sWordlist, '%"%s*'
												.. escape_word(litWord)
												.. '%s*%"', ',')
				_, sCount = mw.ustring.gsub(sWordlist, '"', '')
			end
		end
	end

	-- Step 3: parse separator-delimited words.
	-- Wrapping the list in separators lets the frontier pattern treat the
	-- first and last word like any other.
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep
	local eSep = escape_word(tArgs.sep)
	local patstring = '%f[^' .. eSep .. '][^' .. eSep .. ']+%f[' .. eSep .. ']'
	if yesno(tArgs.explain, true) then
		report.xpMessage('1.eSep: ' .. eSep) -- dev
		report.xpMessage('2.pattern: ' .. patstring) -- dev
	end

	local matchIndex = 0
	local truncated = false
	while true do
		matchIndex = matchIndex + 1
		-- The separator doubles as the "no match" marker: a real field can
		-- never contain it.
		local hitWord = str._match(sWordlist, patstring, 1, matchIndex, false, tArgs.sep)
		hitWord = mw.text.trim(hitWord)
		if hitWord == tArgs.sep then
			-- no more words found in the string
			break
		end
		if matchIndex > iMaxWords then
			-- A field beyond the cap really exists: ignore it and warn.
			-- (Checking after the lookup avoids a false warning when the
			-- list holds exactly iMaxWords fields, and keeps the number of
			-- accepted fields at iMaxWords instead of iMaxWords + 1.)
			truncated = true
			break
		end
		if hitWord ~= '' then
			table.insert(wordTable, hitWord)
		end
	end
	if truncated then
		warningIMaxWordsReached = 'Max number of words (' .. tostring(iMaxWords) .. ') reached. Extra words are ignored.'
									.. ' (' .. mw.ustring.sub(mw.text.trim(sWordlist), 1, 90) .. ' ...). '
	end

	-- Step 4: when reading booleans, convert words to 'true'/'false'.
	-- The option goes through yesno so a string such as 'no' (possible when
	-- p._main is called directly) does not switch the conversion on.
	if yesno(tArgs.booleans, false) then
		for i, v in ipairs(wordTable) do
			local sBool = yesno(v)
			if sBool ~= nil then
				wordTable[i] = tostring(sBool)
			end
		end
	end

	return wordTable
end

--- Look up a single word in a word table (a simple array of words).
-- @param sourceWordTable array of words to search
-- @param word            the word to look for (exact, == comparison)
-- @return the word when present, nil otherwise
local function findWordInTable(sourceWordTable, word)
	for _, candidate in ipairs(sourceWordTable) do
		if candidate == word then
			return word
		end
	end
	return nil
end

--- AND-logic: ALL words of andWordTable must be present in sourceWordTable.
-- note 1: when the result is false, the hit table still lists the words
--         that were found.
-- note 2: an empty AND-wordlist gives true (nothing falsified it).
-- @param sourceWordTable array of source words
-- @param andWordTable    array of words that must all be present
-- @return boolean (all found?), table (array of the words found)
local function checkANDwords(sourceWordTable, andWordTable)
	local allFound = true
	local hits = {}
	for _, word in ipairs(andWordTable) do
		local found = findWordInTable(sourceWordTable, word)
		if found == nil then
			-- Falsified; keep going so the hit table is complete.
			allFound = false
		else
			hits[#hits + 1] = found
		end
	end
	return allFound, hits
end

--- OR-logic: at least one word of orWordTable must be present in
-- sourceWordTable.
-- note 1: an empty OR-wordlist gives true (nothing falsified it).
-- note 2: one hit is enough for true, but the hit table lists all words found.
-- @param sourceWordTable array of source words
-- @param orWordTable     array of words of which one must be present
-- @return boolean (any found, or list empty?), table (array of the words found)
local function checkORwords(sourceWordTable, orWordTable)
	local hits = {}
	if #orWordTable == 0 then
		return true, hits
	end
	for _, word in ipairs(orWordTable) do
		local found = findWordInTable(sourceWordTable, word)
		if found ~= nil then
			-- Confirmed; keep going so the hit table is complete.
			hits[#hits + 1] = found
		end
	end
	return #hits > 0, hits
end

--- Determine the string that is finally returned.
-- sYeslist is the logical result of _main: '' means False, anything else
-- means True. This function applies the |yes= / |no= overrides.
-- note: yes='' is a legal override (blank return value)
-- note: yes=nil (parameter absent) means: return sYeslist itself
-- @param tArgs    parsed argument table (uses .yes and .no)
-- @param sYeslist the list of hit words, '' when the check failed
-- @return the value to return to the caller
local function yesnoReturnstring(tArgs, sYeslist)
	local isHit = sYeslist ~= ''
	if not isHit then
		-- False
		return tArgs.no or ''
	end
	-- True
	if tArgs.yes ~= nil then
		-- some |yes= value was entered, could be ''
		return tArgs.yes
	end
	return sYeslist
end

--- Tell whether the page is currently being previewed.
-- Asks Module:If preview for a warning with a probe text; a non-empty result
-- is taken as "preview".
-- NOTE(review): presumably _warning returns '' outside preview -- confirm
-- against Module:If preview.
-- @return boolean
local function isPreview()
	local ifPreview = require('Module:If preview')
	return ifPreview._warning({'is_preview'}) ~= ''
end

--- Explain options (= report info): interprets parameter explain=.
--   explain=true      => show the report in Preview
--   explain=testcases => when in namespace Template: or User: AND the page is
--                        a subpage named 'testcases', show it permanently
-- @param tArgs argument table (uses .explain)
-- @return true, 'testcases', or false
local function checkExplain(tArgs)
	local explain = tArgs.explain
	if not yesno(explain, true) then
		-- nil, or an explicit "no"-like value
		return false
	end

	if yesno(explain, false) == true then
		-- explicit True, so show in preview only
		if isPreview() == true then
			return true
		end
	elseif explain == 'testcases' then
		local titleObj = mw.title.getCurrentTitle()
		if titleObj:inNamespaces('template', 'user')
			and titleObj.subpageText == 'testcases'
			and titleObj.isSubpage then
			return 'testcases'
		end
	end
	return false
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
--- _main function: check for presence of words in the source string.
-- Checks and returns:
--   when T: the string of all hit words (default), or the |yes=... input
--   when F: empty string '' (default), or the |no=... input
-- steps:
--   1. input word strings are prepared (parsed into arrays of words)
--   2. word checks are made (applying AND-logic, OR-logic)
--   3. final conclusion is drawn (T/F)
--   4. optionally, the preview report is prepared (debug, feedback)
--   5. based on T or F status, the return value is established and returned
-- note 1: each return value (yes=.., no=..) can be '' (null string)
-- @param tArgs parsed argument table (as produced by parseArgs)
-- @return the resulting string
function p._main(tArgs)
	local explain = yesno(tArgs.explain, true)
	if explain and report == nil then
		-- p.main loads the report module itself; this covers direct callers
		-- of p._main that switch explain on.
		initReport()
	end

	local sourceWordTable = buildWordTable(tArgs, tArgs.source)
	local andWordTable = buildWordTable(tArgs, tArgs.andString)
	local orWordTable = buildWordTable(tArgs, tArgs.orString)
	-- Hit tables stay nil when there was nothing to check.
	local tANDhits
	local tORhits
	-- logical findings:
	local bANDresult = false
	local bORresult = false
	local resultALL = false

	if (#sourceWordTable == 0) or (#andWordTable + #orWordTable == 0) then
		-- No words to check
		resultALL = false
		if explain then
			report.xpNoWords(tArgs, sourceWordTable, andWordTable, orWordTable)
		end
	else
		bANDresult, tANDhits = checkANDwords(sourceWordTable, andWordTable)
		bORresult, tORhits = checkORwords(sourceWordTable, orWordTable)
		resultALL = (bANDresult) and (bORresult)
	end

	local sYeslist = ''
	if resultALL then
		-- concat the sYeslist (= all hit words; from 2 tables)
		if bANDresult then
			sYeslist = sYeslist .. table.concat(tANDhits, tArgs.sep)
		end
		if #tORhits > 0 then
			if #tANDhits > 0 then
				sYeslist = sYeslist .. tArgs.sep
			end
			sYeslist = sYeslist .. table.concat(tORhits, tArgs.sep)
		end
	end

	if explain then
		if tArgs.yes ~= nil then
			if (tArgs.yes == '') and (tArgs.no == '') then
				report.xpYesNoBothBlank()
			end
		end
		if warningIMaxWordsReached ~= nil then
			report.xpMessage(warningIMaxWordsReached)
		end
		report.xpBuildReport(tArgs, sourceWordTable,
						bANDresult, andWordTable, tANDhits,
						bORresult, orWordTable, tORhits,
						sYeslist, xpLitWordCount)
	end

	return yesnoReturnstring(tArgs, sYeslist)
end

--- Set the word separator.
-- The separator is the first character of |sep= (after entity decoding and
-- whitespace normalisation). The default separator is used instead when
-- |sep= is absent or empty, or when that first character is whitespace, a
-- letter or a digit: such a separator would split or swallow regular words.
-- todo: what with |
-- @param sSep raw |sep= value, may be nil
-- @return a one-character separator
local function setSep(sSep)
	if sSep == nil then
		return defaultSep
	end

	local cleaned = decodeUnicode(sSep)
	-- ustring.sub keeps a multi-byte character intact; string.sub would
	-- return only its first byte.
	local firstChar = mw.ustring.sub(cleaned, 1, 1)
	if firstChar == '' then
		return defaultSep
	end
	if string.match(firstChar, '[%s%w%d]') ~= nil then
		-- irregular separator: not ok
		return defaultSep
	end
	return firstChar
end

--- Join two optional word-list strings into one string.
-- A nil list is skipped; when both are nil the result is ''.
-- @param s1     first list string, or nil
-- @param s2     second list string, or nil
-- @param newSep text placed between the two lists (table.concat separator)
-- @return the joined string
local function concatAndLists(s1, s2, newSep)
	local parts = {}
	if s1 ~= nil then
		parts[#parts + 1] = s1
	end
	if s2 ~= nil then
		parts[#parts + 1] = s2
	end
	return table.concat(parts, newSep)
end

--- Turn the raw invocation arguments into the argument table used by _main.
-- Resolves parameter aliases, decodes/normalises the word lists and applies
-- defaults.
-- @param origArgs argument table as delivered by Module:Arguments
-- @return new table with keys: sep, source, andString, orString, case,
--         booleans, literals, yes, no, explain
local function parseArgs(origArgs)
	local newArgs = {}

	newArgs.sep = setSep(origArgs['sep']) -- do first, needed below
	newArgs.source = decodeUnicode(origArgs['s'] or origArgs['source'] or '')
	-- Both AND-lists (w=/word= and andw=/andwords=) are merged into one list.
	-- They must be joined with the actual separator (newArgs.sep), otherwise
	-- the last word of the first list fuses with the first word of the second.
	newArgs.andString = decodeUnicode(concatAndLists(
		origArgs['w'] or origArgs['word'] or nil,
		origArgs['andw'] or origArgs['andwords'] or nil,
		newArgs.sep))
	newArgs.orString = decodeUnicode(origArgs['orw'] or origArgs['orwords'] or '')

	-- boolean options: catch both parameters, also handle nil & nonsense input values:
	newArgs.case = yesno(origArgs['case'] or origArgs['casesensitive'] or false, false) -- defaults to False
	newArgs.booleans = yesno(origArgs['bool'] or origArgs['booleans'] or false, false) -- defaults to False
	newArgs.literals = yesno(origArgs['literals'] or origArgs['lit'] or true, true) -- defaults to True

	-- yes: nil by default, so sYeslist is returned; '' is kept as legal input & return value
	newArgs.yes = origArgs['yes'] or nil
	newArgs.no = origArgs['no'] or ''

	-- explain: raw value first, then reduced to true / false / 'testcases'
	newArgs.explain = origArgs['explain'] or false
	newArgs.explain = checkExplain(newArgs)

	return newArgs
end

--- Entry point for #invoke.
-- Parses the frame arguments, runs _main, appends the max-words warning
-- (through Module:If preview) when one was raised, and appends the report
-- when explain is on.
-- @param frame the frame object handed over by #invoke
-- @return the resulting wikitext string
function p.main(frame)
	local origArgs = getArgs(frame)
	local tArgs = parseArgs(origArgs)
	local explain = yesno(tArgs.explain, true)

	if explain then
		-- The report module must be loaded before anything reports to it.
		initReport()
		report.xpListArguments(origArgs)
	end

	local sReturn = p._main(tArgs)
	if warningIMaxWordsReached ~= nil then
		local preview = require('Module:If preview')
		sReturn = sReturn .. preview._warning({warningIMaxWordsReached})
	end

	if explain then
		return sReturn .. report.xpPresent(tArgs.explain)
	end
	return sReturn
end

return p