×
Create a new article
Write your page title here:
We currently have 10,675 articles on Polandball Wiki. Type your article name above or create one of the articles listed here!



    Polandball Wiki

    -- Documentation for this module may be created at Module:Text/doc

    -- Library table that collects all Text.* utility functions of this module.
    --     serial  -- version stamp of this library
    --     suite   -- name of this library
    local Text = { serial = "2017-11-01",
                   suite  = "Text" }
    --[=[
    Text utilities
    ]=]
    
    
    
    -- local globals
    -- Module-wide caches. Each one starts as false and is meant to be
    -- filled on first use, so that patterns and tables are built only once.
    local PatternCJK        = false    -- character class for CJK detection
    local PatternCombined   = false    -- character class of combining marks
    local PatternLatin      = false    -- anchored pattern built from RangesLatin
    local PatternTerminated = false    -- pattern for a terminated sentence
    local QuoteLang         = false    -- language code -> quote scheme name
    local QuoteType         = false    -- quote scheme name -> codepoint pairs
    local RangesLatin       = false    -- list of { first, last } codepoint ranges
    local SeekQuote         = false    -- string of all known quotation marks
    
    
    
    local function factoryQuote()
        -- Create quote definitions
        --     Fills the module-level tables QuoteLang and QuoteType.
        --     QuoteLang maps a lowercase language code to a scheme name.
        --     QuoteType maps a scheme name to
        --         { { open, close },    -- codepoints, quotation level 1
        --           { open, close },    -- codepoints, quotation level 2
        --           true }              -- optional: pad text with spaces
        -- Returns: nothing; called for its side effects only
        QuoteLang = { af        = "bd",
                      ar        = "la",
                      be        = "labd",
                      bg        = "bd",
                      ca        = "la",
                      cs        = "bd",
                      da        = "bd",
                      de        = "bd",
                      dsb       = "bd",
                      et        = "bd",
                      el        = "lald",
                      en        = "ld",
                      es        = "la",
                      eu        = "la",
                --    fa        = "la",
                      fi        = "rd",
                      fr        = "laSPC",
                      ga        = "ld",
                      he        = "ldla",
                      hr        = "bd",
                      hsb       = "bd",
                      hu        = "bd",
                      hy        = "labd",
                      id        = "rd",
                      is        = "bd",
                      it        = "ld",
                      ja        = "x300C",
                      ka        = "bd",
                      ko        = "ld",
                      lt        = "bd",
                      lv        = "bd",
                      nl        = "ld",
                      nn        = "la",
                      no        = "la",
                      pl        = "bdla",
                      pt        = "lald",
                      ro        = "bdla",
                      ru        = "labd",
                      sk        = "bd",
                      sl        = "bd",
                      sq        = "la",
                      sr        = "bx",
                      sv        = "rd",
                      th        = "ld",
                      tr        = "ld",
                      uk        = "la",
                      zh        = "ld",
                      ["de-ch"] = "la",
                      ["en-gb"] = "lsld",
                      ["en-us"] = "ld",
                      ["fr-ch"] = "la",
                      ["it-ch"] = "la",
                      ["pt-br"] = "ldla",
                      ["zh-tw"] = "x300C",
                      ["zh-cn"] = "ld" }
        QuoteType = { bd    = { { 8222, 8220 },  { 8218, 8217 } },
                      bdla  = { { 8222, 8220 },  {  171,  187 } },
                      bx    = { { 8222, 8221 },  { 8218, 8217 } },
                      la    = { {  171,  187 },  { 8249, 8250 } },
                      laSPC = { {  171,  187 },  { 8249, 8250 },  true },
                      labd  = { {  171,  187 },  { 8222, 8220 } },
                      lald  = { {  171,  187 },  { 8220, 8221 } },
                      ld    = { { 8220, 8221 },  { 8216, 8217 } },
                      ldla  = { { 8220, 8221 },  {  171,  187 } },
                      lsld  = { { 8216, 8217 },  { 8220, 8221 } },
                      rd    = { { 8221, 8221 },  { 8217, 8217 } },
                      x300C = { { 0x300C, 0x300D },
                                { 0x300E, 0x300F } } }
        -- No return value: the former "return r" read an undeclared global.
    end -- factoryQuote()
    
    
    
    local function fiatQuote( apply, alien, advance )
        -- Wrap text in the quotation marks registered for a language
        -- Parameter:
        --     apply    -- string, with text
        --     alien    -- string, with language code
        --     advance  -- number, with level 1 or 2
        -- Returns: string; quoted text, or apply unchanged if no marks
        --          are defined for the resolved scheme and level
        if not QuoteLang then
            factoryQuote()
        end
        local scheme = QuoteLang[ alien ]
        if not scheme then
            -- Try the primary subtag ("de" of "de-at"), then English.
            local primary = alien:match( "^(%l+)-" )
            scheme = ( primary  and  QuoteLang[ primary ] )
                     or  QuoteLang[ "en" ]
        end
        if not scheme then
            return apply
        end
        local marks = QuoteType[ scheme ]
        if not marks then
            -- Scheme name without definition: report it in the debug log.
            mw.log( "fiatQuote() " .. scheme )
            return apply
        end
        -- Third entry of a scheme requests spaces inside the marks.
        local pad  = marks[ 3 ]  and  " "  or  ""
        local pair = marks[ advance ]
        if not pair then
            return apply
        end
        local quoted = mw.ustring.format( "%s%s%s%s%s",
                                          mw.ustring.char( pair[ 1 ] ),
                                          pad,
                                          apply,
                                          pad,
                                          mw.ustring.char( pair[ 2 ] ) )
        return quoted
    end -- fiatQuote()
    
    
    
    Text.char = function ( apply, again, accept )
        -- Create string from codepoints
        -- Parameter:
        --     apply   -- table (sequence) with numerical codepoints, or nil
        --     again   -- number of repetitions, or nil
        --     accept  -- true, if no error messages to be appended
        -- Returns: string; empty if apply is no table or has no items.
        --          On invalid input an error <span> is returned instead,
        --          unless accept is set.
        -- Note: items are read in sequence order 1..n; a nil hole ends
        --       the sequence.
        local r
        if type( apply ) == "table" then
            local bad   = { }
            local codes = { }
            local s
            -- ipairs, not pairs: the order of codepoints is significant.
            for _, v in ipairs( apply ) do
                if type( v ) == "number"  and
                   ( v >= 32  or  v == 9  or  v == 10 )  and
                   v <= 0x10FFFF then
                    -- Printable range, TAB or LF; fractions are cut off.
                    table.insert( codes, math.floor( v ) )
                else
                    -- Control codes, values beyond Unicode and non-numbers
                    -- are reported rather than passed to mw.ustring.char().
                    table.insert( bad, tostring( v ) )
                end
            end -- for _, v
            if #bad > 0 then
                s = "bad codepoints: " .. table.concat( bad, " " )
            elseif #codes > 0 then
                r = mw.ustring.char( unpack( codes ) )
                if again then
                    if type( again ) == "number" then
                        local n = math.floor( again )
                        if n > 1 then
                            r = r:rep( n )
                        elseif n < 1 then
                            r = ""
                        end
                    else
                        s = "bad repetitions: " .. tostring( again )
                    end
                end
            end
            if s  and  not accept then
                r = tostring(  mw.html.create( "span" )
                                      :addClass( "error" )
                                      :wikitext( s ) )
            end
        end
        return r or ""
    end -- Text.char()
    
    
    
    Text.concatParams = function ( args, apply, adapt )
        -- Concat list items into one string
        -- Parameter:
        --     args   -- table (sequence) with numKey=string;
        --               anything else is treated as empty list
        --     apply  -- string (optional); separator (default: "|")
        --     adapt  -- string (optional); format including "%s"
        -- Returns: string; trimmed non-empty items in ascending key order
        local collect = { }
        local order   = { }
        args = type( args ) == "table"  and  args  or  { }
        -- pairs() gives no ordering guarantee, so collect the numeric
        -- keys first and sort them; gaps in the numbering are tolerated.
        for k in pairs( args ) do
            if type( k ) == "number" then
                table.insert( order, k )
            end
        end -- for k
        table.sort( order )
        for i = 1, #order do
            local v = mw.text.trim( args[ order[ i ] ] )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end -- for i
        return table.concat( collect,  apply or "|" )
    end -- Text.concatParams()
    
    
    
    Text.containsCJK = function ( analyse )
        -- Is any CJK code within?
        -- Parameter:
        --     analyse  -- string, or nil (treated as empty)
        -- Returns: true, if CJK detected; false otherwise
        analyse = analyse or ""
        if not PatternCJK then
            -- Character class "[a-b...]" built from codepoint ranges;
            -- 91 = "[", 45 = "-", 93 = "]".
            -- Cached in the module-level local PatternCJK; the previous
            -- lowercase spelling created a global variable instead.
            PatternCJK = mw.ustring.char( 91,
                                            4352, 45,   4607,  -- U+1100-11FF
                                           11904, 45,  42191,  -- U+2E80-A4CF
                                           43072, 45,  43135,  -- U+A840-A87F
                                           44032, 45,  55215,  -- U+AC00-D7AF
                                           63744, 45,  64255,  -- U+F900-FAFF
                                           65072, 45,  65103,  -- U+FE30-FE4F
                                           65381, 45,  65500,  -- U+FF65-FFDC
                                          131072, 45, 196607,  -- U+20000-2FFFF
                                          93 )
        end
        return mw.ustring.find( analyse, PatternCJK ) ~= nil
    end -- Text.containsCJK()
    
    Text.removeDelimited = function (s, prefix, suffix)
        -- Remove all text in s delimited by prefix and suffix (inclusive)
        -- Arguments:
        --    s = string to process
        --    prefix = initial delimiter (literal text, not a pattern)
        --    suffix = ending delimiter (literal text, not a pattern)
        -- Returns: stripped string
        --    If a prefix has no matching suffix, everything from that
        --    prefix to the end of the string is removed.
        local r = s
        if prefix == "" then
            -- An empty prefix matches everywhere and would never terminate.
            return r
        end
        -- Byte lengths: find() and sub() below work on byte offsets.
        local prefixLen = #prefix
        local suffixLen = #suffix
        local i = r:find(prefix, 1, true)
        local j
        while i do
            -- Plain search: "-->" as a pattern would match any ">".
            j = r:find(suffix, i + prefixLen, true)
            if j then
                r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
            else
                r = r:sub(1, i - 1)
            end
            i = r:find(prefix, 1, true)
        end
        return r
    end
    
    Text.getPlain = function ( adjust )
        -- Strip wikisyntax from a string; templates are left untouched
        -- Parameter:
        --     adjust  -- string
        -- Returns: string without HTML comments, tags, bold/italic
        --          apostrophes; &nbsp; replaced by a plain space
        local plain = Text.removeDelimited( adjust, "<!--", "-->" )
        plain = plain:gsub( "(</?%l[^>]*>)", "" )    -- opening/closing tags
        plain = plain:gsub( "'''", "" )              -- bold, before italics
        plain = plain:gsub( "''", "" )               -- italics
        plain = plain:gsub( "&nbsp;", " " )
        return plain
    end -- Text.getPlain()
    
    
    
    Text.isLatinRange = function ( adjust )
        -- Does the text consist only of latin letters and symbols
        -- that are usual within latin texts?
        -- Precondition:
        --     adjust  -- string, or nil for initialization
        -- Returns: true, if valid for latin only; false, if not;
        --          nil, if called for initialization only
        if not RangesLatin then
            RangesLatin = { {    7,  687 },
                            { 7531, 7578 },
                            { 7680, 7935 },
                            { 8194, 8250 } }
        end
        if not PatternLatin then
            -- Build "^[a-b...]*$" from the ranges; 45 is the hyphen.
            local parts = { "^[" }
            for _, range in ipairs( RangesLatin ) do
                table.insert( parts,
                              mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) )
            end    -- for _, range
            table.insert( parts, "]*$" )
            PatternLatin = table.concat( parts )
        end
        if not adjust then
            return nil
        end
        return mw.ustring.match( adjust, PatternLatin ) ~= nil
    end -- Text.isLatinRange()
    
    
    
    Text.isQuote = function ( ask )
        -- Check whether a character is one of the known quotation marks
        -- Parameter:
        --     ask  -- string, with single character
        -- Returns: true, if ask is quotation mark; false otherwise
        if not SeekQuote then
            -- One string holding every recognized mark; built once.
            SeekQuote = mw.ustring.char( 34,        -- "
                                         39,        -- '
                                         171,       -- laquo
                                         187,       -- raquo
                                         8216,      -- lsquo
                                         8217,      -- rsquo
                                         8218,      -- sbquo
                                         8220,      -- ldquo
                                         8221,      -- rdquo
                                         8222,      -- bdquo
                                         8249,      -- lsaquo
                                         8250,      -- rsaquo
                                         0x300C,    -- CJK
                                         0x300D,    -- CJK
                                         0x300E,    -- CJK
                                         0x300F )   -- CJK
        end
        if ask == "" then
            -- The empty string would be "found" anywhere.
            return false
        end
        -- Plain search for ask inside the collection of marks.
        return mw.ustring.find( SeekQuote, ask, 1, true ) ~= nil
    end -- Text.isQuote()
    
    
    
    Text.listToText = function ( args, adapt )
        -- Format list items similar to mw.text.listToText()
        -- Parameter:
        --     args   -- table (sequence) with numKey=string;
        --               anything else is treated as empty list
        --     adapt  -- string (optional); format including "%s"
        -- Returns: string; trimmed non-empty items in ascending key order,
        --          joined by mw.text.listToText()
        local collect = { }
        local order   = { }
        args = type( args ) == "table"  and  args  or  { }
        -- pairs() gives no ordering guarantee, so collect the numeric
        -- keys first and sort them; gaps in the numbering are tolerated.
        for k in pairs( args ) do
            if type( k ) == "number" then
                table.insert( order, k )
            end
        end -- for k
        table.sort( order )
        for i = 1, #order do
            local v = mw.text.trim( args[ order[ i ] ] )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end -- for i
        return mw.text.listToText( collect )
    end -- Text.listToText()
    
    
    
    Text.quote = function ( apply, alien, advance )
        -- Put text into quotation marks of a language
        -- Parameter:
        --     apply    -- string, with text
        --     alien    -- string, with language code, or nil
        --     advance  -- number, with level 1 or 2, or nil
        -- Returns: quoted string
        local slang
        if type( alien ) == "string" then
            slang = mw.text.trim( alien ):lower()
        else
            -- No language given: page language, else content language.
            -- TODO FIXME: Introduction expected 2017-04
            slang = mw.title.getCurrentTitle().pageLanguage
                    or  mw.language.getContentLanguage():getCode()
        end
        -- Anything but an explicit 2 means first level quotation.
        local mode = ( advance == 2 )  and  2  or  1
        return fiatQuote( mw.text.trim( apply ), slang, mode )
    end -- Text.quote()
    
    
    
    Text.quoteUnquoted = function ( apply, alien, advance )
        -- Quote text, if not yet quoted and not empty
        -- Parameter:
        --     apply    -- string, with text
        --     alien    -- string, with language code, or nil
        --     advance  -- number, with level 1 or 2, or nil
        -- Returns: string; possibly quoted
        --          The trimmed text is returned unchanged if it is empty
        --          or if its first or last character is a quotation mark.
        local r = mw.text.trim( apply )
        if r ~= "" then
            local first = mw.ustring.sub( r, 1, 1 )
            -- Last character: start at -1 and run to the end. The former
            -- sub( r, -1, 1 ) was empty for any text longer than one
            -- character, so a trailing quotation mark was never detected.
            local last  = mw.ustring.sub( r, -1 )
            if not Text.isQuote( first )  and  not Text.isQuote( last ) then
                r = Text.quote( r, alien, advance )
            end
        end
        return r
    end -- Text.quoteUnquoted()
    
    
    
    Text.removeDiacritics = function ( adjust )
        -- Strip combining marks from a text
        -- Parameter:
        --     adjust  -- string
        -- Returns: string; all latin letters should be ASCII
        --                  or basic greek or cyrillic or symbols etc.
        if not PatternCombined then
            -- Character class of combining mark ranges;
            -- 91 = "[", 45 = "-", 93 = "]".
            PatternCombined = mw.ustring.char( 91,
                                               0x0300, 45, 0x036F,
                                               0x1AB0, 45, 0x1AFF,
                                               0x1DC0, 45, 0x1DFF,
                                               0xFE20, 45, 0xFE2F,
                                               93 )
        end
        -- Decompose, drop the marks, then compose what is left.
        local stripped = mw.ustring.gsub( mw.ustring.toNFD( adjust ),
                                          PatternCombined,
                                          "" )
        return mw.ustring.toNFC( stripped )
    end -- Text.removeDiacritics()
    
    
    
    Text.sentenceTerminated = function ( analyse )
        -- Does the string end with dot, question or exclamation mark?
        --     Quotation marks and closing brackets after the
        --     terminating character are tolerated
        -- Parameter:
        --     analyse  -- string
        -- Returns: true, if sentence terminated; false otherwise
        if not PatternTerminated then
            -- "[" plus four further terminator codepoints, given
            -- numerically, ...
            local lead = mw.ustring.char( 91,
                                          12290,
                                          65281,
                                          65294,
                                          65311 )
            -- ... then the ASCII terminators and ellipsis, followed by
            -- any number of trailing quotes or "]" up to the end.
            PatternTerminated = lead .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
        end
        return mw.ustring.find( analyse, PatternTerminated ) ~= nil
    end -- Text.sentenceTerminated()
    
    
    
    Text.ucfirstAll = function ( adjust )
        -- Turn the first letter of every word into upper case
        -- Precondition:
        --     adjust  -- string
        -- Returns: string with all first letters in upper case
        -- Some named entities are temporarily replaced by numeric ones,
        -- so that their names are not capitalized.
        local named   = { "&amp;",   "&lt;",   "&gt;",
                          "&nbsp;",  "&thinsp;",
                          "&zwnj;",  "&zwj;",
                          "&lrm;",   "&rlm;" }
        local numeric = { "&#38;",   "&#60;",  "&#62;",
                          "&#160;",  "&#8201;",
                          "&#8204;", "&#8205;",
                          "&#8206;", "&#8207;" }
        -- Leading space lets the first word match "%W%l" as well.
        local r      = " " .. adjust
        local masked = false
        if adjust:find( "&" ) then
            for n = 1, #named do
                r = r:gsub( named[ n ], numeric[ n ] )
            end -- for n
            masked = true
        end
        local start = 1
        while true do
            -- Non-word character followed by a lowercase letter.
            local hit = mw.ustring.find( r, "%W%l", start )
            if not hit then
                break
            end
            local letter = hit + 1
            local upper  = mw.ustring.upper( mw.ustring.sub( r,
                                                             letter,
                                                             letter ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, hit ),
                               upper,
                               mw.ustring.sub( r, hit + 2 ) )
            start = letter
        end -- while true
        r = r:sub( 2 )    -- drop the leading space again
        if masked then
            for n = 1, #named do
                r = r:gsub( numeric[ n ], named[ n ] )
            end -- for n
            -- Hexadecimal entities had their "x" capitalized above.
            r = r:gsub( "&#X(%x+);", "&#x%1;" )
        end
        return r
    end -- Text.ucfirstAll()
    
    
    
    Text.uprightNonlatin = function ( adjust )
        -- Ensure non-italics for non-latin text parts
        --     One single greek letter might be granted
        -- Precondition:
        --     adjust  -- string
        -- Returns: string with non-latin parts enclosed in <span>;
        --          adjust itself, if it matches PatternLatin entirely
        local r
        -- Called without argument: only initializes RangesLatin and
        -- PatternLatin, which are used directly below.
        Text.isLatinRange()
        if mw.ustring.match( adjust, PatternLatin ) then
            -- latin only, horizontal dashes, quotes
            r = adjust
        else
            -- State while scanning adjust character by character:
            --     c  -- current codepoint (reused for a character string
            --           in the backward scan below)
            --     j  -- start index of the open non-latin run, or false
            --     k  -- start index of the text not yet copied into r
            --     m  -- index at which a following latin character still
            --           counts the run as one single greek letter,
            --           or false
            --     n  -- number of characters in adjust
            local c
            local j    = false
            local k    = 1
            local m    = false
            local n    = mw.ustring.len( adjust )
            -- Format: result so far, latin text before the run, the run.
            local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
            local flat = function ( a )
                      -- isLatin
                      -- true, if codepoint a is within one of RangesLatin;
                      -- nil otherwise
                      local range
                      for i = 1, #RangesLatin do
                          range = RangesLatin[ i ]
                          if a >= range[ 1 ]  and  a <= range[ 2 ] then
                              return true
                          end
                      end    -- for i
                  end -- flat()
            local focus = function ( a )
                      -- char is not ambivalent
                      -- Codepoints up to 64 other than '&' and '<', and
                      -- codepoints 8192..8212, do not decide about the
                      -- script and are skipped by the scan.
                      local r = ( a > 64 )
                      if r then
                          r = ( a < 8192  or  a > 8212 )
                      else
                          r = ( a == 38  or  a == 60 )    -- '&' '<'
                      end
                      return r
                  end -- focus()
            local form = function ( a )
                    -- Append pending text k..j-1 and the run j..a,
                    -- the latter wrapped in <span>, to the result r
                    return string.format( span,
                                          r,
                                          mw.ustring.sub( adjust, k, j - 1 ),
                                          mw.ustring.sub( adjust, j, a ) )
                  end -- form()
            r = ""
            for i = 1, n do
                c = mw.ustring.codepoint( adjust, i, i )
                if focus( c ) then
                    if flat( c ) then
                        -- Latin character: closes an open run, if any.
                        if j then
                            if m then
                                if i == m then
                                    -- single greek letter.
                                    -- Run is discarded; its text stays in
                                    -- the pending part starting at k.
                                    j = false
                                end
                                m = false
                            end
                            if j then
                                -- Spaces and "(" directly before the latin
                                -- character are kept outside the <span>.
                                local nx = i - 1
                                local s  = ""
                                for ix = nx, 1, -1 do
                                    c = mw.ustring.sub( adjust, ix, ix )
                                    if c == " "  or  c == "(" then
                                        nx = nx - 1
                                        s  = c .. s
                                    else
                                        break -- for ix
                                    end
                                end -- for ix
                                r = form( nx ) .. s
                                j = false
                                k = i
                            end
                        end
                    elseif not j then
                        -- First non-latin character: open a run here.
                        j = i
                        if c >= 880  and  c <= 1023 then
                            -- single greek letter?
                            m = i + 1
                        else
                            m = false
                        end
                    end
                elseif m then
                    -- Ambivalent character after a greek letter:
                    -- move the expected index along.
                    m = m + 1
                end
            end    -- for i
            if j  and  ( not m  or  m < n ) then
                -- Run still open at the end of the text: wrap it.
                r = form( n )
            else
                -- Copy the remaining pending text unchanged.
                r = r .. mw.ustring.sub( adjust, k )
            end
        end
        return r
    end -- Text.uprightNonlatin()
    
    
    
    Text.test = function ( about )
        -- Expose internal tables for testing
        -- Parameter:
        --     about  -- string; only "quote" is supported
        -- Returns: table with QuoteLang and QuoteType, freshly created;
        --          nil for any other value of about
        if about ~= "quote" then
            return nil
        end
        factoryQuote()
        return { QuoteLang = QuoteLang,
                 QuoteType = QuoteType }
    end -- Text.test()
    
    
    
    -- Export
    -- Module table returned at the end of the file; the p.* functions
    -- below take a frame object and forward its arguments to Text.*
    local p = { }
    
    function p.char( frame )
        -- Create string from whitespace separated codepoints
        -- Parameter (of the parent frame, or else of frame itself):
        --     1       -- codepoints; decimal, or hexadecimal as "x41"
        --     *       -- number of repetitions (optional)
        --     errors  -- "0" to suppress error messages
        -- Returns: string, as of Text.char()
        local parent = frame:getParent()
        local params = parent  and  parent.args
        local story  = params  and  params[ 1 ]
        local codes, lenient, multiple
        if not story then
            -- No parent frame or no parameter there: use own arguments.
            params = frame.args
            story  = params[ 1 ]
        end
        if story then
            lenient  = ( params.errors == "0" )
            multiple = tonumber( params[ "*" ] )
            codes    = { }
            -- ipairs, not pairs: the order of codepoints is significant.
            for _, v in ipairs( mw.text.split( story, "%s+" ) ) do
                if v ~= "" then
                    local j
                    if v:sub( 1, 1 ) == "x" then
                        -- "x41" -> "0x41", which tonumber() reads as hex
                        j = tonumber( "0" .. v )
                    else
                        j = tonumber( v )
                    end
                    -- Unparsable items are passed on as strings,
                    -- so that Text.char() can report them.
                    table.insert( codes,  j or v )
                end
            end -- for _, v
        end
        return Text.char( codes, multiple, lenient )
    end
    
    function p.concatParams( frame )
        -- Join numbered parameters into one string
        -- Parameter:
        --     template   -- "1" to read the items from the parent frame
        --     separator  -- separator (optional)
        --     format     -- format including "%s" (optional)
        --     1, 2, ...  -- items, unless template is "1"
        local own    = frame.args
        local remote = own.template
        if type( remote ) == "string" then
            remote = ( mw.text.trim( remote ) == "1" )
        end
        local items = remote  and  frame:getParent().args  or  own
        return Text.concatParams( items, own.separator, own.format )
    end
    
    function p.containsCJK( frame )
        -- Returns "1", if parameter 1 contains CJK; "" otherwise
        local text = frame.args[ 1 ]  or  ""
        if Text.containsCJK( text ) then
            return "1"
        end
        return ""
    end
    
    function p.getPlain( frame )
        -- Returns parameter 1 as processed by Text.getPlain()
        local text = frame.args[ 1 ]  or  ""
        return Text.getPlain( text )
    end
    
    function p.isLatinRange( frame )
        -- Returns "1", if Text.isLatinRange() accepts parameter 1;
        -- "" otherwise
        local text = frame.args[ 1 ]  or  ""
        if Text.isLatinRange( text ) then
            return "1"
        end
        return ""
    end
    
    function p.isQuote( frame )
        -- Returns "1", if parameter 1 is a quotation mark; "" otherwise
        local text = frame.args[ 1 ]  or  ""
        if Text.isQuote( text ) then
            return "1"
        end
        return ""
    end
    
    
    
    function p.listToFormat(frame)
        -- Fill a format string once per row of several parallel lists
        -- Parameter:
        --     format     -- string with one "%s" per list
        --     sep        -- separator pattern of list items (default: ";")
        --     1, 2, ...  -- lists; items separated by sep
        -- Returns: string; concatenation of all filled formats.
        --          The n-th "%s" of a row receives the item of list n;
        --          missing items are replaced by the empty string, and
        --          surplus "%s" are left as they are.
        local lists = {}
        local pformat = frame.args["format"] or ""
        local sep = frame.args["sep"] or ";"

        -- Parse parameters: lists
        for k, v in pairs(frame.args) do
            local knum = tonumber(k)
            if knum then lists[knum] = v end
        end

        -- Split lists
        local maxListLen = 0
        for i = 1, #lists do
            lists[i] = mw.text.split(lists[i], sep)
            if #lists[i] > maxListLen then maxListLen = #lists[i] end
        end

        -- Generate result string
        local lines = {}
        for i = 1, maxListLen do
            local j = 0
            -- Single pass with a function as replacement: items are
            -- inserted literally ("%" is not special), and a "%s" inside
            -- an item is not consumed by the following list.
            lines[i] = mw.ustring.gsub(pformat, "%%s", function ()
                j = j + 1
                local list = lists[j]
                if list then
                    return list[i] or ""
                end
                return nil    -- more "%s" than lists: keep placeholder
            end)
        end

        return table.concat(lines)
    end
    
    
    
    function p.listToText( frame )
        -- Format numbered parameters as a textual list
        -- Parameter:
        --     template   -- "1" to read the items from the parent frame
        --     format     -- format including "%s" (optional)
        --     1, 2, ...  -- items, unless template is "1"
        local own    = frame.args
        local remote = own.template
        if type( remote ) == "string" then
            remote = ( mw.text.trim( remote ) == "1" )
        end
        local items = remote  and  frame:getParent().args  or  own
        return Text.listToText( items, own.format )
    end
    
    
    
    function p.quote( frame )
        -- Quote parameter 1
        -- Parameter:
        --     1  -- text
        --     2  -- language code (optional)
        --     3  -- quotation level, 1 or 2 (optional)
        local args  = frame.args
        local slang = args[ 2 ]
        if type( slang ) == "string" then
            -- An empty language code counts as not provided.
            local code = mw.text.trim( slang )
            slang = ( code ~= "" )  and  code
        end
        local level = tonumber( args[ 3 ] )
        return Text.quote( args[ 1 ] or "", slang, level )
    end
    
    
    
    function p.quoteUnquoted( frame )
        -- Quote parameter 1, as far as Text.quoteUnquoted() decides so
        -- Parameter:
        --     1  -- text
        --     2  -- language code (optional)
        --     3  -- quotation level, 1 or 2 (optional)
        local args  = frame.args
        local slang = args[ 2 ]
        if type( slang ) == "string" then
            -- An empty language code counts as not provided.
            local code = mw.text.trim( slang )
            slang = ( code ~= "" )  and  code
        end
        local level = tonumber( args[ 3 ] )
        return Text.quoteUnquoted( args[ 1 ] or "", slang, level )
    end
    
    
    
    function p.removeDiacritics( frame )
        -- Returns parameter 1 as processed by Text.removeDiacritics()
        local text = frame.args[ 1 ]  or  ""
        return Text.removeDiacritics( text )
    end
    
    function p.sentenceTerminated( frame )
        -- Returns "1", if parameter 1 is a terminated sentence;
        -- "" otherwise
        local text = frame.args[ 1 ]  or  ""
        if Text.sentenceTerminated( text ) then
            return "1"
        end
        return ""
    end
    
    function p.ucfirstAll( frame )
        -- Returns parameter 1 as processed by Text.ucfirstAll()
        local text = frame.args[ 1 ]  or  ""
        return Text.ucfirstAll( text )
    end
    
    function p.uprightNonlatin( frame )
        -- Returns parameter 1 as processed by Text.uprightNonlatin()
        local text = frame.args[ 1 ]  or  ""
        return Text.uprightNonlatin( text )
    end
    
    
    
    function p.zip(frame)
        -- Interleave the items of several lists
        -- Parameter:
        --     1, 2, ...        -- lists
        --     sep              -- default separator pattern of list items
        --     sep1, sep2, ...  -- separator pattern for one particular list
        --     isep             -- inserted between items of the same row
        --     osep             -- inserted between rows
        -- Returns: string; row by row, one item of each list per row,
        --          missing items replaced by the empty string
        local args = frame.args
        local lists, seps = {}, {}
        local defaultsep = args["sep"] or ""
        local innersep = args["isep"] or ""
        local outersep = args["osep"] or ""

        -- Parse parameters
        for k, v in pairs(args) do
            local knum = tonumber(k)
            if knum then
                lists[knum] = v
            elseif string.sub(k, 1, 3) == "sep" then
                local sepnum = tonumber(string.sub(k, 4))
                if sepnum then seps[sepnum] = v end
            end
        end
        -- Use the default separator where no explicit one is given
        for i = 1, math.max(#seps, #lists) do
            seps[i] = seps[i] or defaultsep
        end

        -- Split lists
        local longest = 0
        for i = 1, #lists do
            local items = mw.text.split(lists[i], seps[i])
            lists[i] = items
            longest = math.max(longest, #items)
        end

        -- Collect all pieces and join them once at the end
        local pieces = {}
        for i = 1, longest do
            if i > 1 then pieces[#pieces + 1] = outersep end
            for j = 1, #lists do
                if j > 1 then pieces[#pieces + 1] = innersep end
                pieces[#pieces + 1] = lists[j][i] or ""
            end
        end
        return table.concat(pieces)
    end
    
    
    
    p.failsafe = function ()
        -- Returns: string; version stamp of this library (Text.serial)
        local stamp = Text.serial
        return stamp
    end -- p.failsafe()
    
    
    
    function p.Text()
        -- Access for other Lua modules
        -- Returns: table; the Text library with all Text.* functions
        return Text
    end -- p.Text()

    return p
    
    -- Cookies help us deliver our services. By using our services, you agree to our use of cookies.