Modul:templateparser
Pendokumenan untuk modul ini boleh diciptakan di Modul:templateparser/doc
local concat = table.concat
local gmatch = string.gmatch
local insert = table.insert
local lower = string.lower
local match = string.match
local rawset = rawset
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local type = type
local ulower = string.ulower
local m_parser = require("Module:parser")
local data = mw.loadData("Module:templateparser/data")
local export = {}
------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------
-- Reports whether `this` is exactly one of the six ASCII whitespace
-- characters: space, tab, newline, vertical tab, form feed or carriage return.
-- Any other value (including the empty string and multi-character strings)
-- yields false.
local is_space
do
	local whitespace = {
		[" "] = true,
		["\t"] = true,
		["\n"] = true,
		["\v"] = true,
		["\f"] = true,
		["\r"] = true,
	}
	function is_space(this)
		return whitespace[this] == true
	end
end
-- Returns `str` with leading and trailing whitespace removed, where
-- whitespace is whatever is_space accepts. Returns "" if `str` is empty or
-- contains nothing but whitespace.
local function trim(str)
	local len = #str
	-- Walk forward to the first non-space character.
	local first = 1
	while first <= len and is_space(sub(str, first, first)) do
		first = first + 1
	end
	if first > len then
		return ""
	end
	-- Walk backward to the last non-space character. The character at `first`
	-- is known to be non-space, so this scan always stops at or after it.
	local last = len
	while is_space(sub(str, last, last)) do
		last = last - 1
	end
	return sub(str, first, last)
end
------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------
local Node = m_parser.Node
local Wikitext = m_parser.Wikitext
-- Node type for tags; instances are created by Parser:tag.
local Tag = Node:new("tag")

-- Serialises the tag back to text.
-- * A tag flagged `ignored` serialises to the empty string.
-- * Otherwise the opening tag is rebuilt from `name` and `attributes` (every
--   value is wrapped in double quotes; attribute order follows pairs(), so
--   it is unspecified).
-- * A `self_closing` tag ends with "/>"; any other tag is followed by its
--   concatenated contents and a closing tag.
function Tag:__tostring()
	if self.ignored then
		return ""
	end
	local parts = {"<", self.name}
	local attributes = self.attributes
	if attributes then
		for attr, value in pairs(attributes) do
			insert(parts, " " .. attr .. "=\"" .. value .. "\"")
		end
	end
	if self.self_closing then
		insert(parts, "/>")
		return concat(parts)
	end
	insert(parts, ">")
	return concat(parts) .. concat(self) .. "</" .. self.name .. ">"
end
-- Node type for triple-brace arguments; instances are created by
-- Parser:argument.
local Argument = Node:new("argument")

-- Serialises the argument as "{{{" .. parts joined by "|" .. "}}}".
-- An argument with neither a first nor a second part serialises to the
-- literal string "argument".
function Argument:__tostring()
	local first = self[1]
	if not (first or self[2]) then
		return "argument"
	end
	local output = {"{{{", tostring(first)}
	local i = 2
	while self[i] do
		insert(output, "|")
		insert(output, tostring(self[i]))
		i = i + 1
	end
	insert(output, "}}}")
	return concat(output)
end

-- Advances the cursor `self.i` by one and returns the part at the new
-- position, but only while that position is 1 or 2; after that it returns
-- nothing.
function Argument:next()
	local i = self.i + 1
	self.i = i
	if i <= 2 then
		return self[i]
	end
end
-- Node type for template parameters; instances are created by the template
-- parameter handler.
local Parameter = Node:new("parameter")

-- Serialises the parameter: "key=value" when a `key` is present, otherwise
-- just the value (as produced by Node.__tostring).
function Parameter:__tostring()
	local key = self.key
	if not key then
		return Node.__tostring(self)
	end
	return tostring(key) .. "=" .. Node.__tostring(self)
end
-- Node type for double-brace templates; instances are created by
-- Parser:template. Index 1 holds the template name and indices 2 and up hold
-- the parameters.
local Template = Node:new("template")

-- Serialises the template back to text.
-- * With no parts at all, returns the literal string "template".
-- * With only a name, returns "{{name}}".
-- * Otherwise returns "{{name|p2|p3|...}}". If `self.colon` is set, the
--   second part is attached to the name with ":" instead of "|"
--   ("{{name:p2|p3|...}}").
--   NOTE(review): nothing in this file sets `colon`; presumably it marks
--   parser-function-style invocations - confirm against the code that sets it.
function Template:__tostring()
	if self[2] then
		local output, n = {"{{", tostring(self[1])}, 2
		if self.colon then
			-- The part after the colon is the second part. It has been
			-- written here, so the "|"-joined loop resumes at part 3.
			-- (Previously part 3 was written here and then again by the
			-- loop, while part 2 was lost.)
			insert(output, ":")
			insert(output, tostring(self[2]))
			n = 3
		end
		for i = n, #self do
			insert(output, "|")
			insert(output, tostring(self[i]))
		end
		insert(output, "}}")
		return concat(output)
	elseif self[1] then
		return "{{" .. tostring(self[1]) .. "}}"
	else
		return "template"
	end
end
-- Builds a table mapping parameter keys to string values from parts 2..#self.
-- * Parameters without a key are numbered 1, 2, 3, ... in order of
--   appearance and their values are NOT trimmed.
-- * Parameters with a key have both key and value trimmed. The key becomes a
--   number when it is a plain integer literal (optional "-", no leading
--   zeroes, no decimals) whose magnitude is at most 2^53; otherwise it stays
--   a string.
-- When two parameters resolve to the same key, the later one wins.
function Template:get_params()
	local params = {}
	local implicit = 0
	for i = 2, #self do
		local param = self[i]
		local raw_key = param.key
		if raw_key then
			local key = trim(tostring(raw_key))
			if key == "0" or match(key, "^-?[1-9]%d*$") then
				local num = tonumber(key)
				-- Numbers are only exact up to 2^53 - 1, and 2^53 + 1 would
				-- compare equal to 2^53, so the two boundary values are
				-- recognised by their exact spelling instead.
				if (num <= 9007199254740991 and num >= -9007199254740991)
					or key == "9007199254740992"
					or key == "-9007199254740992"
				then
					key = num
				end
			end
			params[key] = trim(Node.__tostring(param))
		else
			implicit = implicit + 1
			params[implicit] = tostring(param)
		end
	end
	return params
end
------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------
local Parser = m_parser.Parser
-- Argument.
do
	-- Token handler used while inside a candidate "{{{...}}}" argument.
	-- "|" closes the current part and opens the next; "}}}" closes the
	-- argument; "}}" not followed by a third "}" or the end of input fails the
	-- route; everything else goes to the generic block handler.
	local function handle_argument(self, this)
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:push_sublayer()
			return
		end
		if this == "" then
			return self:fail_route()
		end
		if this ~= "}" or self:read(1) ~= "}" then
			return self:block_handler(this)
		end
		-- Two closing braces seen; a third one is required for an argument.
		if self:read(2) ~= "}" then
			return self:fail_route()
		end
		self:emit(Wikitext:new(self:pop_sublayer()))
		self:advance(2)
		return self:pop()
	end

	-- Tries to parse an argument at the current position. If that route
	-- fails, falls back to Parser:template. On success, consumes three of
	-- the pending opening braces and emits an Argument node whose `pos` is
	-- the updated `brace_head`.
	function Parser:argument()
		local argument = self:get(handle_argument, self.push_sublayer)
		if argument == self.bad_route then
			self:template()
			return
		end
		-- If the layer already holds an item at emit_pos (presumably a node
		-- emitted by an inner brace construct during this same brace run -
		-- confirm against Module:parser), pull it out and make it the start
		-- of this argument's first part.
		if #self:layer() == self.emit_pos then
			local inner = self:remove()
			local first = argument[1]
			if type(first) == "table" then
				insert(first, 1, inner)
			else
				argument[1] = Wikitext:new{inner, first}
			end
		end
		self.braces = self.braces - 3
		self.brace_head = self.brace_head - 3
		argument.pos = self.brace_head
		self:emit(Argument:new(argument))
	end
end
-- Template.
do
local handle_name
local handle_parameter
-- Token handler for the name part of a candidate "{{...}}" template.
-- "|" ends the name and switches to parameter handling; "}}" ends the
-- template; the end of input fails the route; anything else goes to the
-- generic block handler.
function handle_name(self, this)
	if this == "" then
		return self:fail_route()
	elseif this == "|" then
		self:emit(Wikitext:new(self:pop_sublayer()))
		self.handler = handle_parameter
		self:push_sublayer()
		return
	elseif this == "}" and self:read(1) == "}" then
		self:emit(Wikitext:new(self:pop_sublayer()))
		self:advance()
		return self:pop()
	end
	return self:block_handler(this)
end
-- Token handler for the parameters of a candidate "{{...}}" template.
-- * "|" ends the current parameter and opens the next one.
-- * "}}" ends the last parameter and the template.
-- * The end of input fails the route.
-- * "=" splits key from value, but only the first time in a parameter (no
--   key stored yet) and only when it is not the start of a "==" sequence
--   located at the start of a line or of the input; such a sequence is left
--   for the generic block handler.
-- * Anything else goes to the generic block handler.
function handle_parameter(self, this)
	if this == "|" then
		self:emit(Parameter:new(self:pop_sublayer()))
		self:push_sublayer()
		return
	elseif this == "" then
		return self:fail_route()
	elseif this == "}" and self:read(1) == "}" then
		self:emit(Parameter:new(self:pop_sublayer()))
		self:advance()
		return self:pop()
	elseif this == "=" and not self.key and (
		self:read(1) ~= "=" or
		self:read(-1) ~= "\n" and self:read(-1) ~= ""
	) then
		-- Everything collected so far is the key; restart collection for the
		-- value and store the key on the fresh sublayer.
		local key = self:pop_sublayer()
		self:push_sublayer()
		rawset(self:layer(), "key", Wikitext:new(key))
		return
	end
	return self:block_handler(this)
end
-- Tries to parse a template at the current position.
-- On failure, steps back one token, inserts one literal "{" at emit_pos for
-- every brace still pending, and resets the pending count to zero.
-- On success, consumes two of the pending opening braces and emits a
-- Template node carrying the parser's `title` and a `pos` equal to the
-- updated `brace_head`.
function Parser:template()
	local template = self:get(handle_name, self.push_sublayer)
	if template == self.bad_route then
		self:advance(-1)
		for _ = 1, self.braces do
			self:emit(self.emit_pos, "{")
		end
		self.braces = 0
		return
	end
	-- If the layer already holds an item at emit_pos (presumably a node
	-- emitted by an inner brace construct during this same brace run -
	-- confirm against Module:parser), pull it out and make it the start of
	-- the template name.
	if #self:layer() == self.emit_pos then
		local inner = self:remove()
		local name = template[1]
		if type(name) == "table" then
			insert(name, 1, inner)
		else
			template[1] = Wikitext:new{inner, name}
		end
	end
	template.title = self.title
	self.braces = self.braces - 2
	self.brace_head = self.brace_head - 2
	template.pos = self.brace_head
	self:emit(Template:new(template))
end
-- Entry point for a run of two or more "{" tokens. Both callers in this file
-- (main_handler and block_handler) invoke it with the head on the first "{"
-- after confirming that the next token is also "{".
-- It counts the opening braces, then repeatedly tries to consume them from
-- the innermost outwards as arguments (three braces) or templates (two
-- braces) until none are left.
function Parser:template_or_argument()
-- Step past the two braces the caller already matched.
self:advance(2)
self.braces = 2
-- Count any further consecutive opening braces.
while self:read() == "{" do
self:advance()
self.braces = self.braces + 1
end
-- Index in the current layer at which literal "{" tokens are inserted
-- (here and in Parser:template) and against which Parser:template /
-- Parser:argument compare the layer length.
self.emit_pos = #self:layer() + 1
-- NOTE(review): raw_head is maintained by Module:parser; presumably the head
-- position in the raw text. brace_head is decremented as braces are consumed
-- and becomes the `pos` of each emitted node - confirm.
self.brace_head = self.raw_head
repeat
if self.braces == 1 then
-- A single leftover brace cannot open anything: emit it literally.
self:emit(self.emit_pos, "{")
break
elseif self.braces == 2 then
self:template()
else
-- Three or more: try an argument first; Parser:argument itself falls back to
-- Parser:template if the argument route fails.
self:argument()
end
self:advance()
-- Parser:template / Parser:argument subtract the braces they consume, and
-- Parser:template sets the count to 0 on failure.
until self.braces == 0
-- NOTE(review): steps back one token, presumably so that the advance
-- performed by the caller's loop lands on the right token - confirm against
-- Module:parser.
self:advance(-1)
end
end
-- Text not in <onlyinclude></onlyinclude>.
-- Skips text that lies outside <onlyinclude>...</onlyinclude>: advances until
-- the head is on the "<" of a "<onlyinclude>" opening tag (three tokens "<",
-- "onlyinclude", ">") or the input ends (empty token), then advances two
-- more tokens, which leaves the head on the ">" when a tag was found.
function Parser:not_onlyinclude()
	local cur, ahead1, ahead2 = self:read(0, 1, 2)
	while cur ~= "" and not (
		cur == "<" and ahead1 == "onlyinclude" and ahead2 == ">"
	) do
		self:advance()
		-- Slide the three-token window forward by one.
		cur, ahead1, ahead2 = ahead1, ahead2, self:read(2)
	end
	self:advance(2)
end
-- Tag.
do
-- Reports whether the tag name `check` (already lower-cased by the caller)
-- is one whose markup is dropped while its contents stay in place:
-- "includeonly" when `self.transcluded` is set, "noinclude" or "onlyinclude"
-- when it is not.
local function is_ignored_tag(self, check)
	if self.transcluded then
		return check == "includeonly"
	end
	return check == "noinclude" or check == "onlyinclude"
end
-- Handlers.
local handle_start
local handle_ignored_tag_start
local handle_ignored_tag
local handle_after_tag_name
local handle_before_attribute_name
local handle_attribute_name
local handle_before_attribute_value
local handle_quoted_attribute_value
local handle_unquoted_attribute_value
local handle_after_attribute_value
local handle_tag_block
local handle_end
-- First handler of a candidate tag; `this` is the token right after "<".
-- * "/" followed by an ignored tag name (see is_ignored_tag): a closing tag
--   that is recorded as an ignored tag. Any other closing tag fails the route.
-- * An ignored tag name: only the tag markup itself is consumed.
-- * "noinclude" while transcluded, or "includeonly" while not transcluded:
--   parsed as a full tag (attributes and body) and flagged `ignored`, so the
--   whole element serialises to nothing.
-- * A name listed in data.tags: parsed as a normal tag.
-- * Anything else fails the route.
-- Tag names are compared in lower case.
function handle_start(self, this)
	if this == "/" then
		local check = lower(self:read(1))
		if not is_ignored_tag(self, check) then
			return self:fail_route()
		end
		self.name = check
		self.ignored = true
		self:advance()
		self.handler = handle_ignored_tag_start
		return
	end
	local check = lower(this)
	if is_ignored_tag(self, check) then
		self.name = check
		self.ignored = true
		self.handler = handle_ignored_tag_start
		return
	end
	if (
		check == "noinclude" and self.transcluded or
		check == "includeonly" and not self.transcluded
	) then
		self.name = check
		self.ignored = true
		self.handler = handle_after_tag_name
		return
	end
	if not data.tags[check] then
		return self:fail_route()
	end
	self.name = check
	self.handler = handle_after_tag_name
end
-- Handler for the token right after the name of an ignored tag.
-- ">" ends the tag; "/>" ends it as self-closing; whitespace switches to
-- handle_ignored_tag, which skips ahead to the closing ">"; anything else
-- fails the route.
function handle_ignored_tag_start(self, this)
	if this == ">" then
		return self:pop()
	end
	if this == "/" and self:read(1) == ">" then
		self.self_closing = true
		self:advance()
		return self:pop()
	end
	if not is_space(this) then
		return self:fail_route()
	end
	self.handler = handle_ignored_tag
end
-- Handler for the remainder of an ignored tag after its name: silently skips
-- every token until ">" (ends the tag) or the end of input (fails the route).
function handle_ignored_tag(self, this)
	if this == "" then
		return self:fail_route()
	end
	if this == ">" then
		return self:pop()
	end
end
-- Handler for the token right after a regular tag's name.
-- "/>" ends the tag as self-closing; ">" starts the tag body; whitespace
-- starts attribute parsing; anything else fails the route.
function handle_after_tag_name(self, this)
	if this == ">" then
		self.handler = handle_tag_block
		return
	end
	if this == "/" and self:read(1) == ">" then
		self.self_closing = true
		self:advance()
		return self:pop()
	end
	if not is_space(this) then
		return self:fail_route()
	end
	self.handler = handle_before_attribute_name
end
-- Handler for the gap before an attribute name inside an opening tag.
-- * End of input fails the route. This check comes first: the empty token
--   is neither "/" nor whitespace, so when it was tested last it could never
--   be reached, and the end of input opened an attribute-name sublayer
--   before failing there.
-- * "/>" ends the tag as self-closing; a lone "/" is skipped.
-- * ">" starts the tag body.
-- * Whitespace is skipped.
-- * Any other token starts an attribute name: a sublayer is pushed with
--   handle_attribute_name and the token is passed on via consume()
--   (presumably re-dispatching it to the new handler - confirm against
--   Module:parser).
function handle_before_attribute_name(self, this)
	if this == "" then
		return self:fail_route()
	elseif this == "/" and self:read(1) == ">" then
		self.self_closing = true
		self:advance()
		return self:pop()
	elseif this == ">" then
		self.handler = handle_tag_block
	elseif this ~= "/" and not is_space(this) then
		self:push_sublayer(handle_attribute_name)
		return self:consume()
	end
end
-- Handler that collects the tokens of an attribute name in a sublayer.
-- * "=" ends the name: the collected tokens are joined, lower-cased with
--   ulower and stored as `attr_name`; the value is expected next.
-- * "/", ">" or whitespace end the name without a value: the collected
--   tokens are discarded and the token is passed on via consume().
-- * End of input fails the route.
-- * Any other token is part of the name.
function handle_attribute_name(self, this)
	if this == "" then
		return self:fail_route()
	elseif this == "=" then
		self.attr_name = ulower(concat(self:pop_sublayer()))
		self.handler = handle_before_attribute_value
		return
	elseif this == "/" or this == ">" or is_space(this) then
		self:pop_sublayer()
		return self:consume()
	end
	self:emit(this)
end
-- Handler for the token right after the "=" of an attribute.
-- * A single or double quote opens a quoted value; the quote character is
--   stored as `quoter` on the new sublayer.
-- * "/" or ">" mean the value is empty; the token is then passed on via
--   consume().
-- * Whitespace also means the value is empty.
-- * End of input fails the route.
-- * Any other token starts an unquoted value and is passed on via consume().
function handle_before_attribute_value(self, this)
	if this == "" then
		return self:fail_route()
	elseif this == "\"" or this == "'" then
		self:push_sublayer(handle_quoted_attribute_value)
		rawset(self:layer(), "quoter", this)
		return
	elseif this == "/" or this == ">" then
		handle_after_attribute_value(self, "")
		return self:consume()
	elseif is_space(this) then
		handle_after_attribute_value(self, "")
		return
	end
	self:push_sublayer(handle_unquoted_attribute_value)
	return self:consume()
end
-- Handler that collects the tokens of a quoted attribute value.
-- * The matching quote character (`quoter`) ends the value.
-- * ">" also ends the value, even though the quote is still open, and is
--   then passed on via consume().
-- * End of input fails the route.
-- * Any other token is part of the value.
function handle_quoted_attribute_value(self, this)
	if this == "" then
		return self:fail_route()
	end
	local closes_tag = this == ">"
	if closes_tag or this == self.quoter then
		handle_after_attribute_value(self, concat(self:pop_sublayer()))
		if closes_tag then
			return self:consume()
		end
		return
	end
	self:emit(this)
end
-- Handler that collects the tokens of an unquoted attribute value.
-- * Whitespace ends the value.
-- * "/" or ">" end the value and are then passed on via consume().
-- * End of input fails the route.
-- * Any other token is part of the value.
function handle_unquoted_attribute_value(self, this)
	local closes_tag = this == "/" or this == ">"
	if closes_tag or is_space(this) then
		handle_after_attribute_value(self, concat(self:pop_sublayer()))
		if closes_tag then
			return self:consume()
		end
		return
	end
	if this == "" then
		return self:fail_route()
	end
	self:emit(this)
end
-- Stores a completed attribute: records `attr_value` under the pending
-- `attr_name` in the `attributes` table (created on first use), clears
-- `attr_name`, and returns to looking for the next attribute name.
-- Unlike the other handlers it is called directly by the value handlers,
-- with the attribute value as its second argument rather than a token.
function handle_after_attribute_value(self, attr_value)
	local attributes = self.attributes or {}
	self.attributes = attributes
	attributes[self.attr_name] = attr_value
	self.attr_name = nil
	self.handler = handle_before_attribute_name
end
-- Handler for the body of a tag. Body tokens are emitted as-is (no nested
-- parsing) until "</" followed by this tag's name (compared in lower case)
-- is seen. The rest of the closing tag is then validated by handle_end on a
-- separate route: if that succeeds the tag is complete, otherwise the "<" is
-- emitted as ordinary body text. End of input fails the route.
function handle_tag_block(self, this)
	if this == "" then
		return self:fail_route()
	end
	local at_closing_tag = this == "<"
		and self:read(1) == "/"
		and lower(self:read(2)) == self.name
	if not at_closing_tag then
		self:emit(this)
		return
	end
	local tag_end = self:get(handle_end, self.advance, 3)
	if tag_end == self.bad_route then
		self:emit("<")
		return
	end
	return self:pop()
end
-- Handler for what follows the name in a closing tag: whitespace is skipped,
-- ">" completes the closing tag, and anything else (including the end of
-- input) fails the route.
function handle_end(self, this)
	if this == ">" then
		return self:pop()
	end
	if is_space(this) then
		return
	end
	return self:fail_route()
end
-- Tries to parse a tag starting at the current "<". On success a Tag node is
-- emitted; if the route fails, the "<" is emitted as plain text instead.
function Parser:tag()
	local tag = self:get(handle_start, self.advance)
	local failed = tag == self.bad_route
	if failed then
		self:emit("<")
		return
	end
	self:emit(Tag:new(tag))
end
end
-- Block handlers.
do
	-- Handler for the inside of a heading line: the newline is emitted and
	-- ends the block; everything else goes to the generic block handler.
	local function handle_heading_block(self, this)
		if this ~= "\n" then
			return self:block_handler(this)
		end
		self:emit("\n")
		return self:pop()
	end

	-- Handler for the inside of a "-{ ... }-" block: "}-" is emitted and
	-- ends the block; everything else goes to the generic block handler.
	local function handle_language_conversion_block(self, this)
		if this ~= "}" or self:read(1) ~= "-" then
			return self:block_handler(this)
		end
		self:advance()
		self:emit("}", "-")
		return self:pop()
	end

	-- Handler for the inside of a "[[ ... ]]" block: "]]" is emitted and
	-- ends the block; everything else goes to the generic block handler.
	local function handle_wikilink_block(self, this)
		if this ~= "]" or self:read(1) ~= "]" then
			return self:block_handler(this)
		end
		self:advance()
		self:emit("]", "]")
		return self:pop()
	end

	-- Generic handler used inside templates, arguments and the blocks above.
	-- It recognises the start of three kinds of block and parses each on its
	-- own layer, emitting the resulting tokens into the current layer:
	-- * "-{" : language conversion block, unless it is really "-{{", in
	--   which case the braces are handled as a template or argument;
	-- * "=" at the start of a line or of the input: heading block;
	-- * "[[" : wikilink block.
	-- Every other token is handed to Parser:main_handler.
	function Parser:block_handler(this)
		if this == "-" then
			if self:read(1) == "{" then
				self:advance()
				self:emit("-")
				if self:read(1) == "{" then
					self:template_or_argument()
				else
					self:emit_tokens(self:get(handle_language_conversion_block))
				end
				return
			end
		elseif this == "=" then
			local previous = self:read(-1)
			if previous == "\n" or previous == "" then
				self:advance()
				self:emit("=")
				self:emit_tokens(self:get(handle_heading_block))
				return
			end
		elseif this == "[" then
			if self:read(1) == "[" then
				self:advance()
				self:emit("[")
				self:emit_tokens(self:get(handle_wikilink_block))
				return
			end
		end
		return self:main_handler(this)
	end
end
-- Top-level token handler.
-- * "<!--" starts a comment: everything up to and including "-->" (or up to
--   the end of input) is skipped without being emitted.
-- * "</onlyinclude>" (only when the `onlyinclude` flag is set): the text
--   after it is skipped up to the next "<onlyinclude>" via not_onlyinclude.
-- * Any other "<" is tried as a tag.
-- * "{{" starts a template or argument.
-- * The empty token (end of input) pops the current layer.
-- * Everything else is emitted unchanged.
function Parser:main_handler(this)
	if this == "<" then
		if (
			self:read(1) == "!" and
			self:read(2) == "-" and
			self:read(3) == "-"
		) then
			self:advance(4)
			-- Slide a three-token window forward until it sits on "-->".
			local cur, ahead1, ahead2 = self:read(0, 1, 2)
			while cur ~= "" and not (
				cur == "-" and ahead1 == "-" and ahead2 == ">"
			) do
				self:advance()
				cur, ahead1, ahead2 = ahead1, ahead2, self:read(2)
			end
			self:advance(2)
		elseif (
			self.onlyinclude and
			self:read(1) == "/" and
			self:read(2) == "onlyinclude" and
			self:read(3) == ">"
		) then
			self:advance(4)
			self:not_onlyinclude()
		else
			self:tag()
		end
		return
	end
	if this == "" then
		return self:pop()
	end
	if this == "{" and self:read(1) == "{" then
		self:template_or_argument()
		return
	end
	self:emit(this)
end
do
	-- Set-up callback handed to Parser:parse. Stores the page title on the
	-- parser and, when parsing as a transclusion, sets the `transcluded`
	-- flag. If the raw text contains both "<onlyinclude>" and
	-- "</onlyinclude>", it also sets the `onlyinclude` flag and skips ahead
	-- to just after the first "<onlyinclude>".
	local function do_parse(self, str, title, transcluded)
		rawset(self, "title", title)
		if not transcluded then
			return
		end
		rawset(self, "transcluded", true)
		if match(str, "<onlyinclude>") and match(str, "</onlyinclude>") then
			rawset(self, "onlyinclude", true)
			self:not_onlyinclude()
			self:advance()
		end
	end

	-- Parses wikitext `str` and returns the result of Parser:parse.
	-- `title` is stored on the parser and copied onto every Template node;
	-- a truthy `transcluded` selects transclusion-mode tag handling.
	-- The text is first split into tokens: each whitespace character and
	-- each of ! " ' - / < = > [ ] { | } is a token of its own, and every run
	-- of other characters between them is a single token.
	function export.parse(str, title, transcluded)
		local text = {}
		local count = 0
		for chunk, char in gmatch(str, "([^%s!\"'%-/<=>%[%]{|}]*)(.?)") do
			if chunk ~= "" then
				count = count + 1
				text[count] = chunk
			end
			if char ~= "" then
				count = count + 1
				text[count] = char
			end
		end
		local tokens = Parser:parse(
			text,
			Parser.main_handler,
			do_parse,
			str,
			title,
			transcluded
		)
		return tokens
	end
end
-- Parses `text` and, if the result as a whole is a single template node,
-- returns its trimmed name and its parameter table (see
-- Template:get_params). Returns nothing otherwise.
function export.parseTemplate(text)
	local node = export.parse(text)
	if not node or node.type ~= "template" then
		return
	end
	return trim(tostring(node[1])), node:get_params()
end
-- Returns an iterator over the templates found in `text`. Each step yields
-- four values: the trimmed template name, its parameter table (see
-- Template:get_params), the template serialised back to text, and its
-- position (`pos` of the node, or 1 when the whole input is one template).
-- The iterator returns nothing once no template is left.
function export.findTemplates(text)
	local root = export.parse(text)
	root.i = 0
	root._next = root.next
	-- If the parse result is itself a template, it is yielded first.
	local root_pending = root.type == "template"
	return function()
		if root_pending then
			root_pending = false
			return trim(tostring(root[1])), root:get_params(), tostring(root), 1
		end
		local node
		repeat
			node = root:iterate()
		until not node or (type(node) == "table" and node.type == "template")
		if node then
			return trim(tostring(node[1])), node:get_params(), tostring(node), node.pos
		end
	end
end
return export