Modul:templateparser

Daripada Wiktionary

Pendokumenan untuk modul ini boleh diciptakan di Modul:templateparser/doc

local concat = table.concat
local gmatch = string.gmatch
local insert = table.insert
local lower = string.lower
local match = string.match
local rawset = rawset
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local type = type
local ulower = string.ulower

local m_parser = require("Module:parser")
local data = mw.loadData("Module:templateparser/data")

local export = {}

------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------

-- Set of the six single-character ASCII whitespace strings: space, tab,
-- newline, vertical tab, form feed and carriage return.
local whitespace = {
	[" "] = true,
	["\t"] = true,
	["\n"] = true,
	["\v"] = true,
	["\f"] = true,
	["\r"] = true,
}

-- Returns true if `this` is exactly one of the whitespace characters listed
-- above, and false for anything else (including the empty string and nil).
local function is_space(this)
	return whitespace[this] == true
end

-- Strips leading and trailing whitespace (as defined by is_space) from `str`.
-- Returns "" when the string is empty or consists solely of whitespace.
local function trim(str)
	local len = #str
	-- Walk forward to the first non-whitespace byte.
	local first = 1
	while first <= len and is_space(sub(str, first, first)) do
		first = first + 1
	end
	if first > len then
		return ""
	end
	-- Walk backward to the last non-whitespace byte; the scan cannot pass
	-- `first`, because the byte there is known not to be whitespace.
	local last = len
	while is_space(sub(str, last, last)) do
		last = last - 1
	end
	return sub(str, first, last)
end

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

local Node = m_parser.Node

local Wikitext = m_parser.Wikitext

local Tag = Node:new("tag")

-- Serialises a tag node back to markup.
-- * Tags flagged `ignored` produce the empty string.
-- * Attributes (if any) are written as name="value"; they are iterated with
--   pairs(), so their order is unspecified.
-- * Self-closing tags are written as <name .../>; otherwise the array part of
--   the node is concatenated between the opening and closing tag.
function Tag:__tostring()
	if self.ignored then
		return ""
	end
	local name = self.name
	local parts = {"<", name}
	local attributes = self.attributes
	if attributes then
		for attr, value in pairs(attributes) do
			parts[#parts + 1] = " " .. attr .. "=\"" .. value .. "\""
		end
	end
	if self.self_closing then
		parts[#parts + 1] = "/>"
		return concat(parts)
	end
	parts[#parts + 1] = ">"
	return concat(parts) .. concat(self) .. "</" .. name .. ">"
end

local Argument = Node:new("argument")

-- Serialises an argument node as {{{part1|part2|...}}}.
-- A node with neither a first nor a second part is rendered as the literal
-- string "argument".
function Argument:__tostring()
	local first = self[1]
	if not (first or self[2]) then
		return "argument"
	end
	local parts = {"{{{", tostring(first)}
	-- Append every subsequent part, each preceded by a pipe.
	local i = 2
	while self[i] do
		parts[#parts + 1] = "|"
		parts[#parts + 1] = tostring(self[i])
		i = i + 1
	end
	parts[#parts + 1] = "}}}"
	return concat(parts)
end

-- Iteration step: increments the cursor `self.i` and returns the child at the
-- new position, but only for positions 1 and 2. Beyond that nothing is
-- returned. The cursor is expected to have been initialised by the caller
-- (the initialisation is not visible in this file).
function Argument:next()
	self.i = self.i + 1
	local index = self.i
	if index > 2 then
		return
	end
	return self[index]
end

local Parameter = Node:new("parameter")

-- Serialises a template parameter. Named parameters are written as
-- key=value; unnamed ones are just the value, using Node's own serialiser
-- for the value part in both cases.
function Parameter:__tostring()
	local value = Node.__tostring(self)
	local key = self.key
	if key then
		return tostring(key) .. "=" .. value
	end
	return value
end

local Template = Node:new("template")

-- Serialises a template node as {{name|param1|param2|...}}.
--
-- If the node carries a truthy `colon` flag, the second element is written
-- directly after the name, separated by a colon instead of a pipe
-- ({{name:arg1|arg2|...}}), and the pipe-separated list starts at the third
-- element. NOTE(review): nothing in this file sets `colon`; it is presumably
-- set by external code for parser-function style calls - confirm.
--
-- A node with only a name is written as {{name}}; a node with no elements at
-- all is rendered as the literal string "template".
function Template:__tostring()
	if self[2] then
		local output, n = {"{{", tostring(self[1])}, 2
		if self.colon then
			insert(output, ":")
			-- Emit element 2 here. The previous code emitted element 3 and
			-- then looped from 3, which printed element 3 twice and dropped
			-- element 2 altogether.
			insert(output, tostring(self[2]))
			n = 3
		end
		for i = n, #self do
			insert(output, "|")
			insert(output, tostring(self[i]))
		end
		insert(output, "}}")
		return concat(output)
	elseif self[1] then
		return "{{" .. tostring(self[1]) .. "}}"
	else
		return "template"
	end
end

-- Builds a key -> value table from the template's parameters (elements 2
-- onwards).
--
-- Named parameters: the key and the value are both trimmed. A key is converted
-- to a number when it is "0" or an optionally-negative integer with no leading
-- zeroes and no decimals, and lies within [-2^53, 2^53]. Because doubles are
-- only exact up to 2^53 - 1, the two boundary values are recognised by their
-- string form rather than by numeric comparison.
--
-- Unnamed parameters: numbered 1, 2, 3... in order of appearance; the value is
-- kept untrimmed.
--
-- Later parameters overwrite earlier ones with the same key.
function Template:get_params()
	local params, implicit = {}, 0
	for i = 2, #self do
		local param = self[i]
		local raw_key = param.key
		local key, value
		if raw_key then
			key = trim(tostring(raw_key))
			if key == "0" or match(key, "^-?[1-9]%d*$") then
				local num = tonumber(key)
				local in_safe_range = num >= -9007199254740991 and num <= 9007199254740991
				if (
					in_safe_range or
					key == "9007199254740992" or
					key == "-9007199254740992"
				) then
					key = num
				end
			end
			value = trim(Node.__tostring(param))
		else
			implicit = implicit + 1
			key, value = implicit, tostring(param)
		end
		params[key] = value
	end
	return params
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

local Parser = m_parser.Parser

-- Argument.
do
	-- Token handler used while inside a candidate {{{argument}}}.
	-- * "|" closes the current part (emitted as Wikitext) and starts a new one.
	-- * "}}}" closes the final part and finishes the argument.
	-- * "}}" not followed by a third "}" or end of input fails the route.
	-- * Everything else is delegated to the block handler.
	local function handle_argument(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:push_sublayer()
			return
		end
		if this ~= "}" or self:read(1) ~= "}" then
			return self:block_handler(this)
		end
		-- We are at "}}"; only a third brace makes this a valid argument end.
		if self:read(2) ~= "}" then
			return self:fail_route()
		end
		self:emit(Wikitext:new(self:pop_sublayer()))
		self:advance(2)
		return self:pop()
	end

	-- Attempts to parse a {{{argument}}} at the current position. Called from
	-- Parser:template_or_argument while at least three opening braces remain
	-- unconsumed.
	--
	-- If the route fails, falls back to trying a template instead. On success,
	-- three braces are consumed from the counters and an Argument node is
	-- emitted, with `pos` set to the updated brace_head.
	function Parser:argument()
		local argument = self:get(handle_argument, self.push_sublayer)
		if argument == self.bad_route then
			self:template()
		else
			-- emit_pos is set to (layer length + 1) before the first attempt, so
			-- the layer having that length means something has already been
			-- emitted at emit_pos - presumably the inner template/argument from
			-- a previous pass over the same run of braces. It is pulled out and
			-- prepended to this argument's first part.
			if #self:layer() == self.emit_pos then
				local inner = self:remove()
				if type(argument[1]) == "table" then
					insert(argument[1], 1, inner)
				else
					argument[1] = Wikitext:new{inner, argument[1]}
				end
			end
			self.braces = self.braces - 3
			self.brace_head = self.brace_head - 3
			argument.pos = self.brace_head
			self:emit(Argument:new(argument))
		end
	end
end

-- Template.
do
	-- Declared up front because handle_name switches the active handler to
	-- handle_parameter, which is defined after it.
	local handle_name
	local handle_parameter
	
	-- Token handler for the name part of a candidate template.
	-- * "|" closes the name (emitted as Wikitext) and switches to parameter
	--   handling in a fresh sublayer.
	-- * "}}" closes the name and finishes the template.
	-- * End of input fails the route.
	-- * Everything else is delegated to the block handler.
	function handle_name(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self.handler = handle_parameter
			self:push_sublayer()
			return
		end
		if this == "}" and self:read(1) == "}" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		end
		return self:block_handler(this)
	end
	
	-- Token handler for the parameters of a candidate template.
	-- * "|" closes the current parameter and starts the next.
	-- * "}}" closes the current parameter and finishes the template.
	-- * End of input fails the route.
	-- * The first "=" in a parameter splits key from value, unless it is the
	--   start of "==" at the beginning of a line (or of the input); in that
	--   case it is left to the block handler.
	-- * Everything else is delegated to the block handler.
	function handle_parameter(self, this)
		if this == "" then
			return self:fail_route()
		elseif this == "|" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:push_sublayer()
			return
		elseif this == "}" and self:read(1) == "}" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		end
		local splits_key = this == "=" and not self.key and (
			self:read(1) ~= "=" or
			self:read(-1) ~= "\n" and self:read(-1) ~= ""
		)
		if not splits_key then
			return self:block_handler(this)
		end
		-- What has been collected so far becomes the key; the value is
		-- collected in a fresh sublayer that carries the key with it.
		local key = self:pop_sublayer()
		self:push_sublayer()
		rawset(self:layer(), "key", Wikitext:new(key))
	end
	
	-- Attempts to parse a {{template}} at the current position. Called from
	-- Parser:template_or_argument when exactly two opening braces remain, and
	-- from Parser:argument as a fallback when an argument route fails.
	--
	-- On failure, all remaining unconsumed opening braces are emitted as literal
	-- "{" characters at emit_pos and the brace counter is reset to 0. On
	-- success, two braces are consumed from the counters and a Template node is
	-- emitted, with `title` copied from the parser and `pos` set to the updated
	-- brace_head.
	function Parser:template()
		local template = self:get(handle_name, self.push_sublayer)
		if template == self.bad_route then
			-- Step back one token; the caller's loop advances again after this
			-- call returns.
			self:advance(-1)
			for _ = 1, self.braces do
				self:emit(self.emit_pos, "{")
			end
			self.braces = 0
		else
			-- emit_pos is set to (layer length + 1) before the first attempt, so
			-- the layer having that length means something has already been
			-- emitted at emit_pos - presumably the inner template/argument from
			-- a previous pass over the same run of braces. It is pulled out and
			-- prepended to this template's name.
			if #self:layer() == self.emit_pos then
				local inner = self:remove()
				if type(template[1]) == "table" then
					insert(template[1], 1, inner)
				else
					template[1] = Wikitext:new{inner, template[1]}
				end
			end
			template.title = self.title
			self.braces = self.braces - 2
			self.brace_head = self.brace_head - 2
			template.pos = self.brace_head
			self:emit(Template:new(template))
		end
	end
	
	-- Entry point for a run of two or more opening braces. Counts the braces,
	-- then repeatedly tries to consume them as arguments (three braces) or
	-- templates (two braces) until none are left.
	function Parser:template_or_argument()
		-- Callers only invoke this when the current and next tokens are both
		-- "{", so skip those two and count any further ones.
		self:advance(2)
		self.braces = 2
		while self:read() == "{" do
			self:advance()
			self.braces = self.braces + 1
		end
		-- Remember where in the current layer the result will be emitted, and
		-- the raw position just after the run of braces (each successful parse
		-- moves brace_head back by the number of braces it consumed).
		self.emit_pos = #self:layer() + 1
		self.brace_head = self.raw_head
		repeat
			if self.braces == 1 then
				-- A single leftover brace cannot open anything; emit it
				-- literally.
				self:emit(self.emit_pos, "{")
				break
			elseif self.braces == 2 then
				self:template()
			else
				-- Three or more: try an argument first (Parser:argument falls
				-- back to a template if that fails).
				self:argument()
			end
			self:advance()
		until self.braces == 0
		-- Undo the final advance so the head is left on the last consumed token.
		self:advance(-1)
	end
end

-- Skips text that lies outside <onlyinclude></onlyinclude>.
-- Advances token by token until the head is on the "<" of an "<onlyinclude>"
-- opening tag (matched case-sensitively as the three tokens "<",
-- "onlyinclude", ">") or until end of input, then advances two more tokens so
-- that the head sits on the tag's closing ">".
function Parser:not_onlyinclude()
	local cur, ahead1, ahead2 = self:read(0, 1, 2)
	while cur ~= "" and not (
		cur == "<" and ahead1 == "onlyinclude" and ahead2 == ">"
	) do
		self:advance()
		-- Slide the three-token window forward by one.
		cur, ahead1, ahead2 = ahead1, ahead2, self:read(2)
	end
	self:advance(2)
end

-- Tag.
do
	-- Returns true if the tag named `check` (already lowercased) is one whose
	-- markup is dropped in the current mode:
	-- * when transcluding: includeonly;
	-- * otherwise: noinclude and onlyinclude.
	local function is_ignored_tag(self, check)
		if self.transcluded then
			return check == "includeonly"
		end
		return check == "noinclude" or check == "onlyinclude"
	end
	
	-- Handlers.
	-- Forward declarations: the handlers below form a state machine and refer
	-- to one another regardless of definition order.
	local handle_start
	local handle_ignored_tag_start
	local handle_ignored_tag
	local handle_after_tag_name
	local handle_before_attribute_name
	local handle_attribute_name
	local handle_before_attribute_value
	local handle_quoted_attribute_value
	local handle_unquoted_attribute_value
	local handle_after_attribute_value
	local handle_tag_block
	local handle_end
	
	-- First handler after "<": decides what kind of tag (if any) this is.
	-- * "</name": only accepted for tags that is_ignored_tag reports as
	--   ignored; any other closing tag here fails the route.
	-- * An ignored tag name: the tag is flagged `ignored` and only its own
	--   markup is consumed.
	-- * noinclude while transcluding, or includeonly while not: the tag is
	--   flagged `ignored` but then handled like a normal tag, so its contents
	--   are captured inside the (ignored) node.
	-- * A name listed in data.tags: handled as a normal tag.
	-- * Anything else fails the route.
	function handle_start(self, this)
		if this == "/" then
			local check = lower(self:read(1))
			if not is_ignored_tag(self, check) then
				return self:fail_route()
			end
			self.name = check
			self.ignored = true
			self:advance()
			self.handler = handle_ignored_tag_start
			return
		end
		local check = lower(this)
		if is_ignored_tag(self, check) then
			self.name = check
			self.ignored = true
			self.handler = handle_ignored_tag_start
			return
		end
		local transcluded = self.transcluded
		local hidden_with_contents =
			check == "noinclude" and transcluded or
			check == "includeonly" and not transcluded
		if hidden_with_contents then
			self.name = check
			self.ignored = true
		elseif data.tags[check] then
			self.name = check
		else
			return self:fail_route()
		end
		self.handler = handle_after_tag_name
	end
	
	-- Handler for the token right after the name of an ignored tag.
	-- ">" ends the tag, "/>" ends it as self-closing, whitespace moves on to
	-- skipping the rest of the tag; anything else fails the route.
	function handle_ignored_tag_start(self, this)
		if this == ">" then
			return self:pop()
		end
		if is_space(this) then
			self.handler = handle_ignored_tag
			return
		end
		if this ~= "/" or self:read(1) ~= ">" then
			return self:fail_route()
		end
		self.self_closing = true
		self:advance()
		return self:pop()
	end
	
	-- Skips everything inside an ignored tag's markup until its closing ">".
	-- Reaching end of input first fails the route; all other tokens are
	-- silently discarded.
	function handle_ignored_tag(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == ">" then
			return self:pop()
		end
	end
	
	-- Handler for the token right after a normal tag's name.
	-- ">" starts the tag's contents, whitespace starts the attribute list,
	-- "/>" ends the tag as self-closing; anything else fails the route.
	function handle_after_tag_name(self, this)
		if this == ">" then
			self.handler = handle_tag_block
			return
		end
		if is_space(this) then
			self.handler = handle_before_attribute_name
			return
		end
		if this == "/" and self:read(1) == ">" then
			self.self_closing = true
			self:advance()
			return self:pop()
		end
		return self:fail_route()
	end
	
	-- Handler for the gap before an attribute name inside an opening tag.
	-- * End of input fails the route.
	-- * "/>" ends the tag as self-closing; ">" starts the tag's contents.
	-- * Any token other than "/" or whitespace starts an attribute name: a
	--   sublayer is pushed for it and the same token is re-dispatched to
	--   handle_attribute_name.
	-- * Whitespace and a lone "/" are skipped.
	function handle_before_attribute_name(self, this)
		-- This check must come first: "" is neither "/" nor whitespace, so it
		-- would otherwise be treated as the start of an attribute name and this
		-- branch would never be reached.
		if this == "" then
			return self:fail_route()
		elseif this == "/" and self:read(1) == ">" then
			self.self_closing = true
			self:advance()
			return self:pop()
		elseif this == ">" then
			self.handler = handle_tag_block
		elseif this ~= "/" and not is_space(this) then
			self:push_sublayer(handle_attribute_name)
			return self:consume()
		end
	end
	
	-- Collects the tokens of an attribute name in its own sublayer.
	-- * "=" ends the name: it is joined, lowercased with ulower and stored as
	--   attr_name, and the value is expected next.
	-- * "/", ">" or whitespace means the attribute has no value: the collected
	--   name is discarded and the token is re-dispatched to the handler that
	--   is active once the sublayer has been popped.
	-- * End of input fails the route.
	function handle_attribute_name(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == "=" then
			self.attr_name = ulower(concat(self:pop_sublayer()))
			self.handler = handle_before_attribute_value
			return
		end
		if this == "/" or this == ">" or is_space(this) then
			self:pop_sublayer()
			return self:consume()
		end
		self:emit(this)
	end
	
	-- Handler for the token right after an attribute's "=".
	-- * A quote character opens a quoted value, remembered as `quoter` on the
	--   new sublayer.
	-- * Whitespace, "/" or ">" means the value is empty; for "/" and ">" the
	--   token is then re-dispatched so the tag can end.
	-- * End of input fails the route.
	-- * Anything else starts an unquoted value and is re-dispatched to it.
	function handle_before_attribute_value(self, this)
		if this == "" then
			return self:fail_route()
		end
		if this == "\"" or this == "'" then
			self:push_sublayer(handle_quoted_attribute_value)
			rawset(self:layer(), "quoter", this)
			return
		end
		if is_space(this) then
			handle_after_attribute_value(self, "")
			return
		end
		if this == "/" or this == ">" then
			handle_after_attribute_value(self, "")
		else
			self:push_sublayer(handle_unquoted_attribute_value)
		end
		return self:consume()
	end
	
	-- Collects the tokens of a quoted attribute value in its own sublayer.
	-- The value ends at the matching quote character, or at ">" (which is then
	-- re-dispatched so that it also ends the tag). End of input fails the
	-- route.
	function handle_quoted_attribute_value(self, this)
		if this == "" then
			return self:fail_route()
		end
		local closes_tag = this == ">"
		if not closes_tag and this ~= self.quoter then
			self:emit(this)
			return
		end
		handle_after_attribute_value(self, concat(self:pop_sublayer()))
		if closes_tag then
			return self:consume()
		end
	end
			
	-- Collects the tokens of an unquoted attribute value in its own sublayer.
	-- The value ends at whitespace, or at "/" or ">" (which are then
	-- re-dispatched so the tag can end). End of input fails the route.
	function handle_unquoted_attribute_value(self, this)
		if this == "" then
			return self:fail_route()
		end
		local ends_tag = this == "/" or this == ">"
		if not ends_tag and not is_space(this) then
			self:emit(this)
			return
		end
		handle_after_attribute_value(self, concat(self:pop_sublayer()))
		if ends_tag then
			return self:consume()
		end
	end
	
	-- Records a finished attribute. Unlike the other handlers, this one is
	-- called directly by the value handlers with the finished value string
	-- rather than with a token.
	--
	-- Stores attr_value under the pending attr_name in the `attributes` table
	-- (created on first use), clears attr_name, and returns to looking for the
	-- next attribute name. A repeated attribute name overwrites the earlier
	-- value.
	function handle_after_attribute_value(self, attr_value)
		self.attributes = self.attributes or {}
		self.attributes[self.attr_name] = attr_value
		self.attr_name = nil
		self.handler = handle_before_attribute_name
	end
	
	-- Collects the raw contents of a tag until its matching closing tag.
	-- When "</" followed by this tag's name (compared case-insensitively) is
	-- seen, handle_end is tried on what follows; if that succeeds the tag is
	-- complete, otherwise the "<" is kept as literal content. End of input
	-- fails the route. All other tokens are stored unchanged.
	function handle_tag_block(self, this)
		if this == "" then
			return self:fail_route()
		end
		if (
			this == "<" and
			self:read(1) == "/" and
			lower(self:read(2)) == self.name
		) then
			if self:get(handle_end, self.advance, 3) ~= self.bad_route then
				return self:pop()
			end
			self:emit("<")
		else
			self:emit(this)
		end
	end
	
	-- Handler for what follows the name in a closing tag: only whitespace is
	-- allowed before the final ">". Anything else (including end of input)
	-- fails the route.
	function handle_end(self, this)
		if this == ">" then
			return self:pop()
		end
		if is_space(this) then
			return
		end
		return self:fail_route()
	end
	
	-- Attempts to parse a tag starting at the current "<". Emits a Tag node on
	-- success; on failure emits the "<" as literal text.
	function Parser:tag()
		local result = self:get(handle_start, self.advance)
		if result ~= self.bad_route then
			self:emit(Tag:new(result))
			return
		end
		self:emit("<")
	end
end

-- Block handlers.
do
	-- Handler for a line that begins with "=": runs until the next newline,
	-- which is emitted and ends the block. Other tokens go to the block
	-- handler.
	local function handle_heading_block(self, this)
		if this ~= "\n" then
			return self:block_handler(this)
		end
		self:emit("\n")
		return self:pop()
	end
	
	-- Handler for a -{ ... }- block: runs until the closing "}-", which is
	-- emitted as two tokens and ends the block. Other tokens go to the block
	-- handler.
	local function handle_language_conversion_block(self, this)
		if this ~= "}" or self:read(1) ~= "-" then
			return self:block_handler(this)
		end
		self:advance()
		self:emit("}", "-")
		return self:pop()
	end
	
	-- Handler for a [[ ... ]] block: runs until the closing "]]", which is
	-- emitted as two tokens and ends the block. Other tokens go to the block
	-- handler.
	local function handle_wikilink_block(self, this)
		if this ~= "]" or self:read(1) ~= "]" then
			return self:block_handler(this)
		end
		self:advance()
		self:emit("]", "]")
		return self:pop()
	end
	
	-- Handles tokens that may open a nested block inside a template or
	-- argument, falling back to the main handler for everything else.
	-- * "-{" : if a further "{" follows, the braces are handled as a
	--   template/argument; otherwise a language conversion block is parsed.
	-- * "=" at the start of a line or of the input: a heading block.
	-- * "[[" : a wikilink block.
	-- The tokens of a nested block are emitted into the current layer.
	function Parser:block_handler(this)
		if this == "-" and self:read(1) == "{" then
			self:advance()
			self:emit("-")
			if self:read(1) == "{" then
				self:template_or_argument()
			else
				self:emit_tokens(self:get(handle_language_conversion_block))
			end
			return
		end
		if this == "=" then
			local prev = self:read(-1)
			if prev == "\n" or prev == "" then
				self:advance()
				self:emit("=")
				self:emit_tokens(self:get(handle_heading_block))
				return
			end
		elseif this == "[" and self:read(1) == "[" then
			self:advance()
			self:emit("[")
			self:emit_tokens(self:get(handle_wikilink_block))
			return
		end
		return self:main_handler(this)
	end
end

-- Top-level token handler.
-- * End of input pops the current layer.
-- * "{{" starts a template or argument.
-- * "<!--" starts a comment, which is skipped up to its closing "-->" (or to
--   end of input) without emitting anything.
-- * "</onlyinclude>", when the parser is in onlyinclude mode, skips ahead to
--   the next "<onlyinclude>".
-- * Any other "<" is tried as a tag.
-- * Everything else is emitted unchanged.
function Parser:main_handler(this)
	if this == "" then
		return self:pop()
	elseif this == "{" then
		if self:read(1) == "{" then
			self:template_or_argument()
		else
			self:emit(this)
		end
		return
	elseif this ~= "<" then
		self:emit(this)
		return
	end
	if (
		self:read(1) == "!" and
		self:read(2) == "-" and
		self:read(3) == "-"
	) then
		-- Skip the four tokens of "<!--", then slide a three-token window
		-- forward until it sits on "-->".
		self:advance(4)
		local cur, ahead1, ahead2 = self:read(0, 1, 2)
		while cur ~= "" and not (
			cur == "-" and ahead1 == "-" and ahead2 == ">"
		) do
			self:advance()
			cur, ahead1, ahead2 = ahead1, ahead2, self:read(2)
		end
		self:advance(2)
	elseif (
		self.onlyinclude and
		self:read(1) == "/" and
		self:read(2) == "onlyinclude" and
		self:read(3) == ">"
	) then
		self:advance(4)
		self:not_onlyinclude()
	else
		self:tag()
	end
end

do
	-- Set-up callback passed to Parser:parse. Stores the title on the parser
	-- and, when transcluding, flags that mode. If the raw text contains both
	-- "<onlyinclude>" and "</onlyinclude>", onlyinclude mode is switched on and
	-- the head is moved past the first "<onlyinclude>" tag.
	local function do_parse(self, str, title, transcluded)
		rawset(self, "title", title)
		if not transcluded then
			return
		end
		rawset(self, "transcluded", true)
		if not (match(str, "<onlyinclude>") and match(str, "</onlyinclude>")) then
			return
		end
		rawset(self, "onlyinclude", true)
		self:not_onlyinclude()
		self:advance()
	end
	
	-- Parses wikitext into a node tree.
	-- `str` is split into tokens: each whitespace character and each of
	-- ! " ' - / < = > [ ] { | } becomes a token of its own, and every run of
	-- other characters between them becomes a single token. The token list is
	-- then handed to Parser:parse with main_handler as the starting handler.
	-- `title` and `transcluded` are forwarded to do_parse.
	-- Returns the first value returned by Parser:parse.
	function export.parse(str, title, transcluded)
		local text, n = {}, 0
		for chunk, char in gmatch(str, "([^%s!\"'%-/<=>%[%]{|}]*)(.?)") do
			if chunk ~= "" then
				n = n + 1
				text[n] = chunk
			end
			if char ~= "" then
				n = n + 1
				text[n] = char
			end
		end
		-- The parentheses truncate the call to a single return value.
		return (Parser:parse(
			text,
			Parser.main_handler,
			do_parse,
			str,
			title,
			transcluded
		))
	end
end

-- Parses `text` and, if the result is itself a single template node, returns
-- the template's trimmed name and its parameter table. Returns nothing
-- otherwise.
function export.parseTemplate(text)
	local node = export.parse(text)
	if not node or node.type ~= "template" then
		return
	end
	return trim(tostring(node[1])), node:get_params()
end

-- Returns an iterator over the templates found in `text`. Each step yields
-- four values: the trimmed template name, its parameter table, the template's
-- wikitext and its position.
-- If the parsed text is itself a template node, that node is yielded first,
-- with position 1; the remaining templates come from the node's iterate
-- method.
function export.findTemplates(text)
	text = export.parse(text)
	text.i = 0
	text._next = text.next
	local root_pending = text.type == "template"
	return function()
		if root_pending then
			root_pending = false
			return trim(tostring(text[1])), text:get_params(), tostring(text), 1
		end
		-- Skip over anything that is not a template node.
		local node = text:iterate()
		while node and not (type(node) == "table" and node.type == "template") do
			node = text:iterate()
		end
		if node then
			return trim(tostring(node[1])), node:get_params(), tostring(node), node.pos
		end
	end
end

return export