feat(scraper): move "Object properties" to AST
This commit is contained in:
parent
916af3913d
commit
7e225cf452
|
@ -1,14 +1,10 @@
|
||||||
local Function_Info = require "entity.Function_Info"
|
local ast <const> = require("ast")
|
||||||
local List = require "pl.List"
|
local type Node = require("types.Node")
|
||||||
local logger = require "logger"
|
local logger = require "logger"
|
||||||
local Map = require "pl.Map"
|
|
||||||
local Module_Doc = require "entity.Module_Doc"
|
|
||||||
local scan = require "web_sanitize.query.scan_html"
|
local scan = require "web_sanitize.query.scan_html"
|
||||||
local scraper_utils = require "scraper.utils"
|
local scraper_utils = require "scraper.utils"
|
||||||
local stringx = require "pl.stringx"
|
local stringx = require "pl.stringx"
|
||||||
local Type_Info = require "entity.Type_Info"
|
|
||||||
local utils = require "utils"
|
local utils = require "utils"
|
||||||
local Variable_Info = require "entity.Variable_Info"
|
|
||||||
|
|
||||||
local log = logger.log("scraper")
|
local log = logger.log("scraper")
|
||||||
|
|
||||||
|
@ -16,94 +12,93 @@ local function extract_node_text(node: scan.HTMLNode): string
|
||||||
return utils.sanitize_string(node:inner_text())
|
return utils.sanitize_string(node:inner_text())
|
||||||
end
|
end
|
||||||
|
|
||||||
local function parse_parameter_types(parameter_type: string): List<Type_Info.Type_Info>
|
local function parse_parameter_types(parameter_type: string): { string }
|
||||||
if parameter_type == "" then
|
if parameter_type == "" then
|
||||||
local type_info: Type_Info.Type_Info = Type_Info("any")
|
return { "any" }
|
||||||
return List({ type_info })
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return stringx.split(parameter_type, " or "):map(
|
local types = {}
|
||||||
function(type_name: string): Type_Info.Type_Info
|
for t in stringx.split(parameter_type, " or "):iter() do
|
||||||
return Type_Info(utils.sanitize_string(type_name))
|
table.insert(types, t)
|
||||||
end
|
end
|
||||||
)
|
return types
|
||||||
end
|
end
|
||||||
|
|
||||||
local function extract_item_name(item_name_node: scan.HTMLNode): string
|
local function extract_item_name(item_name_node: scan.HTMLNode): string
|
||||||
return item_name_node and ((item_name_node.attr.name as string):gsub("^.*[%.:]", ""))
|
return item_name_node and ((item_name_node.attr.name as string):gsub("^.*[%.:]", ""))
|
||||||
end
|
end
|
||||||
|
|
||||||
local function extract_function_parameter_Parameters(tr_node: scan.HTMLNode): { Variable_Info.Variable_Info }
|
-- local function extract_function_parameter_Parameters(tr_node: scan.HTMLNode): { Variable_Info.Variable_Info }
|
||||||
local query_selectors = {
|
-- local query_selectors = {
|
||||||
name = "span.parameter",
|
-- name = "span.parameter",
|
||||||
types = "span.types"
|
-- types = "span.types"
|
||||||
}
|
-- }
|
||||||
|
|
||||||
return scraper_utils.scrape_tuples(
|
-- return scraper_utils.scrape_tuples(
|
||||||
tr_node:outer_html(),
|
-- tr_node:outer_html(),
|
||||||
{ query_selectors.name, query_selectors.types },
|
-- { query_selectors.name, query_selectors.types },
|
||||||
function(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info
|
-- function(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info
|
||||||
return Variable_Info(
|
-- return Variable_Info(
|
||||||
extract_node_text(nodes[query_selectors.name] as scan.HTMLNode),
|
-- extract_node_text(nodes[query_selectors.name] as scan.HTMLNode),
|
||||||
parse_parameter_types(extract_node_text(nodes[query_selectors.types] as scan.HTMLNode))
|
-- parse_parameter_types(extract_node_text(nodes[query_selectors.types] as scan.HTMLNode))
|
||||||
)
|
-- )
|
||||||
end)
|
-- end)
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local function extract_function_parameters(function_parameters_node: scan.HTMLNode): { Variable_Info.Variable_Info }
|
-- local function extract_function_parameters(function_parameters_node: scan.HTMLNode): { Variable_Info.Variable_Info }
|
||||||
local current_record_parameter: Type_Info.Type_Info | nil = nil
|
-- local current_record_parameter: Type_Info.Type_Info | nil = nil
|
||||||
|
|
||||||
return scraper_utils.scrape(
|
-- return scraper_utils.scrape(
|
||||||
function_parameters_node:outer_html(),
|
-- function_parameters_node:outer_html(),
|
||||||
"tr",
|
-- "tr",
|
||||||
function(line_node: scan.HTMLNode): Variable_Info.Variable_Info
|
-- function(line_node: scan.HTMLNode): Variable_Info.Variable_Info
|
||||||
local parameters = extract_function_parameter_Parameters(line_node)
|
-- local parameters = extract_function_parameter_Parameters(line_node)
|
||||||
if #parameters == 0 then
|
-- if #parameters == 0 then
|
||||||
return nil
|
-- return nil
|
||||||
elseif #parameters ~= 1 then
|
-- elseif #parameters ~= 1 then
|
||||||
log:error(logger.message_with_metadata("Expected 1 parameter by <tr> node",
|
-- log:error(logger.message_with_metadata("Expected 1 parameter by <tr> node",
|
||||||
{ len = #parameters, line_node = line_node, parameters = parameters }))
|
-- { len = #parameters, line_node = line_node, parameters = parameters }))
|
||||||
error("Expected 1 parameter by <tr> node")
|
-- error("Expected 1 parameter by <tr> node")
|
||||||
end
|
-- end
|
||||||
local name, types = parameters[1].name, parameters[1].types
|
-- local name, types = parameters[1].name, parameters[1].types
|
||||||
|
|
||||||
if line_node.attr ~= nil and line_node.attr.class == "see_also_sublist" and current_record_parameter then
|
-- if line_node.attr ~= nil and line_node.attr.class == "see_also_sublist" and current_record_parameter then
|
||||||
local record_parameter = current_record_parameter as Type_Info.Type_Info
|
-- local record_parameter = current_record_parameter as Type_Info.Type_Info
|
||||||
if not record_parameter.record_entries then
|
-- if not record_parameter.record_entries then
|
||||||
record_parameter.record_entries = Map()
|
-- record_parameter.record_entries = Map()
|
||||||
end
|
-- end
|
||||||
|
|
||||||
(record_parameter.record_entries as Map<string, List<Type_Info.Type_Info>>):set(name, types)
|
-- (record_parameter.record_entries as Map<string, List<Type_Info.Type_Info>>):set(name, types)
|
||||||
|
|
||||||
return nil
|
-- return nil
|
||||||
end
|
-- end
|
||||||
|
|
||||||
if #types == 1 and types[1].name == "table" then
|
-- if #types == 1 and types[1].name == "table" then
|
||||||
local record_name = utils.capitalize(name)
|
-- local record_name = utils.capitalize(name)
|
||||||
current_record_parameter = Type_Info(record_name)
|
-- current_record_parameter = Type_Info(record_name)
|
||||||
return Variable_Info(
|
-- return Variable_Info(
|
||||||
name,
|
-- name,
|
||||||
List({ current_record_parameter })
|
-- List({ current_record_parameter })
|
||||||
)
|
-- )
|
||||||
end
|
-- end
|
||||||
|
|
||||||
return Variable_Info(name, types)
|
-- return Variable_Info(name, types)
|
||||||
end)
|
-- end)
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local function extract_function_return_types(function_return_types_node: scan.HTMLNode): List<Type_Info.Type_Info>
|
-- local function extract_function_return_types(function_return_types_node: scan.HTMLNode): List<Type_Info.Type_Info>
|
||||||
if not function_return_types_node then
|
-- if not function_return_types_node then
|
||||||
return {}
|
-- return {}
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local selector = "span.types .type"
|
-- local selector = "span.types .type"
|
||||||
local html = function_return_types_node:outer_html()
|
-- local html = function_return_types_node:outer_html()
|
||||||
|
|
||||||
return List(scraper_utils.scrape(html, selector, extract_node_text)):map(
|
-- return List(scraper_utils.scrape(html, selector, extract_node_text)):map(
|
||||||
function(type_name: string): Type_Info.Type_Info
|
-- function(type_name: string): Type_Info.Type_Info
|
||||||
return Type_Info(type_name)
|
-- return Type_Info(type_name)
|
||||||
end)
|
-- end)
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local function extract_property_constraints(property_constraint_node: scan.HTMLNode): { string }
|
local function extract_property_constraints(property_constraint_node: scan.HTMLNode): { string }
|
||||||
return scraper_utils.scrape(
|
return scraper_utils.scrape(
|
||||||
|
@ -113,147 +108,179 @@ local function extract_property_constraints(property_constraint_node: scan.HTMLN
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
local function extract_section_functions(dl: string): { Function_Info.Function_Info }
|
-- local function extract_section_functions(dl: string): { Function_Info.Function_Info }
|
||||||
local query_selectors = {
|
-- local query_selectors = {
|
||||||
header = "dt",
|
-- header = "dt",
|
||||||
name = "a",
|
-- name = "a",
|
||||||
body = "dd",
|
-- body = "dd",
|
||||||
parameters = "table",
|
-- parameters = "table",
|
||||||
return_types = "ol",
|
-- return_types = "ol",
|
||||||
}
|
-- }
|
||||||
|
|
||||||
return scraper_utils.scrape_tuples(
|
-- return scraper_utils.scrape_tuples(
|
||||||
dl,
|
-- dl,
|
||||||
{ query_selectors.header, query_selectors.body },
|
-- { query_selectors.header, query_selectors.body },
|
||||||
function(nodes: { string : scan.HTMLNode | nil }): Function_Info.Function_Info
|
-- function(nodes: { string : scan.HTMLNode | nil }): Function_Info.Function_Info
|
||||||
if not nodes[query_selectors.header] or not nodes[query_selectors.body] then
|
-- if not nodes[query_selectors.header] or not nodes[query_selectors.body] then
|
||||||
log:warn(
|
-- log:warn(
|
||||||
logger.message_with_metadata(
|
-- logger.message_with_metadata(
|
||||||
"Missing header or body",
|
-- "Missing header or body",
|
||||||
{ nodes = nodes }
|
-- { nodes = nodes }
|
||||||
)
|
-- )
|
||||||
)
|
-- )
|
||||||
error("Missing header or body")
|
-- error("Missing header or body")
|
||||||
end
|
-- end
|
||||||
local header = nodes[query_selectors.header] as scan.HTMLNode
|
-- local header = nodes[query_selectors.header] as scan.HTMLNode
|
||||||
local body = nodes[query_selectors.body] as scan.HTMLNode
|
-- local body = nodes[query_selectors.body] as scan.HTMLNode
|
||||||
local body_elements = scraper_utils.extract_nodes(
|
-- local body_elements = scraper_utils.extract_nodes(
|
||||||
body:outer_html(),
|
-- body:outer_html(),
|
||||||
{ query_selectors.parameters, query_selectors.return_types }
|
-- { query_selectors.parameters, query_selectors.return_types }
|
||||||
)
|
-- )
|
||||||
return Function_Info(
|
-- return Function_Info(
|
||||||
scraper_utils.scrape(
|
-- scraper_utils.scrape(
|
||||||
header:outer_html(),
|
-- header:outer_html(),
|
||||||
query_selectors.name,
|
-- query_selectors.name,
|
||||||
extract_item_name
|
-- extract_item_name
|
||||||
)[1],
|
-- )[1],
|
||||||
#body_elements:get(query_selectors.parameters) ~= 0 and
|
-- #body_elements:get(query_selectors.parameters) ~= 0 and
|
||||||
List(extract_function_parameters(body_elements:get(query_selectors.parameters)[1])) or
|
-- List(extract_function_parameters(body_elements:get(query_selectors.parameters)[1])) or
|
||||||
(List() as List<Variable_Info.Variable_Info>),
|
-- (List() as List<Variable_Info.Variable_Info>),
|
||||||
#body_elements:get(query_selectors.return_types) ~= 0 and
|
-- #body_elements:get(query_selectors.return_types) ~= 0 and
|
||||||
extract_function_return_types(body_elements:get(query_selectors.return_types)[1]) or
|
-- extract_function_return_types(body_elements:get(query_selectors.return_types)[1]) or
|
||||||
(List() as List<Type_Info.Type_Info>)
|
-- (List() as List<Type_Info.Type_Info>)
|
||||||
)
|
-- )
|
||||||
end
|
-- end
|
||||||
)
|
-- )
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local function extract_section_variables(dl: string): { Variable_Info.Variable_Info }
|
local function extract_section_variables(dl: string): { Node }, { string }
|
||||||
local query_selectors = {
|
local query_selectors <const>: { string : string } = {
|
||||||
variable_name = "dt a",
|
variable_name = "dt a",
|
||||||
variable_summary_type = "dt span.summary_type",
|
variable_summary_type = "dt span.summary_type",
|
||||||
variable_property_constraint = "dd span.property_type",
|
variable_property_constraint = "dd span.property_type",
|
||||||
}
|
}
|
||||||
|
|
||||||
return scraper_utils.scrape_tuples(
|
local variables <const> = {}
|
||||||
|
local signals <const> = {}
|
||||||
|
|
||||||
|
for nodes in scraper_utils.iter_tuples(
|
||||||
dl,
|
dl,
|
||||||
{ query_selectors.variable_name, query_selectors.variable_summary_type, query_selectors.variable_property_constraint },
|
utils.values(query_selectors)
|
||||||
function(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info
|
) do
|
||||||
local variable_info = Variable_Info()
|
local node = ast.create_node("variable", extract_item_name(nodes[query_selectors.variable_name]))
|
||||||
|
node.types = parse_parameter_types(extract_node_text(nodes[query_selectors.variable_summary_type]))
|
||||||
|
|
||||||
variable_info.name = extract_item_name(nodes[query_selectors.variable_name])
|
if #node.types == 1 and node.types[1] == "string" then
|
||||||
variable_info.types = parse_parameter_types(extract_node_text(nodes[query_selectors.variable_summary_type]))
|
|
||||||
|
|
||||||
if #variable_info.types == 1 and variable_info.types[1].name == "string" then
|
|
||||||
log:debug("extract variable string with constraints, this is an enum")
|
log:debug("extract variable string with constraints, this is an enum")
|
||||||
variable_info.constraints = List(extract_property_constraints(nodes[query_selectors.variable_property_constraint])):map(
|
local type_enum <const> = ast.create_node("enum", utils.capitalize(node.name))
|
||||||
function(constraint: string): string
|
for _, constraint in ipairs(extract_property_constraints(nodes[query_selectors.variable_property_constraint])) do
|
||||||
return (constraint:gsub(""", ""))
|
table.insert(
|
||||||
end
|
type_enum.children,
|
||||||
|
ast.create_node("identifier", (constraint:gsub(""", "")))
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
table.insert(variables, type_enum)
|
||||||
return variable_info
|
node.types = { type_enum.name }
|
||||||
end
|
end
|
||||||
)
|
|
||||||
|
table.insert(variables, node)
|
||||||
|
table.insert(signals, string.format("property::%s", node.name)) -- TODO : actually scrape the signals from the doc
|
||||||
|
end
|
||||||
|
|
||||||
|
return variables, signals
|
||||||
end
|
end
|
||||||
|
|
||||||
local function extract_section_signal(dl: string): { string }
|
-- local function extract_section_signal(dl: string): { string }
|
||||||
local selector = "dt strong"
|
-- local selector = "dt strong"
|
||||||
|
|
||||||
return scraper_utils.scrape(dl, selector, extract_node_text)
|
-- return scraper_utils.scrape(dl, selector, extract_node_text)
|
||||||
end
|
-- end
|
||||||
|
|
||||||
local enum Section
|
local enum Section
|
||||||
"Constructors"
|
-- "Constructors"
|
||||||
"Static module functions"
|
-- "Static module functions"
|
||||||
"Object properties"
|
"Object properties"
|
||||||
"Object methods"
|
-- "Object methods"
|
||||||
"Signals"
|
-- "Signals"
|
||||||
end
|
end
|
||||||
|
|
||||||
local section_scrapers: { Section : function(html: string, module_doc: Module_Doc.Module_Doc) } = {
|
-- returns
|
||||||
["Constructors"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
-- - Nodes that should be added to the module
|
||||||
module_doc.constructors = List(extract_section_functions(html))
|
-- - Nodes that should be added to the global scope
|
||||||
end,
|
-- - Strings that should be added to the record Signals
|
||||||
["Static module functions"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
local section_scrapers <total>: { Section : function(html: string): { Node }, { Node }, { string } } = {
|
||||||
module_doc.static_functions = List(extract_section_functions(html))
|
-- ["Constructors"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
||||||
end,
|
-- module_doc.constructors = List(extract_section_functions(html))
|
||||||
["Object properties"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
-- end,
|
||||||
module_doc.properties = List(extract_section_variables(html))
|
-- ["Static module functions"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
||||||
end,
|
-- module_doc.static_functions = List(extract_section_functions(html))
|
||||||
["Object methods"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
-- end,
|
||||||
local self_parameter = Variable_Info("self", List({ Type_Info(module_doc.record_name) }))
|
["Object properties"] = function(html: string): { Node }, { Node }, { string }
|
||||||
module_doc.methods = List(extract_section_functions(html)):map(
|
local properties, signals = extract_section_variables(html)
|
||||||
function(method: Function_Info.Function_Info): Function_Info.Function_Info
|
return properties, {}, signals
|
||||||
method.parameters:insert(1, self_parameter)
|
|
||||||
return method
|
|
||||||
end
|
|
||||||
)
|
|
||||||
end,
|
|
||||||
["Signals"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
|
||||||
module_doc.signals = List(extract_section_signal(html))
|
|
||||||
end,
|
end,
|
||||||
|
-- ["Object methods"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
||||||
|
-- local self_parameter = Variable_Info("self", List({ Type_Info(module_doc.record_name) }))
|
||||||
|
-- module_doc.methods = List(extract_section_functions(html)):map(
|
||||||
|
-- function(method: Function_Info.Function_Info): Function_Info.Function_Info
|
||||||
|
-- method.parameters:insert(1, self_parameter)
|
||||||
|
-- return method
|
||||||
|
-- end
|
||||||
|
-- )
|
||||||
|
-- end,
|
||||||
|
-- ["Signals"] = function(html: string, module_doc: Module_Doc.Module_Doc)
|
||||||
|
-- module_doc.signals = List(extract_section_signal(html))
|
||||||
|
-- end,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- local function extract_node_module_name(node: Node): string
|
||||||
|
-- return (node.name:gsub("(.*)[%.:].+$", "%1"))
|
||||||
|
-- end
|
||||||
|
|
||||||
local module = {}
|
local module = {}
|
||||||
|
|
||||||
function module.get_doc_from_page(html: string, module_name: string): Module_Doc.Module_Doc
|
function module.get_doc_from_page(html: string, module_name: string): Node, { Node }
|
||||||
local nodes = scraper_utils.extract_nodes(html, {
|
local html_nodes = scraper_utils.extract_nodes(html, {
|
||||||
"h2.section-header",
|
"h2.section-header",
|
||||||
"dl.function",
|
"dl.function",
|
||||||
})
|
})
|
||||||
|
|
||||||
if #nodes:get "h2.section-header" ~= #nodes:get "dl.function" then
|
if #html_nodes:get "h2.section-header" ~= #html_nodes:get "dl.function" then
|
||||||
error "The list aren't the same size!"
|
error "The list aren't the same size!"
|
||||||
end
|
end
|
||||||
|
|
||||||
local module_doc = Module_Doc()
|
local record_name <const> = utils.capitalize((module_name:gsub(".*%.", "")))
|
||||||
module_doc.record_name = utils.capitalize((module_name:gsub(".*%.", "")))
|
local module_root <const> = ast.create_node("module", record_name)
|
||||||
|
local other_nodes <const>: { Node } = {}
|
||||||
|
|
||||||
for i = 1, #nodes:get("h2.section-header") do
|
local module_signals_node <const> = ast.create_node("enum", "Signal")
|
||||||
local h2 = nodes:get("h2.section-header")[i]
|
table.insert(module_root.children, module_signals_node)
|
||||||
|
|
||||||
|
for i = 1, #html_nodes:get("h2.section-header") do
|
||||||
|
local h2 = html_nodes:get("h2.section-header")[i]
|
||||||
local section_name = utils.sanitize_string(h2:inner_text()) as Section -- promote to Section, we then test if the section_name is in the table
|
local section_name = utils.sanitize_string(h2:inner_text()) as Section -- promote to Section, we then test if the section_name is in the table
|
||||||
local dl_html = nodes:get("dl.function")[i]:outer_html()
|
local dl_html = html_nodes:get("dl.function")[i]:outer_html()
|
||||||
|
|
||||||
if section_scrapers[section_name] then
|
if section_scrapers[section_name] then
|
||||||
section_scrapers[section_name](dl_html, module_doc)
|
local module_nodes, global_nodes, signals_name = section_scrapers[section_name](dl_html)
|
||||||
|
for _, node in ipairs(module_nodes) do
|
||||||
|
table.insert(module_root.children, node)
|
||||||
|
end
|
||||||
|
for _, node in ipairs(global_nodes) do
|
||||||
|
table.insert(other_nodes, node)
|
||||||
|
end
|
||||||
|
for _, signal_name in ipairs(signals_name) do
|
||||||
|
table.insert(
|
||||||
|
module_signals_node.children,
|
||||||
|
ast.create_node("identifier", signal_name)
|
||||||
|
)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
log:warn("Section scraper not implemented: " .. section_name)
|
log:warn("Section scraper not implemented: " .. section_name)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return module_doc
|
return module_root, other_nodes
|
||||||
end
|
end
|
||||||
|
|
||||||
return module
|
return module
|
||||||
|
|
|
@ -72,4 +72,44 @@ function scraper_utils.scrape_tuples<T>(html: string, query_selectors: { string
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Build an iterator over tuples of sibling nodes, one node per query selector.
--
-- The HTML is scanned once. Every node for which `stack:is(selector)` holds is
-- appended to that selector's list, in the order the scan callback sees it.
-- On its i-th call the returned iterator yields a map from each selector to
-- the i-th node collected for it, then `nil` once all nodes were handed out.
--
-- Parameters:
--   html: HTML text to scan.
--   query_selectors: selectors to collect; all of them must match the same
--     number of nodes. An empty list yields an iterator that is already
--     exhausted.
-- Returns: a stateful iterator function, usable in a generic `for`.
-- Errors: "Query selectors do not have the same number of siblings" when the
--   selectors matched different numbers of nodes.
function scraper_utils.iter_tuples(html: string, query_selectors: { string }): function(): { string : scan.HTMLNode }
    -- With no selectors there is nothing to pair up. Without this guard,
    -- `siblings[query_selectors[1]]` would be nil and taking its length would
    -- raise an unrelated "attempt to get length of a nil value" error.
    if #query_selectors == 0 then
        return function(): { string : scan.HTMLNode }
            return nil
        end
    end

    -- One bucket of matched nodes per selector.
    local siblings: { string : { scan.HTMLNode } } = {}
    for _, query_selector in ipairs(query_selectors) do
        siblings[query_selector] = {}
    end

    scanner.scan_html(
        html,
        function(stack: scan.NodeStack)
            -- A node matching several selectors is recorded in each bucket.
            for _, query_selector in ipairs(query_selectors) do
                if stack:is(query_selector) then
                    table.insert(siblings[query_selector], stack:current())
                end
            end
        end
    )

    -- Tuples are built positionally, so every bucket must have the same size.
    local siblings_count = #siblings[query_selectors[1]]
    for _, query_selector in ipairs(query_selectors) do
        if #siblings[query_selector] ~= siblings_count then
            error("Query selectors do not have the same number of siblings")
        end
    end

    local i = 0
    return function(): { string : scan.HTMLNode }
        i = i + 1
        if i > siblings_count then
            return nil
        end

        -- A fresh table per step, so callers may keep or mutate each tuple.
        local node_list: { string : scan.HTMLNode } = {}
        for _, query_selector in ipairs(query_selectors) do
            node_list[query_selector] = siblings[query_selector][i]
        end

        return node_list
    end
end
|
||||||
|
|
||||||
return scraper_utils
|
return scraper_utils
|
||||||
|
|
|
@ -35,6 +35,16 @@ function utils.map<T, U>(list: { T }, iteratee: function(value: T, position: int
|
||||||
return mapped
|
return mapped
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Collect every value stored in `t` into a new sequence.
--
-- Values are visited with `pairs`, so their order in the result is whatever
-- order `pairs` produces for `t`; keys are discarded.
--
-- Parameters:
--   t: table whose values are gathered (each value is cast to `T`).
-- Returns: a new array holding the values of `t`.
function utils.values<T>(t: table): { T }
    local collected: { T } = {}
    local count = 0

    for _, item in pairs(t) do
        -- Append by explicit index; `pairs` never yields a nil value, so the
        -- result stays a proper sequence.
        count = count + 1
        collected[count] = item as T
    end

    return collected
end
|
||||||
|
|
||||||
function utils.sanitize_string(s: string): string
|
function utils.sanitize_string(s: string): string
|
||||||
return (stringx.strip(web_sanitize.extract_text(s)))
|
return (stringx.strip(web_sanitize.extract_text(s)))
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue