feat(scraper): function can populate `other_nodes`

To allow `extract_section_functions` to populate `other_nodes`, we need
to use the `module_name` and migrate the function-name scraping method to
use the actually displayed text instead of playing with the node `name`
attribute.

Because of this change, we also need to update
`extract_section_variables`. Not a big deal. BTW, we resolved an issue
where a `string` variable could wrongly be detected as an `enum`.
This commit is contained in:
Aire-One 2023-04-16 12:54:36 +02:00
parent 9156736682
commit 896c7f2f04
2 changed files with 38 additions and 20 deletions

View File

@ -30,6 +30,7 @@
"luasec",
"luasocket",
"mkdir",
"modname",
"rockspec",
"setopt",
"stringx",

View File

@ -24,8 +24,14 @@ local function parse_parameter_types(parameter_type: string): { string }
return types
end
local function extract_item_name(item_name_node: scan.HTMLNode): string
return item_name_node and ((item_name_node.attr.name as string):gsub("^.*[%.:]", ""))
local function extract_item_name(item_name_node: scan.HTMLNode): string, string | nil
if not item_name_node then
return
end
local module_name_node <const> = scraper_utils.find(item_name_node:outer_html(), "span.function_modname")[1]
local module_name = module_name_node and module_name_node:inner_text():gsub("[%.:]$", "")
local name <const> = item_name_node:inner_text():gsub("^.*[%.:](.+)%s*[%(%{].*[%)%}]", "%1")
return utils.sanitize_string(name), module_name and utils.sanitize_string(module_name) or nil
end
local function extract_function_parameters(table_html: string): { Node }
@ -81,21 +87,23 @@ local function extract_property_constraints(property_constraint_node: scan.HTMLN
)
end
local function extract_section_functions(dl: string): { Node }
local function extract_section_functions(dl: string, module_name: string | nil): { Node }, { Node}
local list_query_selectors <const>: { string : string } = {
function_name = "dt a",
function_name = "dt strong",
body = "dd",
}
local functions<const>: { Node } = {}
local functions <const>: { Node } = {}
local other_functions <const>: { Node } = {}
for nodes in scraper_utils.iter_tuples(
dl,
utils.values(list_query_selectors)
) do
local function_name <const>, function_module_name <const> = extract_item_name(nodes[list_query_selectors.function_name])
local function_node <const> = ast.create_node(
"function",
extract_item_name(nodes[list_query_selectors.function_name])
function_name
)
local body_html = nodes[list_query_selectors.body]:outer_html()
@ -110,15 +118,20 @@ local function extract_section_functions(dl: string): { Node }
extract_function_return_types(return_node:outer_html()) or
{}
table.insert(functions, function_node)
if module_name and function_module_name and module_name ~= function_module_name then
function_node.name = function_module_name .. "." .. function_node.name
table.insert(other_functions, function_node)
else
table.insert(functions, function_node)
end
end
return functions
return functions, other_functions
end
local function extract_section_variables(dl: string): { Node }, { string }
local query_selectors <const>: { string : string } = {
variable_name = "dt a",
variable_name = "dt strong",
variable_summary_type = "dt span.summary_type",
variable_property_constraint = "dd span.property_type",
}
@ -130,11 +143,11 @@ local function extract_section_variables(dl: string): { Node }, { string }
dl,
utils.values(query_selectors)
) do
local node = ast.create_node("variable", extract_item_name(nodes[query_selectors.variable_name]))
local node = ast.create_node("variable", (extract_item_name(nodes[query_selectors.variable_name])))
node.types = parse_parameter_types(extract_node_text(nodes[query_selectors.variable_summary_type]))
if #node.types == 1 and node.types[1] == "string" then
log:debug("extract variable string with constraints, this is an enum")
log:debug("extract variable string with constraints, this is an enum", { name = node.name })
local type_enum <const> = ast.create_node("enum", utils.capitalize(node.name))
for _, constraint in ipairs(extract_property_constraints(nodes[query_selectors.variable_property_constraint])) do
table.insert(
@ -142,8 +155,12 @@ local function extract_section_variables(dl: string): { Node }, { string }
ast.create_node("identifier", (constraint:gsub("&quot;", "")))
)
end
table.insert(variables, type_enum)
node.types = { type_enum.name }
if #type_enum.children == 0 then
log:debug("Enum has no children, get back to variable", { name = node.name })
else
table.insert(variables, type_enum)
node.types = { type_enum.name }
end
end
table.insert(variables, node)
@ -171,23 +188,23 @@ end
-- - Nodes that should be added to the module
-- - Nodes that should be added to the global scope
-- - Strings that should be added to the record Signals
local section_scrapers <total>: { Section : function(html: string, module_name: string): { Node }, { Node }, { string } } = {
local section_scrapers <total>: { Section : function(html: string, record_name: string, module_name: string): { Node }, { Node }, { string } } = {
["Constructors"] = function(html: string): { Node }, { Node }, { string }
return extract_section_functions(html), {}, {}
end,
["Static module functions"] = function(html: string): { Node }, { Node }, { string }
local static_functions = extract_section_functions(html)
return static_functions, {}, {}
["Static module functions"] = function(html: string, _: string, module_name: string): { Node }, { Node }, { string }
local static_functions, other_functions = extract_section_functions(html, module_name)
return static_functions, other_functions, {}
end,
["Object properties"] = function(html: string): { Node }, { Node }, { string }
local properties, signals = extract_section_variables(html)
return properties, {}, signals
end,
["Object methods"] = function(html: string, module_name: string): { Node }, { Node }, { string }
["Object methods"] = function(html: string, record_name: string): { Node }, { Node }, { string }
local methods <const> = extract_section_functions(html)
for _, method in ipairs(methods) do
local self_parameter = ast.create_node("variable", "self")
self_parameter.types = { module_name }
self_parameter.types = { record_name }
table.insert(method.parameters, 1, self_parameter)
end
return methods, {}, {}
@ -227,7 +244,7 @@ function module.get_doc_from_page(html: string, module_name: string): Node, { No
local dl_html = html_nodes:get("dl.function")[i]:outer_html()
if section_scrapers[section_name] then
local module_nodes, global_nodes, signals_name = section_scrapers[section_name](dl_html, record_name)
local module_nodes, global_nodes, signals_name = section_scrapers[section_name](dl_html, record_name, module_name)
for _, node in ipairs(module_nodes) do
table.insert(module_root.children, node)
end