feat(scraper): basic Object Properties extractor
ci/woodpecker/pr/lint Pipeline was successful Details
ci/woodpecker/pr/build Pipeline was successful Details

This commit is contained in:
Aire-One 2022-10-31 13:42:45 +01:00
parent ee9d23afd6
commit 1fa62b7955
3 changed files with 60 additions and 4 deletions

View File

@ -1,5 +1,6 @@
local Function_Info = require "entity.Function_Info"
local List = require "pl.List"
local Variable_Info = require "entity.Variable_Info"
local record Module_Doc
metamethod __call: function(Module_Doc): Module_Doc
@ -10,6 +11,7 @@ local record Module_Doc
constructors: List<Function_Info.Function_Info>
methods: List<Function_Info.Function_Info>
properties: List<Variable_Info.Variable_Info>
static_functions: List<Function_Info.Function_Info>
signals: List<string>
end

View File

@ -0,0 +1,21 @@
local List = require "pl.List"
-- Describes a single variable/property entry: its name plus the list of
-- type names attached to it. The record declares a `__call` metamethod, so
-- the module table itself can be invoked to obtain a value of this type.
local record Variable_Info
-- Invoking a Variable_Info value returns a Variable_Info.
metamethod __call: function(Variable_Info): Variable_Info
-- Self-referential field; presumably present so that consumers can spell
-- the type as `Variable_Info.Variable_Info` after requiring this module.
Variable_Info: Variable_Info
-- Name of the variable.
name: string
-- Type names associated with the variable.
types: List<string>
end
-- Metatable attached to the exported module table. Its `__call` handler
-- makes `Variable_Info()` build a blank record.
local __Variable_Info: metatable<Variable_Info> = {
   __call = function(_self: Variable_Info): Variable_Info
      -- A new List is created on every call, so instances never share the
      -- same `types` container.
      local blank: Variable_Info = {
         name = "",
         types = List(),
      }
      return blank
   end,
}

-- The exported value is an empty table whose only behavior comes from the
-- metatable above.
return setmetatable({} as Variable_Info, __Variable_Info)

View File

@ -6,6 +6,7 @@ local scan = require "web_sanitize.query.scan_html"
local scraper_utils = require "scraper.utils"
local stringx = require "pl.stringx"
local utils = require "utils"
local Variable_Info = require "entity.Variable_Info"
local log = logger.log("scraper")
@ -21,8 +22,8 @@ local function parse_parameter_types(parameter_type: string): List<string>
return stringx.split(parameter_type, " or "):map(utils.sanitize_string)
end
local function extract_function_name(function_name_node: scan.HTMLNode): string
return function_name_node and ((function_name_node.attr.name as string):gsub(".*:", ""))
--- Derive an item name from the `name` attribute of its anchor node.
-- Everything up to and including the last ":" in the attribute is removed;
-- a value without ":" is returned unchanged.
-- @param item_name_node node carrying the `name` attribute (may be nil)
-- @return the stripped name, or nil when no node was given
local function extract_item_name(item_name_node: scan.HTMLNode): string
   if not item_name_node then
      return nil
   end

   local anchor_name = item_name_node.attr.name as string
   -- gsub also returns a match count; binding to a single local drops it.
   local stripped = anchor_name:gsub(".*:", "")
   return stripped
end
local function extract_function_parameters(function_parameters_node: scan.HTMLNode): { Function_Info.Parameter }
@ -54,6 +55,18 @@ local function extract_function_return_types(function_return_types_node: scan.HT
return scraper_utils.scrape(html, selector, extract_node_text)
end
--- Collect the type strings found inside a property-type node.
-- The node's outer HTML is scraped with the "tr.see_also_sublist" selector
-- and every match is converted through `extract_node_text`.
-- @param property_types_node node to inspect (may be nil)
-- @return the scraped strings, or an empty table when no node was given
local function extract_property_types(property_types_node: scan.HTMLNode): { string }
   if property_types_node then
      local html = property_types_node:outer_html()
      local selector = "tr.see_also_sublist"
      return scraper_utils.scrape(html, selector, extract_node_text)
   end

   return {}
end
local function extract_section_functions(dl: string): { Function_Info.Function_Info }
local query_selectors = {
function_name = "dt a",
@ -68,7 +81,7 @@ local function extract_section_functions(dl: string): { Function_Info.Function_I
local function_info = Function_Info()
function_info.name =
extract_function_name(nodes[query_selectors.function_name])
extract_item_name(nodes[query_selectors.function_name])
function_info.parameters =
List(extract_function_parameters(nodes[query_selectors.function_parameters]))
function_info.return_types = List(
@ -82,6 +95,26 @@ local function extract_section_functions(dl: string): { Function_Info.Function_I
)
end
--- Build one Variable_Info per entry scraped from a section's `<dl>` HTML.
-- Two selectors are handed to `scraper_utils.scrape_tuples`: one for the
-- name anchor and one for the type span. The callback receives the matched
-- nodes keyed by selector string.
-- @param dl HTML of the definition list to scrape
-- @return the Variable_Info values produced by the scrape
local function extract_section_variables(dl: string): { Variable_Info.Variable_Info }
   local name_selector = "dt a"
   local type_selector = "dd span.property_type"

   -- Turns one tuple of matched nodes into a populated Variable_Info.
   local function build_variable(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info
      local info = Variable_Info()
      info.name = extract_item_name(nodes[name_selector])
      info.types = List(extract_property_types(nodes[type_selector]))
      return info
   end

   return scraper_utils.scrape_tuples(dl, { name_selector, type_selector }, build_variable)
end
local function extract_section_signal(dl: string): { string }
local selector = "dt strong"
@ -113,7 +146,7 @@ function module.get_doc_from_page(html: string, module_name: string): Module_Doc
elseif section_name == "Static module functions" then
module_doc.static_functions = List(extract_section_functions(dl_html))
elseif section_name == "Object properties" then
log:warn("Not implemented: Object properties")
module_doc.properties = List(extract_section_variables(dl_html))
elseif section_name == "Deprecated object properties" then
log:warn("Not implemented: Deprecated object properties")
elseif section_name == "Object methods" then