From 1fa62b7955785628ae83c9c62b561261f6f02edf Mon Sep 17 00:00:00 2001
From: Aire-One
Date: Mon, 31 Oct 2022 13:42:45 +0100
Subject: [PATCH] feat(scraper): basic Object Properties extractor

---
NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks and blank context lines were rebuilt from the +/- markers and
the hunk line counts; the 4-space indentation is an assumption - confirm
against the target files (or apply with `git apply --ignore-whitespace`).
Bare `List` / `metatable` annotations and the address-less `From:` header
may have lost angle-bracketed text upstream - TODO confirm.

 src/awesomewm.d.tl/entity/Module_Doc.tl    |  2 ++
 src/awesomewm.d.tl/entity/Variable_Info.tl | 21 +++++++++++
 src/awesomewm.d.tl/scraper/module_doc.tl   | 41 +++++++++++++++++++---
 3 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 src/awesomewm.d.tl/entity/Variable_Info.tl

diff --git a/src/awesomewm.d.tl/entity/Module_Doc.tl b/src/awesomewm.d.tl/entity/Module_Doc.tl
index d8a687e..c964f39 100644
--- a/src/awesomewm.d.tl/entity/Module_Doc.tl
+++ b/src/awesomewm.d.tl/entity/Module_Doc.tl
@@ -1,5 +1,6 @@
 local Function_Info = require "entity.Function_Info"
 local List = require "pl.List"
+local Variable_Info = require "entity.Variable_Info"
 
 local record Module_Doc
     metamethod __call: function(Module_Doc): Module_Doc
@@ -10,6 +11,7 @@ local record Module_Doc
 
     constructors: List
     methods: List
+    properties: List
     static_functions: List
     signals: List
 end
diff --git a/src/awesomewm.d.tl/entity/Variable_Info.tl b/src/awesomewm.d.tl/entity/Variable_Info.tl
new file mode 100644
index 0000000..22855fd
--- /dev/null
+++ b/src/awesomewm.d.tl/entity/Variable_Info.tl
@@ -0,0 +1,21 @@
+local List = require "pl.List"
+
+local record Variable_Info
+    metamethod __call: function(Variable_Info): Variable_Info
+
+    Variable_Info: Variable_Info
+
+    name: string
+    types: List
+end
+
+local __Variable_Info: metatable = {
+    __call = function(_self: Variable_Info): Variable_Info
+        return {
+            name = "",
+            types = List(),
+        }
+    end,
+}
+
+return setmetatable({} as Variable_Info, __Variable_Info)
diff --git a/src/awesomewm.d.tl/scraper/module_doc.tl b/src/awesomewm.d.tl/scraper/module_doc.tl
index 80f74b3..00b957b 100644
--- a/src/awesomewm.d.tl/scraper/module_doc.tl
+++ b/src/awesomewm.d.tl/scraper/module_doc.tl
@@ -6,6 +6,7 @@ local scan = require "web_sanitize.query.scan_html"
 local scraper_utils = require "scraper.utils"
 local stringx = require "pl.stringx"
 local utils = require "utils"
+local Variable_Info = require "entity.Variable_Info"
 
 local log = logger.log("scraper")
 
@@ -21,8 +22,8 @@ local function parse_parameter_types(parameter_type: string): List
     return stringx.split(parameter_type, " or "):map(utils.sanitize_string)
 end
 
-local function extract_function_name(function_name_node: scan.HTMLNode): string
-    return function_name_node and ((function_name_node.attr.name as string):gsub(".*:", ""))
+local function extract_item_name(item_name_node: scan.HTMLNode): string
+    return item_name_node and ((item_name_node.attr.name as string):gsub(".*:", ""))
 end
 
 local function extract_function_parameters(function_parameters_node: scan.HTMLNode): { Function_Info.Parameter }
@@ -54,6 +55,18 @@ local function extract_function_return_types(function_return_types_node: scan.HT
     return scraper_utils.scrape(html, selector, extract_node_text)
 end
 
+local function extract_property_types(property_types_node: scan.HTMLNode): { string }
+    if not property_types_node then
+        return {}
+    end
+
+    return scraper_utils.scrape(
+        property_types_node:outer_html(),
+        "tr.see_also_sublist",
+        extract_node_text
+    )
+end
+
 local function extract_section_functions(dl: string): { Function_Info.Function_Info }
     local query_selectors = {
         function_name = "dt a",
@@ -68,7 +81,7 @@ local function extract_section_functions(dl: string): { Function_Info.Function_I
             local function_info = Function_Info()
 
             function_info.name =
-                extract_function_name(nodes[query_selectors.function_name])
+                extract_item_name(nodes[query_selectors.function_name])
             function_info.parameters =
                 List(extract_function_parameters(nodes[query_selectors.function_parameters]))
             function_info.return_types = List(
@@ -82,6 +95,26 @@ local function extract_section_functions(dl: string): { Function_Info.Function_I
     )
 end
 
+local function extract_section_variables(dl: string): { Variable_Info.Variable_Info }
+    local query_selectors = {
+        variable_name = "dt a",
+        variable_type = "dd span.property_type",
+    }
+
+    return scraper_utils.scrape_tuples(
+        dl,
+        { query_selectors.variable_name, query_selectors.variable_type },
+        function(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info
+            local variable_info = Variable_Info()
+
+            variable_info.name = extract_item_name(nodes[query_selectors.variable_name])
+            variable_info.types = List(extract_property_types(nodes[query_selectors.variable_type]))
+
+            return variable_info
+        end
+    )
+end
+
 local function extract_section_signal(dl: string): { string }
     local selector = "dt strong"
 
@@ -113,7 +146,7 @@ function module.get_doc_from_page(html: string, module_name: string): Module_Doc
         elseif section_name == "Static module functions" then
             module_doc.static_functions = List(extract_section_functions(dl_html))
         elseif section_name == "Object properties" then
-            log:warn("Not implemented: Object properties")
+            module_doc.properties = List(extract_section_variables(dl_html))
         elseif section_name == "Deprecated object properties" then
             log:warn("Not implemented: Deprecated object properties")
         elseif section_name == "Object methods" then