From 915673668246a42940840b3a58df55599eca2cc0 Mon Sep 17 00:00:00 2001 From: Aire-One Date: Wed, 12 Apr 2023 00:19:53 +0200 Subject: [PATCH] feat(scraper): implement all `section_scrapers` --- .vscode/settings.json | 1 + src/awesomewm.d.tl/scraper/module_doc.tl | 222 ++++++++++------------- src/awesomewm.d.tl/scraper/utils.tl | 26 +-- 3 files changed, 101 insertions(+), 148 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 9825638..3a1c509 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -34,6 +34,7 @@ "setopt", "stringx", "Stylua", + "sublist", "tablex", "tmpl", "wibox", diff --git a/src/awesomewm.d.tl/scraper/module_doc.tl b/src/awesomewm.d.tl/scraper/module_doc.tl index 230e82a..1bea452 100644 --- a/src/awesomewm.d.tl/scraper/module_doc.tl +++ b/src/awesomewm.d.tl/scraper/module_doc.tl @@ -28,77 +28,50 @@ local function extract_item_name(item_name_node: scan.HTMLNode): string return item_name_node and ((item_name_node.attr.name as string):gsub("^.*[%.:]", "")) end --- local function extract_function_parameter_Parameters(tr_node: scan.HTMLNode): { Variable_Info.Variable_Info } --- local query_selectors = { --- name = "span.parameter", --- types = "span.types" --- } +local function extract_function_parameters(table_html: string): { Node } + local current_record_parameter: Node = nil --- return scraper_utils.scrape_tuples( --- tr_node:outer_html(), --- { query_selectors.name, query_selectors.types }, --- function(nodes: { string : scan.HTMLNode | nil }): Variable_Info.Variable_Info --- return Variable_Info( --- extract_node_text(nodes[query_selectors.name] as scan.HTMLNode), --- parse_parameter_types(extract_node_text(nodes[query_selectors.types] as scan.HTMLNode)) --- ) --- end) --- end + return scraper_utils.scrape(table_html, "tr", function(tr: scan.HTMLNode): Node + local tr_html = tr:outer_html() + local name_node = scraper_utils.find(tr_html, "span.parameter")[1] + local types_node = scraper_utils.find(tr_html, "span.types")[1] + if not name_node or not types_node then + return nil + end --- local function extract_function_parameters(function_parameters_node: scan.HTMLNode): { Variable_Info.Variable_Info } --- local current_record_parameter: Type_Info.Type_Info | nil = nil + local name = extract_node_text(name_node) + local types = parse_parameter_types(extract_node_text(types_node)) --- return scraper_utils.scrape( --- function_parameters_node:outer_html(), --- "tr", --- function(line_node: scan.HTMLNode): Variable_Info.Variable_Info --- local parameters = extract_function_parameter_Parameters(line_node) --- if #parameters == 0 then --- return nil --- elseif #parameters ~= 1 then --- log:error(logger.message_with_metadata("Expected 1 parameter by node", --- { len = #parameters, line_node = line_node, parameters = parameters })) --- error("Expected 1 parameter by node") --- end --- local name, types = parameters[1].name, parameters[1].types + if tr.attr ~= nil and tr.attr.class == "see_also_sublist" and current_record_parameter then + local field = ast.create_node("variable", name) + field.types = types + table.insert(current_record_parameter.children, field) + return nil + end --- if line_node.attr ~= nil and line_node.attr.class == "see_also_sublist" and current_record_parameter then --- local record_parameter = current_record_parameter as Type_Info.Type_Info --- if not record_parameter.record_entries then --- record_parameter.record_entries = Map() --- end + -- We wrongly tried to convert a table to a record + if current_record_parameter then + current_record_parameter.token = "variable" + current_record_parameter.name = utils.lowercase(current_record_parameter.name) + current_record_parameter.types = { "table" } + current_record_parameter.children = nil + current_record_parameter = nil + end --- (record_parameter.record_entries as Map>):set(name, types) + if #types == 1 and types[1] == "table" then + current_record_parameter = ast.create_node("record", utils.capitalize(name)) + return current_record_parameter + end --- return nil --- end + local field = ast.create_node("variable", name) + field.types = types + return field + end) +end --- if #types == 1 and types[1].name == "table" then --- local record_name = utils.capitalize(name) --- current_record_parameter = Type_Info(record_name) --- return Variable_Info( --- name, --- List({ current_record_parameter }) --- ) --- end - --- return Variable_Info(name, types) --- end) --- end - --- local function extract_function_return_types(function_return_types_node: scan.HTMLNode): List --- if not function_return_types_node then --- return {} --- end - --- local selector = "span.types .type" --- local html = function_return_types_node:outer_html() - --- return List(scraper_utils.scrape(html, selector, extract_node_text)):map( --- function(type_name: string): Type_Info.Type_Info --- return Type_Info(type_name) --- end) --- end +local function extract_function_return_types(ol_html: string): { string } + return scraper_utils.scrape(ol_html, "span.types .type", extract_node_text) +end local function extract_property_constraints(property_constraint_node: scan.HTMLNode): { string } return scraper_utils.scrape( @@ -108,50 +81,40 @@ local function extract_property_constraints(property_constraint_node: scan.HTMLN ) end --- local function extract_section_functions(dl: string): { Function_Info.Function_Info } --- local query_selectors = { --- header = "dt", --- name = "a", --- body = "dd", --- parameters = "table", --- return_types = "ol", --- } +local function extract_section_functions(dl: string): { Node } + local list_query_selectors : { string : string } = { + function_name = "dt a", + body = "dd", + } --- return scraper_utils.scrape_tuples( --- dl, --- { query_selectors.header, query_selectors.body }, --- function(nodes: { string : scan.HTMLNode | nil }): Function_Info.Function_Info --- if not nodes[query_selectors.header] or not nodes[query_selectors.body] then --- log:warn( --- logger.message_with_metadata( --- "Missing header or body", --- { nodes = nodes } --- ) --- ) --- error("Missing header or body") --- end --- local header = nodes[query_selectors.header] as scan.HTMLNode --- local body = nodes[query_selectors.body] as scan.HTMLNode --- local body_elements = scraper_utils.extract_nodes( --- body:outer_html(), --- { query_selectors.parameters, query_selectors.return_types } --- ) --- return Function_Info( --- scraper_utils.scrape( --- header:outer_html(), --- query_selectors.name, --- extract_item_name --- )[1], --- #body_elements:get(query_selectors.parameters) ~= 0 and --- List(extract_function_parameters(body_elements:get(query_selectors.parameters)[1])) or --- (List() as List), --- #body_elements:get(query_selectors.return_types) ~= 0 and --- extract_function_return_types(body_elements:get(query_selectors.return_types)[1]) or --- (List() as List) --- ) --- end --- ) --- end + local functions: { Node } = {} + + for nodes in scraper_utils.iter_tuples( + dl, + utils.values(list_query_selectors) + ) do + local function_node = ast.create_node( + "function", + extract_item_name(nodes[list_query_selectors.function_name]) + ) + + local body_html = nodes[list_query_selectors.body]:outer_html() + + local parameter_node = scraper_utils.find(body_html, "table")[1] + function_node.parameters = parameter_node and + extract_function_parameters(parameter_node:outer_html()) or + {} + + local return_node = scraper_utils.find(body_html, "ol")[1] + function_node.return_types = return_node and + extract_function_return_types(return_node:outer_html()) or + {} + + table.insert(functions, function_node) + end + + return functions +end local function extract_section_variables(dl: string): { Node }, { string } local query_selectors : { string : string } = { @@ -160,8 +123,8 @@ local function extract_section_variables(dl: string): { Node }, { string } variable_property_constraint = "dd span.property_type", } - local variables = {} - local signals = {} + local variables : { Node } = {} + local signals : { string } = {} for nodes in scraper_utils.iter_tuples( dl, @@ -197,10 +160,10 @@ local function extract_section_signal(dl: string): { string } end local enum Section - -- "Constructors" - -- "Static module functions" + "Constructors" + "Static module functions" "Object properties" - -- "Object methods" + "Object methods" "Signals" end @@ -208,26 +171,27 @@ end -- - Nodes that should be added to the module -- - Nodes that should be added to the global scope -- - Strings that should be added to the record Signals -local section_scrapers : { Section : function(html: string): { Node }, { Node }, { string } } = { - -- ["Constructors"] = function(html: string, module_doc: Module_Doc.Module_Doc) - -- module_doc.constructors = List(extract_section_functions(html)) - -- end, - -- ["Static module functions"] = function(html: string, module_doc: Module_Doc.Module_Doc) - -- module_doc.static_functions = List(extract_section_functions(html)) - -- end, +local section_scrapers : { Section : function(html: string, module_name: string): { Node }, { Node }, { string } } = { + ["Constructors"] = function(html: string): { Node }, { Node }, { string } + return extract_section_functions(html), {}, {} + end, + ["Static module functions"] = function(html: string): { Node }, { Node }, { string } + local static_functions = extract_section_functions(html) + return static_functions, {}, {} + end, ["Object properties"] = function(html: string): { Node }, { Node }, { string } local properties, signals = extract_section_variables(html) return properties, {}, signals end, - -- ["Object methods"] = function(html: string, module_doc: Module_Doc.Module_Doc) - -- local self_parameter = Variable_Info("self", List({ Type_Info(module_doc.record_name) })) - -- module_doc.methods = List(extract_section_functions(html)):map( - -- function(method: Function_Info.Function_Info): Function_Info.Function_Info - -- method.parameters:insert(1, self_parameter) - -- return method - -- end - -- ) - -- end, + ["Object methods"] = function(html: string, module_name: string): { Node }, { Node }, { string } + local methods = extract_section_functions(html) + for _, method in ipairs(methods) do + local self_parameter = ast.create_node("variable", "self") + self_parameter.types = { module_name } + table.insert(method.parameters, 1, self_parameter) + end + return methods, {}, {} + end, ["Signals"] = function(html: string): { Node }, { Node }, { string } local signals = extract_section_signal(html) return {}, {}, signals @@ -263,7 +227,7 @@ function module.get_doc_from_page(html: string, module_name: string): Node, { No local dl_html = html_nodes:get("dl.function")[i]:outer_html() if section_scrapers[section_name] then - local module_nodes, global_nodes, signals_name = section_scrapers[section_name](dl_html) + local module_nodes, global_nodes, signals_name = section_scrapers[section_name](dl_html, record_name) for _, node in ipairs(module_nodes) do table.insert(module_root.children, node) end diff --git a/src/awesomewm.d.tl/scraper/utils.tl b/src/awesomewm.d.tl/scraper/utils.tl index 33032ca..8454e03 100644 --- a/src/awesomewm.d.tl/scraper/utils.tl +++ b/src/awesomewm.d.tl/scraper/utils.tl @@ -48,28 +48,16 @@ function scraper_utils.extract_nodes(html: string, query_selectors: { string }): return siblings end -function scraper_utils.scrape_tuples(html: string, query_selectors: { string }, extract_callback: function(tuple: { string : scan.HTMLNode | nil }): T): { T } - local nodes = scraper_utils.extract_nodes(html, query_selectors) +function scraper_utils.find(html: string, query_selector: string): { scan.HTMLNode } + local nodes: { scan.HTMLNode } = {} - local ret: { T } = {} - - for i = 1, #nodes:get(query_selectors[1]) do - local node_list: { string : scan.HTMLNode | nil } = {} - tablex.foreach(query_selectors, function(query_selector: string) - node_list[query_selector] = nodes:get(query_selector)[i] or nil - end) - local success, info_or_error = pcall(extract_callback, node_list) - - if not success then - local error_message = info_or_error as string - log:error(logger.message_with_metadata("Extraction error", { error = error_message })) - else - local info = info_or_error as T - table.insert(ret, info) + scanner.scan_html(html, function(stack: scan.NodeStack) + if stack:is(query_selector) then + table.insert(nodes, stack:current()) end - end + end) - return ret + return nodes end function scraper_utils.iter_tuples(html: string, query_selectors: { string }): function(): { string : scan.HTMLNode }