From 483cfa6a6f36377353fd22ffcc790e677624fa81 Mon Sep 17 00:00:00 2001 From: Aire-One Date: Sun, 2 Oct 2022 20:55:42 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20move=20sources=20to=20Teal=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../crawler/{init.lua => init.tl} | 8 +- src/awesomewm.d.tl/entities/Function_Info.lua | 23 ------ src/awesomewm.d.tl/entities/Function_Info.tl | 30 ++++++++ src/awesomewm.d.tl/entities/Module_Doc.lua | 12 --- src/awesomewm.d.tl/entities/Module_Doc.tl | 12 +++ src/awesomewm.d.tl/entities/Module_Info.lua | 10 --- src/awesomewm.d.tl/entities/Module_Info.tl | 8 ++ .../generator/{init.lua => init.tl} | 24 ++++-- .../generator/{snippets.lua => snippets.tl} | 26 +++++-- src/awesomewm.d.tl/{init.lua => init.tl} | 0 src/awesomewm.d.tl/logger/init.lua | 18 ----- src/awesomewm.d.tl/logger/init.tl | 17 +++++ src/awesomewm.d.tl/properties.lua | 66 ----------------- src/awesomewm.d.tl/properties.tl | 72 ++++++++++++++++++ .../scraper/{init.lua => init.tl} | 0 .../scraper/{module_doc.lua => module_doc.tl} | 31 ++++---- ...dule_info_list.lua => module_info_list.tl} | 7 +- src/awesomewm.d.tl/scraper/utils.lua | 68 ----------------- src/awesomewm.d.tl/scraper/utils.tl | 73 +++++++++++++++++++ src/awesomewm.d.tl/utils.lua | 70 ------------------ src/awesomewm.d.tl/utils.tl | 72 ++++++++++++++++++ 21 files changed, 348 insertions(+), 299 deletions(-) rename src/awesomewm.d.tl/crawler/{init.lua => init.tl} (86%) delete mode 100644 src/awesomewm.d.tl/entities/Function_Info.lua create mode 100644 src/awesomewm.d.tl/entities/Function_Info.tl delete mode 100644 src/awesomewm.d.tl/entities/Module_Doc.lua create mode 100644 src/awesomewm.d.tl/entities/Module_Doc.tl delete mode 100644 src/awesomewm.d.tl/entities/Module_Info.lua create mode 100644 src/awesomewm.d.tl/entities/Module_Info.tl rename src/awesomewm.d.tl/generator/{init.lua => init.tl} (71%) rename src/awesomewm.d.tl/generator/{snippets.lua => snippets.tl} (64%) rename src/awesomewm.d.tl/{init.lua => init.tl} (100%) delete mode 100644 src/awesomewm.d.tl/logger/init.lua create mode 100644 src/awesomewm.d.tl/logger/init.tl delete mode 100644 src/awesomewm.d.tl/properties.lua create mode 100644 src/awesomewm.d.tl/properties.tl rename src/awesomewm.d.tl/scraper/{init.lua => init.tl} (100%) rename src/awesomewm.d.tl/scraper/{module_doc.lua => module_doc.tl} (64%) rename src/awesomewm.d.tl/scraper/{module_info_list.lua => module_info_list.tl} (70%) delete mode 100644 src/awesomewm.d.tl/scraper/utils.lua create mode 100644 src/awesomewm.d.tl/scraper/utils.tl delete mode 100644 src/awesomewm.d.tl/utils.lua create mode 100644 src/awesomewm.d.tl/utils.tl diff --git a/src/awesomewm.d.tl/crawler/init.lua b/src/awesomewm.d.tl/crawler/init.tl similarity index 86% rename from src/awesomewm.d.tl/crawler/init.lua rename to src/awesomewm.d.tl/crawler/init.tl index 1305b9c..c1d745c 100644 --- a/src/awesomewm.d.tl/crawler/init.lua +++ b/src/awesomewm.d.tl/crawler/init.tl @@ -6,7 +6,7 @@ local tablex = require "pl.tablex" local crawler = {} -local function http_request(url) +local function http_request(url: string): string local body, code_or_error = http.request(url) if not body then @@ -14,7 +14,7 @@ local function http_request(url) error { "Request failed", err = err, url = url } end - local code = code_or_error + local code = code_or_error as integer if code < 200 and code >= 300 then error { @@ -27,7 +27,7 @@ local function http_request(url) return body end -local function get_resource_by_protocol(url) +local function get_resource_by_protocol(url: string): boolean, string local protocol, resource = url:match "^(%a+)://(.*)$" if not not tablex.find({ "http", "https" }, protocol) then @@ -45,7 +45,7 @@ local function get_resource_by_protocol(url) end end -function crawler.fetch(url) +function crawler.fetch(url: string): string local success, result = get_resource_by_protocol(url) if not success then diff --git a/src/awesomewm.d.tl/entities/Function_Info.lua b/src/awesomewm.d.tl/entities/Function_Info.lua deleted file mode 100644 index 956ac11..0000000 --- a/src/awesomewm.d.tl/entities/Function_Info.lua +++ /dev/null @@ -1,23 +0,0 @@ -local class = require "pl.class" -local List = require "pl.List" - -local Function_Info = class.Module_Doc() - -function Function_Info:_init() - self.name = "" - self.parameters = List() - self.return_types = List() -end - -function Function_Info:append_return_type(return_type) - self.return_types:append(return_type) -end - -function Function_Info:append_parameter(name, type) - self.parameters:append { - name = name, - type = type, - } -end - -return Function_Info diff --git a/src/awesomewm.d.tl/entities/Function_Info.tl b/src/awesomewm.d.tl/entities/Function_Info.tl new file mode 100644 index 0000000..4e20839 --- /dev/null +++ b/src/awesomewm.d.tl/entities/Function_Info.tl @@ -0,0 +1,30 @@ +local List = require "pl.List" + +local record Parameter + name: string + type: string +end + +local record Function_Info + metamethod __call: function(Function_Info): Function_Info + + name: string + parameters: List + return_types: List + + append_parameter: function(self: Function_Info, parameter: string) + append_return_type: function(self: Function_Info, return_type: string) +end + +function Function_Info:append_parameter(name: string, type: string) + self.parameters:append { + name = name, + type = type, + } +end + +function Function_Info:append_return_type(return_type: string) + self.return_types:append(return_type) +end + +return Function_Info diff --git a/src/awesomewm.d.tl/entities/Module_Doc.lua b/src/awesomewm.d.tl/entities/Module_Doc.lua deleted file mode 100644 index 48ddc31..0000000 --- a/src/awesomewm.d.tl/entities/Module_Doc.lua +++ /dev/null @@ -1,12 +0,0 @@ -local class = require "pl.class" -local List = require "pl.List" - -local Module_Doc = class.Module_Doc() - -function Module_Doc:_init() - self.constructors = List() - self.methods = List() - self.static_functions = List() -end - -return Module_Doc diff --git a/src/awesomewm.d.tl/entities/Module_Doc.tl b/src/awesomewm.d.tl/entities/Module_Doc.tl new file mode 100644 index 0000000..165b31f --- /dev/null +++ b/src/awesomewm.d.tl/entities/Module_Doc.tl @@ -0,0 +1,12 @@ +local Function_Info = require "entities.Function_Info" +local List = require "pl.List" + +local record Module_Doc + metamethod __call: function(Module_Doc): Module_Doc + + constructors: List + methods: List + static_functions: List +end + +return Module_Doc diff --git a/src/awesomewm.d.tl/entities/Module_Info.lua b/src/awesomewm.d.tl/entities/Module_Info.lua deleted file mode 100644 index df163d9..0000000 --- a/src/awesomewm.d.tl/entities/Module_Info.lua +++ /dev/null @@ -1,10 +0,0 @@ -local class = require "pl.class" - -local Module_Info = class.Module_Info() - -function Module_Info:_init(name, uri) - self.name = name - self.uri = uri -end - -return Module_Info diff --git a/src/awesomewm.d.tl/entities/Module_Info.tl b/src/awesomewm.d.tl/entities/Module_Info.tl new file mode 100644 index 0000000..9c2380d --- /dev/null +++ b/src/awesomewm.d.tl/entities/Module_Info.tl @@ -0,0 +1,8 @@ +local record Module_Info + metamethod __call: function(Module_Info, name: string, uri: string): Module_Info + + name: string + uri: string +end + +return Module_Info diff --git a/src/awesomewm.d.tl/generator/init.lua b/src/awesomewm.d.tl/generator/init.tl similarity index 71% rename from src/awesomewm.d.tl/generator/init.lua rename to src/awesomewm.d.tl/generator/init.tl index 335bb79..d72d20d 100644 --- a/src/awesomewm.d.tl/generator/init.lua +++ b/src/awesomewm.d.tl/generator/init.tl @@ -5,7 +5,7 @@ local log = require "logger" local utils = require "utils" local snippets = require "generator.snippets" -local tmpl = (function(mod) +local tmpl = (function(mod: string): string local package_path = utils.do_or_fail(path.package_path, mod) local package_dir = path.dirname(package_path) return utils.do_or_fail(file.read, package_dir .. "/template.tl.tmpl", false) @@ -13,10 +13,22 @@ end)(...) local generator = {} -function generator.generate_teal(data) +local record Generate_Teal_Data_Record + section: string + items: { snippets.Anonymous_Function_Record } +end +function generator.generate_teal(data: { Generate_Teal_Data_Record }): string -- TODO : add the required modules to the generated code -- TODO : replace this with a proper way to get the module name (will also probably need the module path) - local module_data = { name = "module_name" } + local record Module_Data_Record + name: string + static_functions: { snippets.Anonymous_Function_Record } + constructors: { snippets.Anonymous_Function_Record } + methods: { string } + properties: { snippets.Anonymous_Function_Record } + signals: { snippets.Anonymous_Function_Record } + end + local module_data: Module_Data_Record = { name = "module_name" } for _, item in ipairs(data) do if item.section == "Static functions" then -- TODO @@ -49,19 +61,19 @@ function generator.generate_teal(data) return utils.do_or_fail(template.substitute, tmpl, env) end -function generator.write(file_content, file_path) +function generator.write(file_content:string, file_path: string) -- Make sure the directory we want to write the file to exists local directory = path.dirname(file_path) if not path.isdir(directory) then path.mkdir(directory) end - local success, error = file.write(file_path, file_content, false) + local success, error_message = file.write(file_path, file_content, false) if not success then log:error { "generator.write error", - error = error, + error = error_message, } return end diff --git a/src/awesomewm.d.tl/generator/snippets.lua b/src/awesomewm.d.tl/generator/snippets.tl similarity index 64% rename from src/awesomewm.d.tl/generator/snippets.lua rename to src/awesomewm.d.tl/generator/snippets.tl index 9d29310..78ad3dc 100644 --- a/src/awesomewm.d.tl/generator/snippets.lua +++ b/src/awesomewm.d.tl/generator/snippets.tl @@ -2,10 +2,10 @@ local utils = require "utils" local template = require "pl.template" -- Refactor scraper code to use pl.List objects -local function join(arr, delim) +local function join(arr: { string }, delim: string): string local ret = "" - for i, type in ipairs(arr) do - ret = ret .. type + for i, t in ipairs(arr) do + ret = ret .. t if i < #arr then ret = ret .. delim end @@ -13,13 +13,27 @@ local function join(arr, delim) return ret end -local snippets = {} +local record Anonymous_Function_Parameter_Record + name: string + type: { string } +end -function snippets.types_list(types) +local record Anonymous_Function_Record + name: string + parameters: { Anonymous_Function_Parameter_Record } + returns: { string } +end + +local snippets = { + Anonymous_Function_Parameter_Record = Anonymous_Function_Parameter_Record, + Anonymous_Function_Record = Anonymous_Function_Record, +} + +function snippets.types_list(types: { string }): string return join(types, ", ") end -function snippets.anonymous_function(item) +function snippets.anonymous_function(item: Anonymous_Function_Record): string local parameters_string = "" if item.parameters then for i, param in ipairs(item.parameters) do diff --git a/src/awesomewm.d.tl/init.lua b/src/awesomewm.d.tl/init.tl similarity index 100% rename from src/awesomewm.d.tl/init.lua rename to src/awesomewm.d.tl/init.tl diff --git a/src/awesomewm.d.tl/logger/init.lua b/src/awesomewm.d.tl/logger/init.lua deleted file mode 100644 index 76a4f92..0000000 --- a/src/awesomewm.d.tl/logger/init.lua +++ /dev/null @@ -1,18 +0,0 @@ -local ansicolors = require "ansicolors" -local console = require "logging.console" -local ll = require "logging" - -local log = console { - logLevel = ll.DEBUG, - destination = "stdout", - timestampPattern = "[%y-%m-%d %H:%M:%S]", - logPatterns = { - [ll.DEBUG] = ansicolors "%date%{cyan} %level %message %{reset}(%source)\n", - [ll.INFO] = ansicolors "%date %level %message\n", - [ll.WARN] = ansicolors "%date%{yellow} %level %message\n", - [ll.ERROR] = ansicolors "%date%{red bright} %level %message %{reset}(%source)\n", - [ll.FATAL] = ansicolors "%date%{magenta bright} %level %message %{reset}(%source)\n", - }, -} - -return log diff --git a/src/awesomewm.d.tl/logger/init.tl b/src/awesomewm.d.tl/logger/init.tl new file mode 100644 index 0000000..7470b33 --- /dev/null +++ b/src/awesomewm.d.tl/logger/init.tl @@ -0,0 +1,17 @@ +local ansicolors = require "ansicolors" +local logging_console = require "logging.console" + +local log = logging_console { + logLevel = "DEBUG", + destination = "stdout", + timestampPattern = "[%y-%m-%d %H:%M:%S]", + logPatterns = { + DEBUG = ansicolors "%date%{cyan} %level %message %{reset}(%source)\n", + INFO = ansicolors "%date %level %message\n", + WARN = ansicolors "%date%{yellow} %level %message\n", + ERROR = ansicolors "%date%{red bright} %level %message %{reset}(%source)\n", + FATAL = ansicolors "%date%{magenta bright} %level %message %{reset}(%source)\n", + }, +} + +return log diff --git a/src/awesomewm.d.tl/properties.lua b/src/awesomewm.d.tl/properties.lua deleted file mode 100644 index 0ca6f9a..0000000 --- a/src/awesomewm.d.tl/properties.lua +++ /dev/null @@ -1,66 +0,0 @@ -local properties = {} - --- properties.base_url = "https://awesomewm.org/apidoc" -properties.base_url = "file:///usr/share/doc/awesome/doc" - -properties.index_uri = "/index.html" - -properties.out_directory = "generated" - ---- Pages from the navigation menu to ignore. --- Sets to ignore documentations and sample file. I also added libraries with --- low quality API documentation, I'll probably work on them later, lets start --- with what works the best first. -properties.ignored_modules = { - -- Sample files - "rc.lua", - "theme.lua", - - -- Utility libraries - "gears.debug", - "gears.filesystem", - "gears.geometry", - "gears.math", - "gears.object", - "gears.protected_call", - "gears.sort", - "gears.string", - "gears.table", - "gears.wallpaper", - - -- Theme related libraries - "beautiful", - "gears.color", - "gears.shape", - - -- Classes - "awful.widget.common", - "gears.cache", - "gears.matrix", - "menubar.icon_theme", - "menubar.index_theme", - "signals", - "wibox.drawable", - "wibox.hierarchy", - "wibox.widget.base", - "xproperties", - - -- Documentation - "Authors", - "Readme", - "Contributing", - "The Widget system", - "Creating new widget", - "Default configuration file documentation", - "Change Awesome appearance", - "My first Awesome", - "The AwesomeWM client layout system", - "Startup options", - "Building and Testing", - "Using Cairo and LGI", - "Tips for upgrading your configuration", - "NEWS", - "FAQ", -} - -return properties diff --git a/src/awesomewm.d.tl/properties.tl b/src/awesomewm.d.tl/properties.tl new file mode 100644 index 0000000..5ce4a59 --- /dev/null +++ b/src/awesomewm.d.tl/properties.tl @@ -0,0 +1,72 @@ +local record Properties + base_url: string + index_uri: string + + out_directory: string + + --- Pages from the navigation menu to ignore. + -- Sets to ignore documentations and sample file. I also added libraries with + -- low quality API documentation, I'll probably work on them later, lets start + -- with what works the best first. + ignored_modules: { string } +end + +local properties: Properties = { + -- base_url = "https://awesomewm.org/apidoc", + base_url = "file:///usr/share/doc/awesome/doc", + index_uri = "/index.html", + out_directory = "generated", + ignored_modules = { + -- Sample files + "rc.lua", + "theme.lua", + + -- Utility libraries + "gears.debug", + "gears.filesystem", + "gears.geometry", + "gears.math", + "gears.object", + "gears.protected_call", + "gears.sort", + "gears.string", + "gears.table", + "gears.wallpaper", + + -- Theme related libraries + "beautiful", + "gears.color", + "gears.shape", + + -- Classes + "awful.widget.common", + "gears.cache", + "gears.matrix", + "menubar.icon_theme", + "menubar.index_theme", + "signals", + "wibox.drawable", + "wibox.hierarchy", + "wibox.widget.base", + "xproperties", + + -- Documentation + "Authors", + "Readme", + "Contributing", + "The Widget system", + "Creating new widget", + "Default configuration file documentation", + "Change Awesome appearance", + "My first Awesome", + "The AwesomeWM client layout system", + "Startup options", + "Building and Testing", + "Using Cairo and LGI", + "Tips for upgrading your configuration", + "NEWS", + "FAQ", + } +} + +return properties diff --git a/src/awesomewm.d.tl/scraper/init.lua b/src/awesomewm.d.tl/scraper/init.tl similarity index 100% rename from src/awesomewm.d.tl/scraper/init.lua rename to src/awesomewm.d.tl/scraper/init.tl diff --git a/src/awesomewm.d.tl/scraper/module_doc.lua b/src/awesomewm.d.tl/scraper/module_doc.tl similarity index 64% rename from src/awesomewm.d.tl/scraper/module_doc.lua rename to src/awesomewm.d.tl/scraper/module_doc.tl index be2efb6..304745e 100644 --- a/src/awesomewm.d.tl/scraper/module_doc.lua +++ b/src/awesomewm.d.tl/scraper/module_doc.tl @@ -1,13 +1,15 @@ local Function_Info = require "entities.Function_Info" +local List = require "pl.List" local Module_Doc = require "entities.Module_Doc" +local scan = require "web_sanitize.query.scan_html" local scraper_utils = require "scraper.utils" local utils = require "utils" -local function extract_function_name(function_name_node) - return function_name_node and (function_name_node.attr.name:gsub(".*:", "")) +local function extract_function_name(function_name_node: scan.HTMLNode): string + return function_name_node and ((function_name_node.attr.name as string):gsub(".*:", "")) end -local function extract_function_return_types(function_return_types_node) +local function extract_function_return_types(function_return_types_node: scan.HTMLNode): { string } if not function_return_types_node then return {} end @@ -15,12 +17,12 @@ local function extract_function_return_types(function_return_types_node) local selector = "span.types .type" local html = function_return_types_node:outer_html() - return scraper_utils.scrape(html, selector, function(node) + return scraper_utils.scrape(html, selector, function(node: scan.HTMLNode): string return utils.sanitize_string(node:inner_text()) end) end -local function extract_section_functions(dl) +local function extract_section_functions(dl: string): { Function_Info } local query_selectors = { function_name = "dt a", function_return_type = "dd ol", @@ -29,13 +31,15 @@ local function extract_section_functions(dl) return scraper_utils.scrape_tuples( dl, { query_selectors.function_name, query_selectors.function_return_type }, - function(nodes) + function(nodes: { string : scan.HTMLNode | nil }): Function_Info local function_info = Function_Info() function_info.name = extract_function_name(nodes[query_selectors.function_name]) - function_info.return_types = extract_function_return_types( - nodes[query_selectors.function_return_type] + function_info.return_types = List( + extract_function_return_types( + nodes[query_selectors.function_return_type] + ) ) return function_info @@ -45,7 +49,7 @@ end local module = {} -function module.get_doc_from_page(html) +function module.get_doc_from_page(html: string): Module_Doc local nodes = scraper_utils.extract_nodes(html, { "h2.section-header", "dl.function", @@ -57,20 +61,21 @@ function module.get_doc_from_page(html) local module_doc = Module_Doc() - for i, h2 in ipairs(nodes:get "h2.section-header") do + for i = 1, #nodes:get("h2.section-header") do + local h2 = nodes:get("h2.section-header")[i] local section_name = utils.sanitize_string(h2:inner_text()) local dl_html = nodes:get("dl.function")[i]:outer_html() if section_name == "Constructors" then - module_doc.constructors = extract_section_functions(dl_html) + module_doc.constructors = List(extract_section_functions(dl_html)) elseif section_name == "Static module functions" then - module_doc.static_functions = extract_section_functions(dl_html) + module_doc.static_functions = List(extract_section_functions(dl_html)) elseif section_name == "Object properties" then print "Not implemented: Deprecated object properties" elseif section_name == "Deprecated object properties" then print "Not implemented: Deprecated object properties" elseif section_name == "Object methods" then - module_doc.methods = extract_section_functions(dl_html) + module_doc.methods = List(extract_section_functions(dl_html)) elseif section_name == "Signals" then print "Not implemented: Signals" else diff --git a/src/awesomewm.d.tl/scraper/module_info_list.lua b/src/awesomewm.d.tl/scraper/module_info_list.tl similarity index 70% rename from src/awesomewm.d.tl/scraper/module_info_list.lua rename to src/awesomewm.d.tl/scraper/module_info_list.tl index f2a8b5f..8efbb09 100644 --- a/src/awesomewm.d.tl/scraper/module_info_list.lua +++ b/src/awesomewm.d.tl/scraper/module_info_list.tl @@ -1,4 +1,5 @@ local Module_Info = require "entities.Module_Info" +local scan = require "web_sanitize.query.scan_html" local scraper_utils = require "scraper.utils" local utils = require "utils" @@ -6,9 +7,9 @@ local module = {} local MODULE_A_TAG_QUERY_SELECTOR = "div#navigation ul li a" -local function extract_module_info(node) +local function extract_module_info(node: scan.HTMLNode): Module_Info local name = utils.sanitize_string(node:inner_text()) - local uri = node.attr.href + local uri = node.attr.href as string if not (name and uri) then error("Can't extract module info from node: " .. node:outer_html()) @@ -17,7 +18,7 @@ local function extract_module_info(node) return Module_Info(name, uri) end -function module.get_modules_from_index(html) +function module.get_modules_from_index(html: string): { Module_Info } return scraper_utils.scrape( html, MODULE_A_TAG_QUERY_SELECTOR, diff --git a/src/awesomewm.d.tl/scraper/utils.lua b/src/awesomewm.d.tl/scraper/utils.lua deleted file mode 100644 index 4668c0c..0000000 --- a/src/awesomewm.d.tl/scraper/utils.lua +++ /dev/null @@ -1,68 +0,0 @@ -local List = require "pl.List" -local log = require "logger" -local Map = require "pl.Map" -local scanner = require "web_sanitize.query.scan_html" -local tablex = require "pl.tablex" - -local scraper_utils = {} - -function scraper_utils.scrape(html, query_selector, extract_callback) - local ret = {} - - scanner.scan_html(html, function(stack) - if stack:is(query_selector) then - local node = stack:current() - local success, info = pcall(extract_callback, node) - - if not success then - log:error { message = info } - else - table.insert(ret, info) - end - end - end) - - return ret -end - -function scraper_utils.extract_nodes(html, query_selectors) - local siblings = Map() - - tablex.foreach(query_selectors, function(query_selector) - siblings:set(query_selector, List()) - end) - - scanner.scan_html(html, function(stack) - tablex.foreach(query_selectors, function(query_selector) - if stack:is(query_selector) then - siblings:get(query_selector):append(stack:current()) - end - end) - end) - - return siblings -end - -function scraper_utils.scrape_tuples(html, query_selectors, extract_callback) - local nodes = scraper_utils.extract_nodes(html, query_selectors) - - local ret = {} - - for i = 1, #nodes:get(query_selectors[1]) do - local node_list = {} - tablex.foreach(query_selectors, function(query_selector) - node_list[query_selector] = nodes:get(query_selector)[i] or nil - end) - local success, info = pcall(extract_callback, node_list) - - if not success then - log:error { message = info } - else - table.insert(ret, info) - end - end - - return ret -end - -return scraper_utils diff --git a/src/awesomewm.d.tl/scraper/utils.tl b/src/awesomewm.d.tl/scraper/utils.tl new file mode 100644 index 0000000..651ffd7 --- /dev/null +++ b/src/awesomewm.d.tl/scraper/utils.tl @@ -0,0 +1,73 @@ +local List = require "pl.List" +local log = require "logger" +local Map = require "pl.Map" +local scan = require "web_sanitize.query.scan_html" +local scanner = require "web_sanitize.query.scan_html" +local tablex = require "pl.tablex" + +local scraper_utils = {} + +function scraper_utils.scrape(html: string, query_selector: string, extract_callback: function(node: scan.HTMLNode): T): { T } + local ret: { T } = {} + + scanner.scan_html(html, function(stack: scan.NodeStack) + if stack:is(query_selector) then + local node = stack:current() + local success, info_or_error = pcall(extract_callback, node) + + if not success then + local error_message = info_or_error as string + log:error { message = error_message } + else + local info = info_or_error as T + table.insert(ret, info) + end + end + end) + + return ret +end + +function scraper_utils.extract_nodes(html: string, query_selectors: { string }): Map> + local siblings: Map> = Map() + + tablex.foreach(query_selectors, function(query_selector: string) + siblings:set(query_selector, List()) + end) + + scanner.scan_html(html, function(stack: scan.NodeStack) + tablex.foreach(query_selectors, function(query_selector: string) + if stack:is(query_selector) then + siblings:get(query_selector):append(stack:current()) + end + end) + end) + + return siblings +end + +function scraper_utils.scrape_tuples(html: string, query_selectors: { string }, extract_callback: function(tuple: { string : scan.HTMLNode | nil }): T): { T } + local nodes = scraper_utils.extract_nodes(html, query_selectors) + + local ret: { T } = {} + + for i = 1, #nodes:get(query_selectors[1]) do + local node_list: { string : scan.HTMLNode | nil } = {} + tablex.foreach(query_selectors, function(query_selector: string) + node_list[query_selector] = nodes:get(query_selector)[i] or nil + end) + local success, info_or_error = pcall(extract_callback, node_list) + + if not success then + local error_message = info_or_error as string + log:error { message = error_message } + else + local info = info_or_error as T + table.insert(ret, info) + end + end + + return ret +end + +return scraper_utils diff --git a/src/awesomewm.d.tl/utils.lua b/src/awesomewm.d.tl/utils.lua deleted file mode 100644 index ee4fb08..0000000 --- a/src/awesomewm.d.tl/utils.lua +++ /dev/null @@ -1,70 +0,0 @@ -local web_sanitize = require "web_sanitize" - -local utils = {} - -function utils.has_item(table, item) - for k, v in pairs(table) do - if v == item then - return k - end - end - - return nil -end - -function utils.filter(list, predicate) - local filtered = {} - - for position, value in ipairs(list) do - if predicate(value, position) then - table.insert(filtered, value) - end - end - - return filtered -end - -function utils.map(list, iteratee) - local mapped = {} - - for position, value in ipairs(list) do - table.insert(mapped, iteratee(value, position)) - end - - return mapped -end - -function utils.sanitize_string(string) - return utils.trim( - utils.replace(web_sanitize.extract_text(string), "^%s*(.-)%s*$", "%1") - ) -end - --- Extracted from teh Penlight Lua library. --- Sometime Lua string.gsub can't match unescaped strings. --- https://stackoverflow.com/a/72666170 -function utils.escape(string) - return (string:gsub("[%-%.%+%[%]%(%)%$%^%%%?%*]", "%%%1")) -end - -function utils.replace(string, old, new, n) - return (string:gsub(utils.escape(old), new:gsub("%%", "%%%%"), n)) -end - -function utils.trim(string) - return string:match "^%s*(.-)%s*$" -end - -function utils.do_or_fail(func, ...) - local log = require "logger" - local res, err = func(...) - - if not res then - log:error { "do_or_fail failed!", error = err } - error(err) - end - - return res -end - -return utils diff --git a/src/awesomewm.d.tl/utils.tl b/src/awesomewm.d.tl/utils.tl new file mode 100644 index 0000000..6c48228 --- /dev/null +++ b/src/awesomewm.d.tl/utils.tl @@ -0,0 +1,72 @@ +local web_sanitize = require "web_sanitize" + +local utils = {} + +function utils.has_item(t: table, item: any): any + for k, v in pairs(t) do + if v == item then + return k + end + end + + return nil +end + +function utils.filter(list: { T }, predicate: function(value: T, position: integer): boolean): { T } + local filtered: { T } = {} + + for position, value in ipairs(list) do + if predicate(value, position) then + table.insert(filtered, value) + end + end + + return filtered +end + +function utils.map(list: { T }, iteratee: function(value: T, position: integer): U): { U } + local mapped: { U } = {} + + for position, value in ipairs(list) do + table.insert(mapped, iteratee(value, position)) + end + + return mapped +end + +-- Extracted from teh Penlight Lua library. +-- Sometime Lua string.gsub can't match unescaped strings. +-- https://stackoverflow.com/a/72666170 +function utils.escape(s: string): string + return (s:gsub("[%-%.%+%[%]%(%)%$%^%%%?%*]", "%%%1")) +end + +function utils.replace(s: string, old: string, new: string, n: number): string + return (s:gsub(utils.escape(old), new:gsub("%%", "%%%%"), n)) +end + +function utils.trim(s: string): string + return s:match "^%s*(.-)%s*$" +end + +function utils.sanitize_string(s: string): string + return utils.trim( + utils.replace(web_sanitize.extract_text(s), "^%s*(.-)%s*$", "%1") + ) +end + +-- At some point, we should probably write a wrapper to make penlight's function work with pcalls. +local type Func = function(...: any): T | nil, string +function utils.do_or_fail(func: Func, ...: any): T + local log = require "logger" + local res, err = func(...) + + if not res then + log:error { "do_or_fail failed!", error = err } + error(err) + end + + return res +end + +return utils