Reviewed-on: #14
This commit is contained in:
commit
82d438f87a
|
@ -1,9 +1,10 @@
|
|||
local file = require "pl.file"
|
||||
local http = require "socket.http"
|
||||
local inspect = require "inspect"
|
||||
local log = require "logger"
|
||||
local logger = require "logger"
|
||||
local tablex = require "pl.tablex"
|
||||
|
||||
local log = logger.log("crawler")
|
||||
|
||||
local crawler = {}
|
||||
|
||||
local function http_request(url: string): string
|
||||
|
@ -46,14 +47,17 @@ local function get_resource_by_protocol(url: string): boolean, string
|
|||
end
|
||||
|
||||
--- Fetch the resource located at `url` and return its content.
-- The lookup is delegated to `get_resource_by_protocol`, which returns a
-- success flag followed by either the content or an error message.
-- @param url Location of the resource to fetch.
-- @return The fetched content, or nothing when the fetch failed (the failure
-- is reported through the module logger at error level).
function crawler.fetch(url: string): string
    -- Fetch exactly once: the second value is the payload on success and the
    -- error message on failure.
    local success, result_or_error = get_resource_by_protocol(url)

    if not success then
        local err = result_or_error
        log:error(logger.message_with_metadata("Fetch failed", { status = success, error = err }))
        return
    end

    local result = result_or_error

    log:info("Successfully fetched resource from " .. url)

    return result
end
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
local file = require "pl.file"
|
||||
local log = require "logger"
|
||||
local logger = require "logger"
|
||||
local path = require "pl.path"
|
||||
|
||||
local log = logger.log("file_writer")
|
||||
|
||||
local function write_file(file_content: string, file_path: string): boolean, nil | string
|
||||
-- Make sure the directory we want to write the file to exists
|
||||
local directory = path.dirname(file_path)
|
||||
|
@ -18,14 +20,14 @@ function module.write(file_content: string, file_path: string)
|
|||
local success, error_message = write_file(file_content, file_path)
|
||||
|
||||
if not success then
|
||||
log:error {
|
||||
"generator.write error",
|
||||
log:error(logger.message_with_metadata("Failed to write file", {
|
||||
file_path = file_path,
|
||||
error = error_message,
|
||||
}
|
||||
}))
|
||||
return
|
||||
end
|
||||
|
||||
log:info { "Successfully wrote file", file = file_path }
|
||||
log:info("Successfully wrote file to " .. file_path)
|
||||
end
|
||||
|
||||
return module
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
local crawler = require "crawler"
|
||||
local filesystem = require "filesystem"
|
||||
local inspect = require "inspect"
|
||||
local log = require "logger"
|
||||
local logger = require "logger"
|
||||
local properties = require "properties"
|
||||
local scraper = require "scraper"
|
||||
local generator = require "generator"
|
||||
|
||||
log:info(
|
||||
inspect { message = "Start extraction", base_url = properties.base_url }
|
||||
)
|
||||
local log = logger.log("main")
|
||||
|
||||
log:info(logger.message_with_metadata("Start", { properties = properties }))
|
||||
|
||||
local index = crawler.fetch(properties.base_url .. properties.index_uri)
|
||||
|
||||
|
@ -16,8 +15,7 @@ local index = crawler.fetch(properties.base_url .. properties.index_uri)
|
|||
-- scraper.get_modules_from_index(index, properties.ignored_modules)
|
||||
local module_infos = scraper.module_info_list.get_modules_from_index(index)
|
||||
|
||||
log:info(inspect { modules_found = #module_infos })
|
||||
|
||||
log:info("Finished Module List scrapping, found " .. #module_infos .. " modules")
|
||||
-- for i = 1, 1 do -- #modules do
|
||||
-- local m = modules[i]
|
||||
-- log:info(inspect { try = m })
|
||||
|
@ -31,8 +29,8 @@ local html =
|
|||
local module_doc = scraper.module_doc.get_doc_from_page(html)
|
||||
-- log:info(inspect { module_doc = module_doc })
|
||||
|
||||
-- local items = scraper.get_doc_from_page(page)
|
||||
-- log:info(inspect { items })
|
||||
-- -- local items = scraper.get_doc_from_page(page)
|
||||
-- -- log:info(inspect { items })
|
||||
|
||||
local mod = "textbox"
|
||||
filesystem.file_writer.write(
|
||||
|
|
|
@ -1,17 +1,71 @@
|
|||
local ansicolors = require "ansicolors"
|
||||
local inspect = require "inspect"
|
||||
local Logging = require "logging"
|
||||
local logging_console = require "logging.console"
|
||||
|
||||
local log = logging_console {
|
||||
logLevel = "DEBUG",
|
||||
destination = "stdout",
|
||||
timestampPattern = "[%y-%m-%d %H:%M:%S]",
|
||||
logPatterns = {
|
||||
DEBUG = ansicolors "%date%{cyan} %level %message %{reset}(%source)\n",
|
||||
INFO = ansicolors "%date %level %message\n",
|
||||
WARN = ansicolors "%date%{yellow} %level %message\n",
|
||||
ERROR = ansicolors "%date%{red bright} %level %message %{reset}(%source)\n",
|
||||
FATAL = ansicolors "%date%{magenta bright} %level %message %{reset}(%source)\n",
|
||||
},
|
||||
}
|
||||
--- Wrap `message` in the ansicolors markup associated with a log level.
-- @param level One of "DEBUG", "INFO", "WARN", "ERROR" or "FATAL".
-- @param message Text to colorize.
-- @return The value produced by `ansicolors` for the prefixed message.
-- Raises an error when `level` is not one of the levels listed above.
local function colorize(level: Logging.Level, message: string): string
    -- ansicolors markup prefix for every supported level.
    local color_prefixes: { string : string } = {
        DEBUG = "%{cyan}",
        INFO = "%{green}",
        WARN = "%{yellow}",
        ERROR = "%{red}",
        FATAL = "%{magenta}",
    }

    local prefix = color_prefixes[level]
    if not prefix then
        error("Unknown level: " .. level)
    end

    return ansicolors(prefix .. message)
end
|
||||
|
||||
return log
|
||||
--- Build the log pattern string used for one level of a tagged logger.
-- The result has the shape "%date (<tag>) <colorized '%level: %message'>",
-- optionally followed by " (%source)", and ends with a newline.
-- @param level Level whose color is applied to the "%level: %message" part.
-- @param tag Name printed between parentheses after the date placeholder.
-- @param show_source When truthy, append the " (%source)" placeholder.
-- @return The assembled pattern string.
local function format(level: Logging.Level, tag: string, show_source: boolean): string
    local colored_body = colorize(level, "%level: %message")

    local source_suffix = ""
    if show_source then
        source_suffix = " (%source)"
    end

    return ("%s (%s) %s%s\n"):format("%date", tag, colored_body, source_suffix)
end
|
||||
|
||||
--- Create a console logger whose output lines carry `tag`.
-- The logger is built with `logging_console`, writes to stdout at DEBUG
-- level, and emits a "Logger initialized" info entry before being returned.
-- @param tag Name embedded in every pattern through `format`.
-- @return The newly created logger.
local function new(tag: string): Logging.Log
    -- Every level except INFO also prints the source placeholder.
    local debug_pattern = format("DEBUG", tag, true)
    local info_pattern = format("INFO", tag, false)
    local warn_pattern = format("WARN", tag, true)
    local error_pattern = format("ERROR", tag, true)
    local fatal_pattern = format("FATAL", tag, true)

    local console_log = logging_console {
        logLevel = "DEBUG",
        destination = "stdout",
        timestampPattern = "[%y-%m-%d %H:%M:%S]",
        logPatterns = {
            DEBUG = debug_pattern,
            INFO = info_pattern,
            WARN = warn_pattern,
            ERROR = error_pattern,
            FATAL = fatal_pattern,
        },
    }

    console_log:info("Logger initialized")

    return console_log
end
|
||||
|
||||
-- Loggers created so far, indexed by their tag.
local loggers: { string : Logging.Log } = {}

--- Return the logger registered for `tag`, creating it on first use.
-- @param tag Name identifying the logger.
-- @return The cached logger for `tag`, or a new one built with `new`.
local function get_logger(tag: string): Logging.Log
    local cached = loggers[tag]
    if cached then
        return cached
    end

    local created = new(tag)
    loggers[tag] = created

    return created
end
|
||||
|
||||
local module = {}
|
||||
|
||||
--- Public accessor for tagged loggers.
-- @param tag Name identifying the logger.
-- @return The logger obtained from `get_logger` for `tag`.
function module.log(tag: string): Logging.Log
    local tagged_log = get_logger(tag)
    return tagged_log
end
|
||||
|
||||
--- Combine a message and a dump of its metadata into one string.
-- @param message Text placed on the first line.
-- @param metadata Any value; it is rendered with `inspect`.
-- @return `message`, a newline, then the `inspect` rendering of `metadata`.
function module.message_with_metadata(message: string, metadata: any): string
    local rendered_metadata = inspect(metadata)
    return ("%s\n%s"):format(message, rendered_metadata)
end
|
||||
|
||||
return module
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
local Function_Info = require "entities.Function_Info"
|
||||
local List = require "pl.List"
|
||||
local logger = require "logger"
|
||||
local Module_Doc = require "entities.Module_Doc"
|
||||
local scan = require "web_sanitize.query.scan_html"
|
||||
local scraper_utils = require "scraper.utils"
|
||||
local utils = require "utils"
|
||||
|
||||
local log = logger.log("scraper")
|
||||
|
||||
--- Extract a bare function name from a node's `name` attribute.
-- Everything up to and including the last ":" in the attribute is removed.
-- @param function_name_node Node whose `attr.name` holds the name; may be nil.
-- @return The stripped name, or nil when no node was given.
local function extract_function_name(function_name_node: scan.HTMLNode): string
    if not function_name_node then
        return nil
    end

    local qualified_name = function_name_node.attr.name as string
    -- Assigning to a single local keeps only gsub's first result (the string).
    local bare_name = qualified_name:gsub(".*:", "")

    return bare_name
end
|
||||
|
@ -71,13 +74,13 @@ function module.get_doc_from_page(html: string): Module_Doc.Module_Doc
|
|||
elseif section_name == "Static module functions" then
|
||||
module_doc.static_functions = List(extract_section_functions(dl_html))
|
||||
elseif section_name == "Object properties" then
|
||||
print "Not implemented: Object properties"
|
||||
log:warn("Not implemented: Object properties")
|
||||
elseif section_name == "Deprecated object properties" then
|
||||
print "Not implemented: Deprecated object properties"
|
||||
log:warn("Not implemented: Deprecated object properties")
|
||||
elseif section_name == "Object methods" then
|
||||
module_doc.methods = List(extract_section_functions(dl_html))
|
||||
elseif section_name == "Signals" then
|
||||
print "Not implemented: Signals"
|
||||
log:warn("Not implemented: Signals")
|
||||
else
|
||||
error("Unknown section name: " .. section_name)
|
||||
end
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
local List = require "pl.List"
|
||||
local log = require "logger"
|
||||
local logger = require "logger"
|
||||
local Map = require "pl.Map"
|
||||
local scan = require "web_sanitize.query.scan_html"
|
||||
local scanner = require "web_sanitize.query.scan_html"
|
||||
local tablex = require "pl.tablex"
|
||||
|
||||
local log = logger.log("scrapper")
|
||||
|
||||
local scraper_utils = {}
|
||||
|
||||
function scraper_utils.scrape<T>(html: string, query_selector: string, extract_callback: function(node: scan.HTMLNode): T): { T }
|
||||
|
@ -17,7 +19,7 @@ function scraper_utils.scrape<T>(html: string, query_selector: string, extract_c
|
|||
|
||||
if not success then
|
||||
local error_message = info_or_error as string
|
||||
log:error { message = error_message }
|
||||
log:error(logger.message_with_metadata("Extraction error", { error = error_message }))
|
||||
else
|
||||
local info = info_or_error as T
|
||||
table.insert(ret, info)
|
||||
|
@ -60,7 +62,7 @@ function scraper_utils.scrape_tuples<T>(html: string, query_selectors: { string
|
|||
|
||||
if not success then
|
||||
local error_message = info_or_error as string
|
||||
log:error { message = error_message }
|
||||
log:error(logger.message_with_metadata("Extraction error", { error = error_message }))
|
||||
else
|
||||
local info = info_or_error as T
|
||||
table.insert(ret, info)
|
||||
|
|
|
@ -57,11 +57,13 @@ end
|
|||
|
||||
-- At some point, we should probably write a wrapper to make penlight's functions work with pcall.
|
||||
function utils.do_or_fail<T>(func: function<T>(...: any): (T | nil, string), ...: any): T
|
||||
local log = require "logger"
|
||||
local logger = require "logger"
|
||||
local log = logger.log("utils")
|
||||
|
||||
local ok, res, err = pcall(func, ...)
|
||||
|
||||
if not (ok and res) then
|
||||
log:error { "do_or_fail failed!", error = err }
|
||||
log:error(logger.message_with_metadata("do_or_fail failed!", { error = err }))
|
||||
error(err)
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue