feat(crawler): manage `file://` protocol
ci/woodpecker/push/lint Pipeline was successful Details
ci/woodpecker/pr/lint Pipeline was successful Details

This commit is contained in:
Aire-One 2022-09-11 03:17:40 +02:00
parent 5806f894d0
commit 10c9c893b1
2 changed files with 23 additions and 2 deletions

View File

@ -23,6 +23,7 @@
"mkdir", "mkdir",
"setopt", "setopt",
"Stylua", "Stylua",
"tablex",
"tmpl", "tmpl",
"wibox", "wibox",
"writefunction" "writefunction"

View File

@ -1,10 +1,12 @@
local file = require "pl.file"
local http = require "socket.http" local http = require "socket.http"
local inspect = require "inspect" local inspect = require "inspect"
local log = require "logger" local log = require "logger"
local tablex = require "pl.tablex"
local crawler = {} local crawler = {}
function crawler.request(url) local function http_request(url)
local body, code_or_error = http.request(url) local body, code_or_error = http.request(url)
if not body then if not body then
@ -25,8 +27,26 @@ function crawler.request(url)
return body return body
end end
--- Retrieve the content behind a URL, dispatching on its protocol.
-- Supported protocols are `http`, `https` and `file`. Every outcome,
-- including an unsupported or unparsable protocol, is reported through the
-- return values instead of being raised, so the caller can handle all
-- failures with a single `if not success` check.
-- @tparam string url URL of the form `<protocol>://<resource>`
-- @treturn boolean `true` when the resource was retrieved, `false` otherwise
-- @return the resource content on success, otherwise the error value
local function get_resource_by_protocol(url)
    -- `protocol` is nil when the URL does not look like `<letters>://...`;
    -- that case falls through to the "Unsupported protocol" result below.
    local protocol, resource = url:match "^(%a+)://(.*)$"

    if tablex.find({ "http", "https" }, protocol) then
        -- pcall turns any error raised by http_request into (false, error).
        local success, result = pcall(http_request, url)
        return success, result
    end

    if protocol == "file" then
        -- NOTE(review): the second argument is presumably Penlight's
        -- `is_bin` flag (false = text mode) — confirm against pl.file docs.
        local content, err = file.read(resource, false)
        if content then
            return true, content
        end
        return false, err
    end

    -- Returned rather than raised: the other branches already report
    -- failures as (false, error), and a raised error here would not be
    -- caught by a caller that only checks the returned status.
    return false, { "Unsupported protocol", protocol = protocol }
end
function crawler.fetch(url) function crawler.fetch(url)
local success, result = pcall(crawler.request, url) local success, result = get_resource_by_protocol(url)
if not success then if not success then
log:error(inspect { "Fetch failed", status = success, error = result }) log:error(inspect { "Fetch failed", status = success, error = result })