Migrate from lua-curl to luasocket (#5) #7

Merged
Aire-One merged 5 commits from feat/#5 into master 2022-09-18 20:29:48 +02:00
3 changed files with 36 additions and 17 deletions

View File

@ -18,9 +18,12 @@
"luacheckrc",
"lualogging",
"Luarocks",
"luasec",
"luasocket",
"mkdir",
"setopt",
"Stylua",
"tablex",
"tmpl",
"wibox",
"writefunction"

View File

@ -12,10 +12,11 @@ dependencies = {
"lualogging 1.6.0",
"inspect 3.1.3",
"ansicolors 1.0.2",
"lua-curl 0.3.13",
"htmlparser 0.3.9",
"web_sanitize 1.3.0",
"penlight 1.13.1",
"luasocket 3.1.0-1",
"luasec 1.2.0-1",
}
build = {
type = "builtin",

View File

@ -1,37 +1,52 @@
local curl = require "cURL"
local file = require "pl.file"
local http = require "socket.http"
local inspect = require "inspect"
local log = require "logger"
local tablex = require "pl.tablex"
local crawler = {}
--- Perform a request with `socket.http` and return the response body.
-- Relies on the simple form of `http.request(url)`: on success it yields the
-- body and the numeric status code, on failure `nil` and an error message.
-- @tparam string url URL to request
-- @treturn string the response body
-- @raise a table `{ "Request failed", err = ..., url = ... }` when the request
-- itself fails, or a table
-- `{ "Request successful, but returned non-200 code", code = ..., body = ... }`
-- when the status code lies outside the 2xx range
local function http_request(url)
    local body, code_or_error = http.request(url)

    -- Without a body the second return value carries the error message.
    if not body then
        local err = code_or_error
        error { "Request failed", err = err, url = url }
    end

    -- With a body the second return value is the HTTP status code.
    local code = code_or_error
    -- A status is rejected when it falls on either side of the 2xx range,
    -- hence `or`: no number is both below 200 and at or above 300.
    if code < 200 or code >= 300 then
        error {
            "Request successful, but returned non-200 code",
            code = code,
            body = body,
        }
    end

    return body
end
--- Load the content a URL points at, picking the loader from the URL scheme.
-- `http` and `https` URLs are requested through `http_request` under `pcall`;
-- `file` URLs are read from disk with `pl.file.read`.
-- @tparam string url URL of the form `<scheme>://<resource>`
-- @treturn boolean `true` when the resource was loaded, `false` otherwise
-- @return the resource content on success, the error value on failure
-- @raise a table `{ "Unsupported protocol", protocol = ... }` for any other
-- scheme (including a URL that does not match the expected form)
local function get_resource_by_protocol(url)
    local scheme, location = url:match "^(%a+)://(.*)$"

    if tablex.find({ "http", "https" }, scheme) ~= nil then
        -- Turn errors raised by http_request into a (false, err) pair.
        local ok, outcome = pcall(http_request, url)
        return ok, outcome
    end

    if scheme == "file" then
        local content, read_err = file.read(location, false)
        if not content then
            return false, read_err
        end
        return true, content
    end

    error { "Unsupported protocol", protocol = scheme }
end
function crawler.fetch(url)
local success, result = pcall(crawler.request, url)
local success, result = get_resource_by_protocol(url)
if not success then
log:error(inspect { "Fetch failed", status = success, error = result })
@ -40,7 +55,7 @@ function crawler.fetch(url)
log:info(inspect { message = "Successfully fetched resource", url = url })
return table.concat(result, "")
return result
end
return crawler