Merge pull request 'Migrate from lua-curl to luasocket (#5)' (#7) from feat/#5 into master
ci/woodpecker/push/lint Pipeline was successful Details

Reviewed-on: #7
This commit is contained in:
Aire-One 2022-09-18 20:29:45 +02:00
commit 3f8ebe8753
3 changed files with 36 additions and 17 deletions

View File

@@ -18,9 +18,12 @@
"luacheckrc", "luacheckrc",
"lualogging", "lualogging",
"Luarocks", "Luarocks",
"luasec",
"luasocket",
"mkdir", "mkdir",
"setopt", "setopt",
"Stylua", "Stylua",
"tablex",
"tmpl", "tmpl",
"wibox", "wibox",
"writefunction" "writefunction"

View File

@@ -12,10 +12,11 @@ dependencies = {
"lualogging 1.6.0", "lualogging 1.6.0",
"inspect 3.1.3", "inspect 3.1.3",
"ansicolors 1.0.2", "ansicolors 1.0.2",
"lua-curl 0.3.13",
"htmlparser 0.3.9", "htmlparser 0.3.9",
"web_sanitize 1.3.0", "web_sanitize 1.3.0",
"penlight 1.13.1", "penlight 1.13.1",
"luasocket 3.1.0-1",
"luasec 1.2.0-1",
} }
build = { build = {
type = "builtin", type = "builtin",

View File

@@ -1,37 +1,52 @@
local curl = require "cURL" local file = require "pl.file"
local http = require "socket.http"
local inspect = require "inspect" local inspect = require "inspect"
local log = require "logger" local log = require "logger"
local tablex = require "pl.tablex"
local crawler = {} local crawler = {}
function crawler.request(url) local function http_request(url)
local queue = {} local body, code_or_error = http.request(url)
local easy = curl.easy():setopt_url(url):setopt_writefunction(function(buf)
table.insert(queue, buf)
end)
local ok, err = easy:perform() if not body then
if not ok then local err = code_or_error
easy:close() error { "Request failed", err = err, url = url }
error { message = "curl.easy failed", err = err }
end end
local code, body = easy:getinfo_response_code(), table.concat(queue) local code = code_or_error
easy:close()
if code < 200 and code >= 300 then if code < 200 and code >= 300 then
error { error {
message = "curl response code is not 200", "Request successful, but returned non-200 code",
code = code, code = code,
body = body, body = body,
} }
end end
return queue return body
end
local function get_resource_by_protocol(url)
local protocol, resource = url:match "^(%a+)://(.*)$"
if not not tablex.find({ "http", "https" }, protocol) then
local success, result = pcall(http_request, url)
return success, result
elseif protocol == "file" then
local res, err = file.read(resource, false)
if res then
return true, res
else
return false, err
end
else
error { "Unsupported protocol", protocol = protocol }
end
end end
function crawler.fetch(url) function crawler.fetch(url)
local success, result = pcall(crawler.request, url) local success, result = get_resource_by_protocol(url)
if not success then if not success then
log:error(inspect { "Fetch failed", status = success, error = result }) log:error(inspect { "Fetch failed", status = success, error = result })
@@ -40,7 +55,7 @@ function crawler.fetch(url)
log:info(inspect { message = "Successfully fetched resource", url = url }) log:info(inspect { message = "Successfully fetched resource", url = url })
return table.concat(result, "") return result
end end
return crawler return crawler