Merge pull request 'Migrate from lua-curl to luasocket (#5)' (#7) from feat/#5 into master
ci/woodpecker/push/lint Pipeline was successful
Reviewed-on: #7
commit 3f8ebe8753
@@ -18,9 +18,12 @@
 "luacheckrc",
 "lualogging",
 "Luarocks",
+"luasec",
+"luasocket",
 "mkdir",
 "setopt",
 "Stylua",
+"tablex",
 "tmpl",
 "wibox",
 "writefunction"
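(The hunk above extends what appears to be a spell-check word list: the identifiers introduced by this migration — luasec, luasocket, tablex — are added so the linter stops flagging them; the exact file is not shown in this view.)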
@@ -12,10 +12,11 @@ dependencies = {
 "lualogging 1.6.0",
 "inspect 3.1.3",
 "ansicolors 1.0.2",
-"lua-curl 0.3.13",
 "htmlparser 0.3.9",
 "web_sanitize 1.3.0",
 "penlight 1.13.1",
+"luasocket 3.1.0-1",
+"luasec 1.2.0-1",
 }
 build = {
 type = "builtin",
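A note on the swap above: luasocket's socket.http module speaks plain HTTP only, while luasec supplies ssl.https, which layers TLS over luasocket — presumably why the two dependencies arrive together. A minimal sketch of the two modules side by side (example.com is a placeholder URL):

    local http = require "socket.http" -- from luasocket: plain HTTP
    local https = require "ssl.https" -- from luasec: HTTPS over luasocket

    -- Both share the same simple-request interface:
    -- body on success, nil plus an error message on failure.
    local body, code = http.request "http://example.com/"
    print(body and code or ("request failed: " .. tostring(code)))

    body, code = https.request "https://example.com/"
    print(body and code or ("request failed: " .. tostring(code)))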
@@ -1,37 +1,52 @@
-local curl = require "cURL"
+local file = require "pl.file"
+local http = require "socket.http"
 local inspect = require "inspect"
 local log = require "logger"
+local tablex = require "pl.tablex"

 local crawler = {}

-function crawler.request(url)
-    local queue = {}
-    local easy = curl.easy():setopt_url(url):setopt_writefunction(function(buf)
-        table.insert(queue, buf)
-    end)
+local function http_request(url)
+    local body, code_or_error = http.request(url)

-    local ok, err = easy:perform()
-    if not ok then
-        easy:close()
-        error { message = "curl.easy failed", err = err }
+    if not body then
+        local err = code_or_error
+        error { "Request failed", err = err, url = url }
     end

-    local code, body = easy:getinfo_response_code(), table.concat(queue)
-    easy:close()
+    local code = code_or_error

     if code < 200 and code >= 300 then
         error {
-            message = "curl response code is not 200",
+            "Request successful, but returned non-200 code",
             code = code,
             body = body,
         }
     end

-    return queue
+    return body
+end
+
+local function get_resource_by_protocol(url)
+    local protocol, resource = url:match "^(%a+)://(.*)$"
+
+    if not not tablex.find({ "http", "https" }, protocol) then
+        local success, result = pcall(http_request, url)
+        return success, result
+    elseif protocol == "file" then
+        local res, err = file.read(resource, false)
+        if res then
+            return true, res
+        else
+            return false, err
+        end
+    else
+        error { "Unsupported protocol", protocol = protocol }
+    end
 end

 function crawler.fetch(url)
-    local success, result = pcall(crawler.request, url)
+    local success, result = get_resource_by_protocol(url)

     if not success then
         log:error(inspect { "Fetch failed", status = success, error = result })
@@ -40,7 +55,7 @@ function crawler.fetch(url)

     log:info(inspect { message = "Successfully fetched resource", url = url })

-    return table.concat(result, "")
+    return result
 end

 return crawler
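One carried-over detail worth flagging: the status check `if code < 200 and code >= 300 then` can never be true — no number is simultaneously below 200 and at least 300 — so non-2xx responses pass through without raising. A range check for non-success codes would read:

    if code < 200 or code >= 300 then

Relatedly, `not not tablex.find(...)` is redundant: tablex.find already returns nil when the element is absent, so the bare call works as a condition.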
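Also note that get_resource_by_protocol routes both http and https URLs to socket.http.request, which cannot negotiate TLS by itself, so https fetches will fail unless something supplies the TLS layer. Since the rockspec now pulls in luasec, a hedged sketch of scheme-based dispatch (the request_for helper is hypothetical, not part of this commit):

    local http = require "socket.http"
    local https = require "ssl.https"

    -- Pick the client that matches the URL's scheme; both modules
    -- expose the same request(url) -> body, code interface.
    local function request_for(protocol)
        return protocol == "https" and https.request or http.request
    end

From the caller's side the module is unchanged, e.g.:

    local crawler = require "crawler" -- module path is an assumption
    local body = crawler.fetch "https://example.com/"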