Migrate from lua-curl to luasocket (#5) #7
|
@ -18,9 +18,12 @@
|
|||
"luacheckrc",
|
||||
"lualogging",
|
||||
"Luarocks",
|
||||
"luasec",
|
||||
"luasocket",
|
||||
"mkdir",
|
||||
"setopt",
|
||||
"Stylua",
|
||||
"tablex",
|
||||
"tmpl",
|
||||
"wibox",
|
||||
"writefunction"
|
||||
|
|
|
@ -12,10 +12,11 @@ dependencies = {
|
|||
"lualogging 1.6.0",
|
||||
"inspect 3.1.3",
|
||||
"ansicolors 1.0.2",
|
||||
"lua-curl 0.3.13",
|
||||
"htmlparser 0.3.9",
|
||||
"web_sanitize 1.3.0",
|
||||
"penlight 1.13.1",
|
||||
"luasocket 3.1.0-1",
|
||||
"luasec 1.2.0-1",
|
||||
}
|
||||
build = {
|
||||
type = "builtin",
|
||||
|
|
|
@ -1,37 +1,52 @@
|
|||
local curl = require "cURL"
|
||||
local file = require "pl.file"
|
||||
local http = require "socket.http"
|
||||
local inspect = require "inspect"
|
||||
local log = require "logger"
|
||||
local tablex = require "pl.tablex"
|
||||
|
||||
local crawler = {}
|
||||
|
||||
--- Perform an HTTP(S) request for `url` and return the response body.
--
-- Delegates to `http.request` (the module loaded from "socket.http"). In its
-- simple form that call yields `body, status_code` when a response was
-- received and `nil, error_message` when the request could not be completed.
--
-- @tparam string url Absolute URL to request.
-- @treturn string The response body.
-- @raise A table `{ "Request failed", err = ..., url = ... }` when no response
--   was obtained, or a table
--   `{ "Request successful, but returned non-200 code", code = ..., body = ... }`
--   when the status code lies outside the 200-299 range.
local function http_request(url)
  local body, code_or_error = http.request(url)

  -- No body means the second value carries the error description.
  if not body then
    error { "Request failed", err = code_or_error, url = url }
  end

  -- With a body present, the second value is the numeric status code.
  local code = code_or_error

  -- A status cannot be below 200 and at/above 300 at the same time, so the
  -- out-of-range test has to be a disjunction; with `and` it never fired and
  -- error responses were handed back as if they had succeeded.
  if code < 200 or code >= 300 then
    error {
      "Request successful, but returned non-200 code",
      code = code,
      body = body,
    }
  end

  return body
end
|
||||
|
||||
--- Fetch the resource a URL points at, dispatching on the URL scheme.
--
-- `http` and `https` URLs go through `http_request`, wrapped in `pcall` so
-- that an error raised there comes back as `false, err`. `file` URLs are read
-- with `file.read`, using everything after `file://` as the path.
--
-- @tparam string url URL of the form `<scheme>://<rest>`.
-- @treturn boolean `true` when the resource was obtained, `false` otherwise.
-- @return On success the resource contents. On failure the error value:
--   whatever `http_request` raised, the second value returned by `file.read`,
--   or a table `{ "Unsupported protocol", protocol = ..., url = ... }`.
local function get_resource_by_protocol(url)
  local protocol, resource = url:match "^(%a+)://(.*)$"

  if protocol == "http" or protocol == "https" then
    return pcall(http_request, url)
  end

  if protocol == "file" then
    local contents, err = file.read(resource, false)
    if contents then
      return true, contents
    end
    return false, err
  end

  -- Reached for unknown schemes and for URLs without a `scheme://` prefix
  -- (protocol is nil in that case). Failure is reported through the return
  -- values, like every other branch, instead of being raised past a caller
  -- that only inspects the success flag.
  return false, { "Unsupported protocol", protocol = protocol, url = url }
end
|
||||
|
||||
function crawler.fetch(url)
|
||||
local success, result = pcall(crawler.request, url)
|
||||
local success, result = get_resource_by_protocol(url)
|
||||
|
||||
if not success then
|
||||
log:error(inspect { "Fetch failed", status = success, error = result })
|
||||
|
@ -40,7 +55,7 @@ function crawler.fetch(url)
|
|||
|
||||
log:info(inspect { message = "Successfully fetched resource", url = url })
|
||||
|
||||
return table.concat(result, "")
|
||||
return result
|
||||
end
|
||||
|
||||
return crawler
|
||||
|
|
Loading…
Reference in New Issue