-----------------------------------------------------------------------------
-- luasocket/etc/check-links.lua
-- Little program that checks links in HTML files
-- LuaSocket sample files
-- Author: Diego Nehab
-- RCS ID: $Id$
-----------------------------------------------------------------------------
-- Timeout applied to HTTP requests made through socket.http
-- (LuaSocket documents this value in seconds).
socket.http.TIMEOUT = 10
-- Per-URL results remembered by getstatus, so that a link that shows up
-- several times does not have to be looked up from scratch each time.
cache = {}
-----------------------------------------------------------------------------
-- Reads the whole contents of a local file.
-- Input
--   path: path of the file; it is URL-unescaped before being opened
-- Returns
--   the file contents as a string, or
--   nil followed by the error message reported by io.open
-----------------------------------------------------------------------------
function readfile(path)
    path = socket.url.unescape(path)
    -- the message is kept in 'err' so the built-in 'error' is not shadowed
    local file, err = io.open(path, "r")
    if not file then return nil, err end
    local body = file:read("*a")
    file:close()
    return body
end
-----------------------------------------------------------------------------
-- Checks whether a link can be reached.
-- "http" URLs are probed with a HEAD request, "file" URLs by trying to open
-- the file for reading. URLs without a scheme are treated as "file".
-- Results are remembered in the global 'cache' table, keyed by URL.
-- Input
--   url: absolute URL to be checked
-- Returns
--   nil if the link is fine, otherwise a message describing the problem
-----------------------------------------------------------------------------
function getstatus(url)
    -- Answer from the cache before doing any work. Good links are stored as
    -- 'true' because storing nil would simply erase the entry and force the
    -- link to be checked again every time it is seen.
    local known = cache[url]
    if known ~= nil then
        if known == true then return nil end
        return known
    end
    local parsed = socket.url.parse(url, { scheme = "file" })
    local res
    if parsed.scheme == "http" then
        local request = { url = url, method = "HEAD" }
        local response = socket.http.request(request)
        if response.code ~= 200 then
            -- never let a non-200 answer pass as success just because no
            -- status line or error message came back with it
            res = response.status or response.error or
                string.format("unexpected status code '%s'",
                    tostring(response.code))
        end
    elseif parsed.scheme == "file" then
        -- a URL with no path component would make unescape fail on nil;
        -- opening "" fails cleanly and yields a regular error message
        local file, err = io.open(socket.url.unescape(parsed.path or ""), "r")
        if file then
            file:close()
        else res = err end
    else res = string.format("unhandled scheme '%s'", parsed.scheme) end
    cache[url] = res or true
    return res
end
-----------------------------------------------------------------------------
-- Fetches the document a URL points to.
-- "http" URLs are fetched with socket.http.get, "file" URLs are read with
-- readfile. URLs without a scheme are treated as "file".
-- Input
--   url: URL of the document
-- Returns
--   base: URL that relative links in the document should be resolved
--         against (the Location header of a 200 answer if present,
--         otherwise the URL that was passed in)
--   body: document contents, or nil if it could not be retrieved
--   err:  error message, if any
-----------------------------------------------------------------------------
function retrieve(url)
    local parsed = socket.url.parse(url, { scheme = "file" })
    local body, headers, code, err
    local base = url
    if parsed.scheme == "http" then
        body, headers, code, err = socket.http.get(url)
        -- Prefer the location reported by the server. The previous form,
        -- 'base or headers.location', could never pick the header because
        -- base always starts out as the non-nil url.
        if code == 200 and headers and headers.location then
            base = headers.location
        end
    elseif parsed.scheme == "file" then
        body, err = readfile(parsed.path)
    else err = string.format("unhandled scheme '%s'", parsed.scheme) end
    return base, body, err
end
-----------------------------------------------------------------------------
-- Extracts the targets of all href attributes found in an HTML document.
-- HTML comments are stripped first. Double-quoted values are collected
-- first, then single-quoted ones, then unquoted ones.
-- Input
--   body: HTML text
--   base: URL used to turn relative links into absolute ones
-- Returns
--   a list with the absolute URL of every link found
-----------------------------------------------------------------------------
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Records one link and erases the attribute from the text. The explicit
    -- "" matters: from Lua 5.1 on, a gsub callback that returns nil leaves
    -- the match in place, and the unquoted pass below would then pick up
    -- the quoted attributes a second time.
    local function collect(href)
        table.insert(links, socket.url.absolute(base, href))
        return ""
    end
    -- extract links
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at the first blank or '>', so attributes that
    -- follow it in the same tag are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>\"']+)", collect)
    return links
end
-----------------------------------------------------------------------------
-- Retrieves a document and checks every link found in it.
-- Each link is written to stderr as it is checked; a broken link is written
-- a second time, followed by the problem reported by getstatus. If the
-- document itself cannot be retrieved, the error is printed to stdout and
-- nothing else happens.
-- Input
--   url: URL of the document whose links are to be checked
-----------------------------------------------------------------------------
function checklinks(url)
    -- the message is kept in 'err' so the built-in 'error' is not shadowed
    local base, body, err = retrieve(url)
    if not body then print(err) return end
    local links = getlinks(body, base)
    for _, l in ipairs(links) do
        io.stderr:write("\t", l, "\n")
        local status = getstatus(l)
        -- tostring keeps write from failing on a status that is neither a
        -- string nor a number
        if status then
            io.stderr:write('\t', l, ": ", tostring(status), "\n")
        end
    end
end
-- Main program: every command line argument is a document to be checked.
arg = arg or {}
-- Testing arg[1] works on every Lua version; table.getn is gone in 5.2+.
if arg[1] == nil then
    print("Usage:\n luasocket check-links.lua {<url>}")
    -- 'exit' is not a standard global; os.exit is the portable way out
    os.exit(1)
end
for _, a in ipairs(arg) do
    print("Checking ", a)
    -- plain file names become file: URLs, full URLs are left untouched
    checklinks(socket.url.absolute("file:", a))
end