-----------------------------------------------------------------------------
-- URI parsing, composition and relative URL resolution
-- LuaSocket toolkit.
-- Author: Diego Nehab
-----------------------------------------------------------------------------

-----------------------------------------------------------------------------
-- Declare module
-----------------------------------------------------------------------------
-- The standard libraries are captured in locals before module() is called:
-- module() makes the module table the environment of this chunk, so from
-- then on globals are only reachable through `base`.
local string = require("string")
local base = _G
local table = require("table")
module("socket.url")

-----------------------------------------------------------------------------
-- Module version
-----------------------------------------------------------------------------
_VERSION = "URL 1.0.2"
-----------------------------------------------------------------------------
-- Encodes a string into its escaped hexadecimal representation
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the binary string
-----------------------------------------------------------------------------
function escape(s)
    -- Percent-encodes one character as "%" plus two lowercase hex digits.
    local function encode_char(c)
        return string.format("%%%02x", string.byte(c))
    end
    -- Every character other than an ASCII letter, a digit or "_" is encoded.
    -- Assigning to a single local drops gsub's second result (the count).
    local escaped = string.gsub(s, "([^A-Za-z0-9_])", encode_char)
    return escaped
end
-----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with the
-- URL parsing.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the binary string
-----------------------------------------------------------------------------
-- Turns an array into a lookup set: each array element becomes a key of the
-- returned table, mapped to 1.
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end
-- These characters are allowed within a path segment, along with
-- alphanumerics; every other character must be escaped.
-- protect_segment consults this set to decide what to leave untouched.
local segment_set = make_set {
"-", "_", ".", "!", "~", "*", "'", "(",
")", ":", "@", "&", "=", "+", "$", ",",
}
-- Escapes every character of a path segment that is neither alphanumeric,
-- "_", nor a member of segment_set. Returns only the escaped string.
local function protect_segment(s)
    -- The outer parentheses drop gsub's second result (the substitution
    -- count), so callers receive a single value, as with escape and unescape.
    return (string.gsub(s, "([^A-Za-z0-9_])", function (c)
        if segment_set[c] then return c
        else return string.format("%%%02x", string.byte(c)) end
    end))
end
-----------------------------------------------------------------------------
-- Decodes an escaped hexadecimal string into its binary representation
-- Input
--   s: escaped hexadecimal string to be decoded
-- Returns
--   unescaped binary representation of the escaped string
-----------------------------------------------------------------------------
function unescape(s)
    -- Each "%XX" sequence (XX being two hexadecimal digits) is replaced by the
    -- character whose code is XX; anything else is left untouched.
    -- The outer parentheses drop gsub's second result (the substitution count).
    return (string.gsub(s, "%%(%x%x)", function(hex)
        return string.char(base.tonumber(hex, 16))
    end))
end
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
-- base_path
-- relative_path
-- Returns
-- corresponding absolute path
-----------------------------------------------------------------------------
local function absolute_path(base_path, relative_path)
    -- a relative path that starts at the root replaces the base path entirely
    if string.sub(relative_path, 1, 1) == "/" then
        return relative_path
    end
    -- drop whatever follows the last "/" of the base path, then append the
    -- relative path to what is left
    local merged = string.gsub(base_path, "[^/]*$", "") .. relative_path
    -- remove "./" segments; longer matches such as "../" or "name./" are
    -- put back unchanged
    merged = string.gsub(merged, "([^/]*%./)", function (segment)
        if segment == "./" then return "" end
        return segment
    end)
    -- a trailing "/." collapses to "/"
    merged = string.gsub(merged, "/%.$", "/")
    -- cancel "<segment>/../" pairs until a pass changes nothing; a literal
    -- "../../" match is kept as it is
    local previous
    repeat
        previous = merged
        merged = string.gsub(previous, "([^/]*/%.%./)", function (pair)
            if pair == "../../" then return pair end
            return ""
        end)
    until merged == previous
    -- cancel a trailing "<segment>/.." the same way, keeping "../.."
    merged = string.gsub(merged, "([^/]*/%.%.)$", function (tail)
        if tail == "../.." then return tail end
        return ""
    end)
    return merged
end
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   nil, followed by an error message, if url is nil or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
-----------------------------------------------------------------------------
function parse(url, default)
    -- initialize default parameters: work on a copy so that the caller's
    -- table of defaults is never modified
    local parsed = {}
    for i,v in base.pairs(default or parsed) do parsed[i] = v end
    -- empty url is parsed to nil
    if not url or url == "" then return nil, "invalid url" end
    -- remove whitespace
    -- url = string.gsub(url, "%s", "")
    -- Each step below stores one component in `parsed` and strips it from
    -- `url`, so the following patterns only see what is still unparsed.
    -- get fragment
    url = string.gsub(url, "#(.*)$", function(f)
        parsed.fragment = f
        return ""
    end)
    -- get scheme
    url = string.gsub(url, "^([%w][%w%+%-%.]*)%:",
        function(s) parsed.scheme = s; return "" end)
    -- get authority
    url = string.gsub(url, "^//([^/]*)", function(n)
        parsed.authority = n
        return ""
    end)
    -- get query string
    url = string.gsub(url, "%?(.*)", function(q)
        parsed.query = q
        return ""
    end)
    -- get params
    url = string.gsub(url, "%;(.*)", function(p)
        parsed.params = p
        return ""
    end)
    -- path is whatever was left
    if url ~= "" then parsed.path = url end
    -- split the authority (possibly taken from the defaults) into
    -- userinfo, host and port
    local authority = parsed.authority
    if not authority then return parsed end
    authority = string.gsub(authority,"^([^@]*)@",
        function(u) parsed.userinfo = u; return "" end)
    -- the port may not contain "]", so the colons inside a bracketed IPv6
    -- literal are not mistaken for the port separator
    authority = string.gsub(authority, ":([^:%]]*)$",
        function(p) parsed.port = p; return "" end)
    if authority ~= "" then
        -- IPv6? the host is stored without the surrounding brackets
        parsed.host = string.match(authority, "^%[(.+)%]$") or authority
    end
    -- split the userinfo into user and password
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    userinfo = string.gsub(userinfo, ":([^:]*)$",
        function(p) parsed.password = p; return "" end)
    parsed.user = userinfo
    return parsed
end
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- Components are protected if any reserved or unallowed characters are found
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-----------------------------------------------------------------------------
function build(parsed)
    -- the path is split into segments and rebuilt, which re-protects each
    -- segment
    local ppath = parse_path(parsed.path or "")
    local url = build_path(ppath)
    if parsed.params then url = url .. ";" .. parsed.params end
    if parsed.query then url = url .. "?" .. parsed.query end
    -- when a host is present the authority is recomposed from host, port and
    -- user information; otherwise parsed.authority is used as given
    local authority = parsed.authority
    if parsed.host then
        authority = parsed.host
        if string.find(authority, ":") then -- IPv6?
            authority = "[" .. authority .. "]"
        end
        if parsed.port then authority = authority .. ":" .. parsed.port end
        -- user/password take precedence over a ready-made userinfo
        local userinfo = parsed.userinfo
        if parsed.user then
            userinfo = parsed.user
            if parsed.password then
                userinfo = userinfo .. ":" .. parsed.password
            end
        end
        if userinfo then authority = userinfo .. "@" .. authority end
    end
    if authority then url = "//" .. authority .. url end
    if parsed.scheme then url = parsed.scheme .. ":" .. url end
    if parsed.fragment then url = url .. "#" .. parsed.fragment end
    -- url = string.gsub(url, "%s", "")
    return url
end
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
-- base_url
-- relative_url
-- Returns
-- corresponding absolute url
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- declared local so that the parsed base does not leak into the module
    -- table as an accidental global
    local base_parsed
    if base.type(base_url) == "table" then
        -- the base was given already parsed; keep its string form as well,
        -- because it is returned when the relative URL cannot be parsed
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url end
    if not relative_parsed then return base_url end
    -- a relative URL that carries its own scheme is already absolute
    if relative_parsed.scheme then return relative_url end
    -- inherit from the base every leading component the relative URL lacks
    relative_parsed.scheme = base_parsed.scheme
    if not relative_parsed.authority then
        relative_parsed.authority = base_parsed.authority
        if not relative_parsed.path then
            relative_parsed.path = base_parsed.path
            if not relative_parsed.params then
                relative_parsed.params = base_parsed.params
                if not relative_parsed.query then
                    relative_parsed.query = base_parsed.query
                end
            end
        else
            -- both have a path: resolve the relative one against the base
            relative_parsed.path = absolute_path(base_parsed.path or "",
                relative_parsed.path)
        end
    end
    return build(relative_parsed)
end
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
-- path
-- Returns
-- segment: a table with one entry per segment
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    --path = string.gsub(path, "%s", "")
    -- every run of characters between slashes is one segment; segments are
    -- stored unescaped, in order of appearance
    for segment in string.gmatch(path, "[^/]+") do
        parsed[#parsed + 1] = unescape(segment)
    end
    -- record the leading and trailing slashes, which the segment list alone
    -- cannot represent
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    local n = #parsed
    -- collect the segments, protected unless the caller asked for `unsafe`,
    -- and join them once instead of growing a string piece by piece
    local segments = {}
    for i = 1, n do
        local segment = parsed[i]
        if not unsafe then segment = protect_segment(segment) end
        segments[i] = segment
    end
    local path = table.concat(segments, "/")
    -- the trailing slash is only added when there is at least one segment,
    -- so an empty absolute directory yields "/" rather than "//"
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end