Reworked absolutising; it's better, but I need to go back to the RFC at some point.

This commit is contained in:
jez 2008-08-25 23:05:37 +00:00
parent 3d661b355c
commit cec3c0c659
3 changed files with 77 additions and 18 deletions

View file

@ -20,7 +20,8 @@ namespace Arabica
scheme_(rhs.scheme_),
host_(rhs.host_),
path_(rhs.path_),
port_(rhs.port_)
port_(rhs.port_),
is_absolute_(rhs.is_absolute_)
{
} // URI
@ -36,7 +37,8 @@ namespace Arabica
return scheme_ == rhs.scheme_ &&
host_ == rhs.host_ &&
path_ == rhs.path_ &&
port_ == rhs.port_;
port_ == rhs.port_ &&
is_absolute_ == rhs.is_absolute_;
} // operator==
bool operator!=(const URI& rhs) const
@ -52,17 +54,20 @@ namespace Arabica
std::swap(host_, rhs.host_);
std::swap(path_, rhs.path_);
std::swap(port_, rhs.port_);
std::swap(is_absolute_, rhs.is_absolute_);
} // swap
// Read-only accessors for the individual URI components.
const std::string& scheme() const { return scheme_; }
const std::string& host() const { return host_; }
// Declared here only; the definition is not part of this view.
const std::string& port() const;
const std::string& path() const { return path_; }
// Reports the absolute/relative classification stored in is_absolute_.
const bool& is_absolute() const { return is_absolute_; }
// Reassembles the stored components into a single URI string.
std::string as_string() const;
private:
// Entry point for parsing: runs parse_uri() and then derives is_absolute_.
void parse(const std::string& URI);
// Splits the URI text into its components (scheme_ and path_ directly,
// the authority part via parseAuthority()).
void parse_uri(const std::string& URI);
// Handles the authority section found between the iterators u and ue.
void parseAuthority(std::string::const_iterator& u, std::string::const_iterator& ue);
// Resolves relURI against this URI; relURI is taken by non-const reference
// because its contents may be swapped into this object.
void absolutise(URI& relURI);
// Appends a relative path to path_ and collapses "/../" segments.
void combinePath(const std::string& path);
@ -71,6 +76,7 @@ namespace Arabica
// Host name; written after the "//" marker by as_string().
std::string host_;
// Path component; as_string() appends it last.
std::string path_;
// Port as text; as_string() emits it after the host when non-empty.
std::string port_;
// Absolute/relative flag, computed by parse() after the components are set.
bool is_absolute_;
}; // class URI
} // namespace io
} // namespace Arabica

View file

@ -8,22 +8,30 @@ namespace {
// File-local string constants shared by the URI implementation.
const std::string ZERO = "0";
const std::string PORT_EIGHTY = "80";
const std::string PORT_443 = "443";

const std::string SCHEME_HTTP = "http";
const std::string SCHEME_HTTPS = "https";
const std::string SCHEME_FILE = "file";

const std::string COLON = ":";
const char FORWARD_SLASH = '/';

// Returns the default port for a scheme, as text: "80" for http, "443" for
// https, and "0" for an empty or unrecognised scheme. The comparison is
// case-sensitive. The returned reference is to one of the constants above.
const std::string& wellKnownPort(const std::string& scheme)
{
  if(scheme.empty())
    return ZERO;

  // each scheme is tested once, against the named constant only
  if(scheme == SCHEME_HTTP)
    return PORT_EIGHTY;
  if(scheme == SCHEME_HTTPS)
    return PORT_443;

  return ZERO;
} // wellKnownPort
} // namespace
// Constructs a URI from its textual form. is_absolute_ starts out false and
// is then set by parse() once the components have been extracted.
URI::URI(const std::string& uri) :
  is_absolute_(false)
{
  parse(uri);
} // URI
@ -32,7 +40,8 @@ URI::URI(const URI& base, const std::string& relativeUrl) :
scheme_(base.scheme_),
host_(base.host_),
path_(base.path_),
port_(base.port_)
port_(base.port_),
is_absolute_(base.is_absolute_)
{
if(!relativeUrl.empty())
{
@ -52,12 +61,14 @@ std::string URI::as_string() const
{
std::string str;
if(!scheme_.empty())
str.append(scheme_).append("://");
str.append(scheme_).append(COLON);
if(is_absolute_)
str.append("//");
if(!host_.empty())
{
str.append(host_);
if(!port_.empty())
str.append(":").append(port_);
str.append(COLON).append(port_);
}
str.append(path_);
return str;
@ -66,15 +77,24 @@ std::string URI::as_string() const
// Converts every backslash in path to a forward slash, in place.
void fixSlashes(std::string& path)
{
  // The index is a size_type rather than an int: find() reports failure as
  // std::string::npos, which an int can only represent through narrowing.
  for(std::string::size_type i = path.find('\\'); i != std::string::npos; i = path.find('\\', i))
    path[i] = FORWARD_SLASH;
} // fixSlashes
// Parses uri into its components and then classifies it as absolute or not.
// A URI is treated as absolute when it has both a scheme and a host, or when
// it is a file: URI whose path either starts with '/' or has ':' as its
// second character (e.g. "d:/work/...").
void URI::parse(const std::string& uri)
{
  parse_uri(uri);

  const bool has_scheme_and_host = !scheme_.empty() && !host_.empty();

  // The length is checked before path_[1] is read, so a one-character path
  // is never indexed at its end position.
  const bool rooted_file_path =
      (scheme_ == SCHEME_FILE) && !path_.empty() &&
      ((path_[0] == FORWARD_SLASH) || ((path_.size() > 1) && (path_[1] == ':')));

  is_absolute_ = has_scheme_and_host || rooted_file_path;
} // parse
void URI::parse_uri(const std::string& uri)
{
// I'd like to use something a bit stronger - http://code.google.com/p/uri-grammar/
// but that would put a Boost Spirit dependence right in the core, which I'm not prepared to do at the moment
int d = uri.find_first_of(":");
int d = uri.find_first_of(COLON);
if(d == std::string::npos)
{
path_ = uri;
@ -87,7 +107,7 @@ void URI::parse(const std::string& uri)
// looks like a windows file path
path_ = uri;
fixSlashes(path_);
scheme_ = "file";
scheme_ = SCHEME_FILE;
return;
} // if ...
@ -97,7 +117,7 @@ void URI::parse(const std::string& uri)
std::string::const_iterator ue = uri.end();
++u;
if(*u == '/' && *(u+1) == '/')
if(*u == FORWARD_SLASH && *(u+1) == FORWARD_SLASH)
{
u += 2;
parseAuthority(u, ue);
@ -108,7 +128,7 @@ void URI::parse(const std::string& uri)
void URI::parseAuthority(std::string::const_iterator& u, std::string::const_iterator& ue)
{
std::string::const_iterator slash = std::find(u, ue, '/');
std::string::const_iterator slash = std::find(u, ue, FORWARD_SLASH);
if(slash == ue)
return;
@ -123,13 +143,14 @@ void URI::parseAuthority(std::string::const_iterator& u, std::string::const_iter
void URI::absolutise(URI& relative)
{
if(!relative.scheme().empty())
if((relative.is_absolute()) ||
((!relative.scheme().empty()) && (relative.scheme() != scheme_)))
{
swap(relative);
return;
}
if(relative.path_[0] == '/')
if(relative.path_[0] == FORWARD_SLASH)
path_ = relative.path_;
else
combinePath(relative.path_);
@ -137,8 +158,8 @@ void URI::absolutise(URI& relative)
void URI::combinePath(const std::string& relPath)
{
if(*(path_.rbegin()) != '/')
path_.erase(path_.rfind('/')+1);
if(*(path_.rbegin()) != FORWARD_SLASH)
path_.erase(path_.rfind(FORWARD_SLASH)+1);
std::string::size_type from = path_.length() - 1;
path_.append(relPath);
@ -146,7 +167,7 @@ void URI::combinePath(const std::string& relPath)
int dots = path_.find("/../", from);
while(dots != std::string::npos)
{
int preceding_slash = (dots > 0) ? path_.rfind('/', dots-1) : 0;
int preceding_slash = (dots > 0) ? path_.rfind(FORWARD_SLASH, dots-1) : 0;
path_.erase(preceding_slash, dots+3-preceding_slash);
dots = path_.find("/../", preceding_slash);
} // while

View file

@ -21,6 +21,7 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("", u.scheme());
assertEquals("0", u.port());
assertEquals(false, u.is_absolute());
assertEquals("woo", u.as_string());
} // test1
@ -32,6 +33,7 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("", u.scheme());
assertEquals("0", u.port());
assertEquals(false, u.is_absolute());
assertEquals("woo.xml", u.as_string());
} // test2
@ -43,6 +45,7 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("", u.scheme());
assertEquals("0", u.port());
assertEquals(false, u.is_absolute());
assertEquals("woo/woo.xml", u.as_string());
} // test3
@ -54,6 +57,7 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("", u.scheme());
assertEquals("0", u.port());
assertEquals(false, u.is_absolute());
assertEquals("/woo/woo.xml", u.as_string());
} // test4
@ -65,6 +69,7 @@ class URITest : public TestCase
assertEquals("localhost", u.host());
assertEquals("http", u.scheme());
assertEquals("80", u.port());
assertEquals(true, u.is_absolute());
assertEquals("http://localhost/woo/woo.xml", u.as_string());
}
@ -76,6 +81,7 @@ class URITest : public TestCase
assertEquals("localhost", u.host());
assertEquals("http", u.scheme());
assertEquals("8080", u.port());
assertEquals(true, u.is_absolute());
assertEquals("http://localhost:8080/woo/woo.xml", u.as_string());
}
@ -87,6 +93,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", u.host());
assertEquals("http", u.scheme());
assertEquals("80", u.port());
assertEquals(true, u.is_absolute());
assertEquals("http://www.jezuk.co.uk/arabica/news", u.as_string());
}
@ -98,6 +105,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", u.host());
assertEquals("http", u.scheme());
assertEquals("8000", u.port());
assertEquals(true, u.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/news", u.as_string());
}
@ -110,6 +118,7 @@ class URITest : public TestCase
assertEquals("localhost", r.host());
assertEquals("http", r.scheme());
assertEquals("80", r.port());
assertEquals(true, u.is_absolute());
assertEquals("http://localhost/nobby", r.as_string());
} // test9
@ -122,6 +131,7 @@ class URITest : public TestCase
assertEquals("localhost", r.host());
assertEquals("http", r.scheme());
assertEquals("80", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://localhost/", r.as_string());
} // test10
@ -134,6 +144,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/trouser/press", r.as_string());
} // test11
@ -146,6 +157,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/trouser/press", r.as_string());
} // test12
@ -158,6 +170,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/trouser/press", r.as_string());
} // test13
@ -170,6 +183,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/trouser/press", r.as_string());
} // test14
@ -182,6 +196,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/trouser/press", r.as_string());
} // test15
@ -194,6 +209,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/news/trouser/press", r.as_string());
} // test16
@ -206,6 +222,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/news/trouser/press", r.as_string());
} // test17
@ -218,6 +235,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/arabica/trouser/press", r.as_string());
} // test18
@ -230,6 +248,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, r.is_absolute());
} // test19
void test20()
@ -241,6 +260,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", r.host());
assertEquals("http", r.scheme());
assertEquals("8000", r.port());
assertEquals(true, u.is_absolute());
assertEquals("http://www.jezuk.co.uk:8000/trouser/press", r.as_string());
} // test20
@ -255,6 +275,7 @@ class URITest : public TestCase
assertEquals("www.jezuk.co.uk", u.host());
assertEquals("http", u.scheme());
assertEquals("80", u.port());
assertEquals(true, u.is_absolute());
} // test21
void test22()
@ -348,6 +369,7 @@ class URITest : public TestCase
assertEquals("file", u.scheme());
assertEquals("0", u.port());
assertEquals("file://d:/work/jezuk/arabica/file.xml", u.as_string());
assertEquals(true, u.is_absolute());
} // test32
void test33()
@ -357,6 +379,8 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("file", u.scheme());
assertEquals("/woot.txt", u.path());
assertEquals(true, u.is_absolute());
assertEquals("file:///woot.txt", u.as_string());
} // test33
void test34()
@ -366,6 +390,8 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("file", u.scheme());
assertEquals("/woot.txt", u.path());
assertEquals(true, u.is_absolute());
assertEquals("file:///woot.txt", u.as_string());
} // test34
void test35()
@ -375,6 +401,8 @@ class URITest : public TestCase
assertEquals("", u.host());
assertEquals("file", u.scheme());
assertEquals("woot.txt", u.path());
assertEquals(false, u.is_absolute());
assertEquals("file:woot.txt", u.as_string());
} // test35
void test36()
@ -384,6 +412,8 @@ class URITest : public TestCase
assertEquals("localhost", u.host());
assertEquals("file", u.scheme());
assertEquals("/woot.txt", u.path());
assertEquals(true, u.is_absolute());
assertEquals("file://localhost/woot.txt", u.as_string());
} // test36
void test37()
@ -394,6 +424,7 @@ class URITest : public TestCase
assertEquals("", r.host());
assertEquals("file", r.scheme());
assertEquals("/woot/woot/woo.txt", r.path());
assertEquals(true, r.is_absolute());
} // test37
void test38()
@ -404,6 +435,7 @@ class URITest : public TestCase
assertEquals("", r.host());
assertEquals("file", r.scheme());
assertEquals("/woot/woot/woo.txt", r.path());
assertEquals(true, r.is_absolute());
} // test38
}; // class URITest