merged with mangle-dev branch

This commit is contained in:
jez 2007-07-19 16:59:26 +00:00
parent 59dee666d8
commit 5cef2d9e75
5 changed files with 167 additions and 26 deletions

View file

@ -13,15 +13,14 @@ cc_sources = arabica.cpp \
Utils/utf16utf8codecvt.cpp \
Utils/utf8iso88591codecvt.cpp \
Utils/utf8ucs2codecvt.cpp \
Utils/uri.cpp \
XML/XMLCharacterClasses.cpp
library_includedir=$(includedir)
INCLUDES = -I$(top_srcdir)/include $(PARSER_HEADERS) $(BOOST_CPPFLAGS)
AM_CPPFLAGS = -I$(top_srcdir)/include @PARSER_HEADERS@ $(BOOST_CPPFLAGS)
lib_LTLIBRARIES = libarabica.la
libarabica_la_SOURCES= $(cc_sources)
libarabica_la_LDFLAGS= $(PARSER_LIBS)
libarabica_la_LDFLAGS= @PARSER_LIBS@

View file

@ -10,6 +10,8 @@
#include <istream>
#include <fstream>
#include <Utils/socket_stream.h>
#include <Utils/uri.hpp>
#include <cmath>
InputSourceResolver::InputSourceResolver(const SAX::InputSource& inputSource) :
deleteStream_(false),
@ -30,11 +32,11 @@ void InputSourceResolver::open(const std::string& publicId,
return;
}
// does it look like a URL?
std::string::size_type colonIndex = systemId.find("://");
if(colonIndex != std::string::npos)
// does it look like a URI?
Arabica::io::URI url(systemId);
if(!url.scheme().empty())
{
URIResolver res = findResolver(systemId.substr(0, colonIndex));
URIResolver res = findResolver(url.scheme());
if(res)
byteStream_ = res(systemId);
if(byteStream_)
@ -45,7 +47,7 @@ void InputSourceResolver::open(const std::string& publicId,
} // if ...
// try and open it as a file
std::ifstream* ifs = new std::ifstream(systemId.c_str());
std::ifstream* ifs = new std::ifstream(url.path().c_str());
if(ifs->is_open())
{
deleteStream_ = true;
@ -85,10 +87,10 @@ InputSourceResolver::URIResolver InputSourceResolver::findResolver(std::string m
namespace
{
std::istream* fileResolver(const std::string& fileURL)
std::istream* fileResolver(const std::string& fileURI)
{
int colon = fileURL.find("://");
std::string fileName = fileURL.substr(colon+3);
Arabica::io::URI url(fileURI);
std::string fileName = url.path();
std::ifstream* ifs = new std::ifstream(fileName.c_str());
if(ifs->is_open())
@ -113,7 +115,7 @@ namespace
static bool fileReg = InputSourceResolver::registerResolver("file", fileResolver);
std::istream* httpResolver(const std::string& httpURL)
std::istream* httpResolver(const std::string& httpURI)
{
#ifdef ARABICA_USE_WINSOCK
WORD wVersionRequested;
@ -126,23 +128,16 @@ namespace
return 0;
#endif
int colon1 = httpURL.find("://");
colon1 += 3;
//int colon2 = httpURL.find("://", colon1);
int slash1 = httpURL.find("/", colon1);
Arabica::io::URI url(httpURI);
std::string hostName = httpURL.substr(colon1, slash1 - (colon1));
std::string path = httpURL.substr(slash1);
Arabica::socketstream* ifs = new Arabica::socketstream(hostName.c_str(), 80);
Arabica::socketstream* ifs = new Arabica::socketstream(url.host().c_str(), std::atoi(url.port().c_str()));
if(!ifs->is_open())
return 0;
*ifs << "GET " << path << " HTTP/1.0" << std::endl;
*ifs << "Host: " << hostName << std::endl;
*ifs << "GET " << url.path() << " HTTP/1.0" << std::endl;
*ifs << "Host: " << url.host() << std::endl;
*ifs << "Connection: close" << std::endl;
*ifs << std::endl;
char buffer[1024];
do
{

View file

@ -64,6 +64,12 @@ void lwit_processingInstruction(void *user_data, const xmlChar* target, const xm
p->SAXprocessingInstruction(target, data);
} // lwit_processingInstruction
void lwit_comment(void *user_data, const xmlChar* comment)
{
libxml2_base* p = reinterpret_cast<libxml2_base*>(user_data);
p->SAXcomment(comment);
} // lwit_comment
void lwit_warning(void *user_data, const char* fmt, ...)
{
va_list arg;
@ -168,7 +174,7 @@ static xmlSAXHandler saxHandler = {
lwit_characters, // charactersSAXFunc characters;
lwit_ignorableWhitespace, // ignorableWhitespaceSAXFunc ignorableWhitespace;
lwit_processingInstruction, // processingInstructionSAXFunc processingInstruction;
0, // commentSAXFunc comment;
lwit_comment, // commentSAXFunc comment;
lwit_warning, // warningSAXFunc warning;
lwit_error, // errorSAXFunc error;
lwit_fatalError, // fatalErrorSAXFunc fatalError;

View file

@ -82,7 +82,7 @@ std::codecvt_base::result base64codecvt::do_in(std::mbstate_t& state,
from_next = from;
to_next = to;
while((from_next != from_end) && (to != to_limit))
while((from_next != from_end) && (to_next != to_limit))
{
char b = *from_next++;
size_t i = base64_charset.find(b);

141
src/Utils/uri.cpp Normal file
View file

@ -0,0 +1,141 @@
#include <algorithm>
#include <Utils/uri.hpp>
using namespace Arabica::io;
namespace {
  // Port numbers, as text. wellKnownPort() hands these out by reference,
  // so they have to outlive every caller.
  const std::string ZERO = "0";
  const std::string PORT_EIGHTY = "80";
  const std::string PORT_443 = "443";

  // Maps a URI scheme onto its default port: "http" gives "80",
  // "https" gives "443", and anything else (the empty scheme included)
  // gives "0". The comparison is exact, so case matters.
  const std::string& wellKnownPort(const std::string& scheme)
  {
    if(scheme == "https")
      return PORT_443;
    return (scheme == "http") ? PORT_EIGHTY : ZERO;
  } // wellKnownPort
} // namespace
// Builds a URI from its textual form; all the work is done by parse().
URI::URI(const std::string& uri)
{
  this->parse(uri);
} // URI
// Builds a URI by resolving relativeUrl against base: start out as a
// copy of base's scheme, host, path and port, then let absolutise()
// fold in the parsed relative reference.
URI::URI(const URI& base, const std::string& relativeUrl) :
  scheme_(base.scheme_),
  host_(base.host_),
  path_(base.path_),
  port_(base.port_)
{
  URI reference(relativeUrl);
  this->absolutise(reference);
} // URI
// Returns the port as text. An explicitly parsed port wins; when there
// is none, the scheme's default from wellKnownPort() is returned instead.
const std::string& URI::port() const
{
  return port_.empty() ? wellKnownPort(scheme_) : port_;
} // port()
// Reassembles the textual form: "scheme://" when a scheme is set, then
// "host" and ":port" (the port only if one was stored explicitly, and
// only alongside a host), then the path.
std::string URI::as_string() const
{
  std::string text;

  if(!scheme_.empty())
  {
    text += scheme_;
    text += "://";
  }

  if(!host_.empty())
  {
    text += host_;
    if(!port_.empty())
    {
      text += ":";
      text += port_;
    }
  }

  text += path_;
  return text;
} // as_string
// Splits uri into its components.
//  - No ':' at all: the whole text becomes the path.
//  - Otherwise everything before the first ':' is the scheme. If "//"
//    follows, parseAuthority() gets the chance to pull out host and port,
//    and whatever it leaves unconsumed is appended to the path.
void URI::parse(const std::string& uri)
{
  // I'd like to use something a bit stronger - http://code.google.com/p/uri-grammar/
  // but that would put a Boost Spirit dependence right in the core, which I'm not prepared to do at the moment

  // keep the position as size_type so the npos test involves no narrowing
  const std::string::size_type colon = uri.find(':');
  if(colon == std::string::npos)
  {
    path_ = uri;
    return;
  } // if ...

  scheme_ = uri.substr(0, colon);

  // colon < size(), so colon + 1 is at most size() and this is a valid
  // (possibly end) iterator
  std::string::const_iterator u = uri.begin() + (colon + 1);
  std::string::const_iterator ue = uri.end();

  // compare() only looks at the characters that really exist, so input
  // ending in ":" or ":/" is never read past its end
  if(uri.compare(colon + 1, 2, "//") == 0)
  {
    u += 2;
    parseAuthority(u, ue);
  } // if ...

  path_.append(u, ue);
} // parse
// Reads "host[:port]" from [u, ue), where the authority is terminated by
// the first '/'. On success host_ and port_ are appended to and u is moved
// onto that '/'. If the range contains no '/', nothing is consumed and
// nothing is stored - the caller is left holding the whole range.
void URI::parseAuthority(std::string::const_iterator& u, std::string::const_iterator& ue)
{
  const std::string::const_iterator authorityEnd = std::find(u, ue, '/');
  if(authorityEnd != ue)
  {
    const std::string::const_iterator portMark = std::find(u, authorityEnd, ':');

    host_.append(u, portMark);
    if(portMark != authorityEnd)
      port_.append(portMark + 1, authorityEnd);

    u = authorityEnd;
  } // if ...
} // parseAuthority
// Resolves relative against this URI, in place.
//  - relative carries a scheme: it is already absolute, so this URI
//    simply swaps contents with it.
//  - relative's path starts with '/': that path replaces ours.
//  - otherwise the path is merged into ours by combinePath().
void URI::absolutise(URI& relative)
{
  if(relative.scheme().empty())
  {
    if(relative.path_[0] != '/')
      combinePath(relative.path_);
    else
      path_ = relative.path_;
    return;
  } // if ...

  swap(relative);
} // absolutise
// Merges relPath into path_:
//  1. everything after the last '/' of the current path is dropped
//     (a path with no '/' at all, or an empty one, is dropped entirely),
//  2. relPath is appended,
//  3. each "segment/../" is collapsed,
//  4. each "/./" is reduced to "/".
// Only occurrences delimited by a slash on both sides are handled; a
// trailing "/.." or "/." is left as it is.
void URI::combinePath(const std::string& relPath)
{
  // Looking the slash up directly, rather than peeking at the last
  // character, keeps this safe when path_ is empty.
  const std::string::size_type lastSlash = path_.rfind('/');
  if(lastSlash == std::string::npos)
    path_.clear();
  else
    path_.erase(lastSlash + 1);

  path_.append(relPath);

  std::string::size_type dots = path_.find("/../");
  while(dots != std::string::npos)
  {
    const std::string::size_type preceding_slash =
        (dots > 0) ? path_.rfind('/', dots-1) : 0;

    if(preceding_slash == std::string::npos)
      // the segment in front of "/../" is the very first one and has no
      // leading slash ("a/../c"): remove "a/../" from the start of the path
      path_.erase(0, dots+4);
    else
      // remove "/segment/..", keeping the slash that followed the dots
      path_.erase(preceding_slash, dots+3-preceding_slash);

    dots = path_.find("/../");
  } // while

  std::string::size_type dot = path_.find("/./");
  while(dot != std::string::npos)
  {
    path_.erase(dot, 2);
    // search again from the same spot - "/././" leaves another "/./" here
    dot = path_.find("/./", dot);
  } // while
} // combinePath