mirror of
https://github.com/jezhiggins/arabica
synced 2025-02-05 20:45:56 +01:00
merged with mangle-dev branch
This commit is contained in:
parent
59dee666d8
commit
5cef2d9e75
5 changed files with 167 additions and 26 deletions
|
@ -13,15 +13,14 @@ cc_sources = arabica.cpp \
|
||||||
Utils/utf16utf8codecvt.cpp \
|
Utils/utf16utf8codecvt.cpp \
|
||||||
Utils/utf8iso88591codecvt.cpp \
|
Utils/utf8iso88591codecvt.cpp \
|
||||||
Utils/utf8ucs2codecvt.cpp \
|
Utils/utf8ucs2codecvt.cpp \
|
||||||
|
Utils/uri.cpp \
|
||||||
XML/XMLCharacterClasses.cpp
|
XML/XMLCharacterClasses.cpp
|
||||||
|
|
||||||
library_includedir=$(includedir)
|
AM_CPPFLAGS = -I$(top_srcdir)/include @PARSER_HEADERS@ $(BOOST_CPPFLAGS)
|
||||||
|
|
||||||
INCLUDES = -I$(top_srcdir)/include $(PARSER_HEADERS) $(BOOST_CPPFLAGS)
|
|
||||||
|
|
||||||
lib_LTLIBRARIES = libarabica.la
|
lib_LTLIBRARIES = libarabica.la
|
||||||
libarabica_la_SOURCES= $(cc_sources)
|
libarabica_la_SOURCES= $(cc_sources)
|
||||||
libarabica_la_LDFLAGS= $(PARSER_LIBS)
|
libarabica_la_LDFLAGS= @PARSER_LIBS@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,8 @@
|
||||||
#include <istream>
|
#include <istream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <Utils/socket_stream.h>
|
#include <Utils/socket_stream.h>
|
||||||
|
#include <Utils/uri.hpp>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
InputSourceResolver::InputSourceResolver(const SAX::InputSource& inputSource) :
|
InputSourceResolver::InputSourceResolver(const SAX::InputSource& inputSource) :
|
||||||
deleteStream_(false),
|
deleteStream_(false),
|
||||||
|
@ -30,11 +32,11 @@ void InputSourceResolver::open(const std::string& publicId,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// does it look like a URL?
|
// does it look like a URI?
|
||||||
std::string::size_type colonIndex = systemId.find("://");
|
Arabica::io::URI url(systemId);
|
||||||
if(colonIndex != std::string::npos)
|
if(!url.scheme().empty())
|
||||||
{
|
{
|
||||||
URIResolver res = findResolver(systemId.substr(0, colonIndex));
|
URIResolver res = findResolver(url.scheme());
|
||||||
if(res)
|
if(res)
|
||||||
byteStream_ = res(systemId);
|
byteStream_ = res(systemId);
|
||||||
if(byteStream_)
|
if(byteStream_)
|
||||||
|
@ -45,7 +47,7 @@ void InputSourceResolver::open(const std::string& publicId,
|
||||||
} // if ...
|
} // if ...
|
||||||
|
|
||||||
// try and open it as a file
|
// try and open it as a file
|
||||||
std::ifstream* ifs = new std::ifstream(systemId.c_str());
|
std::ifstream* ifs = new std::ifstream(url.path().c_str());
|
||||||
if(ifs->is_open())
|
if(ifs->is_open())
|
||||||
{
|
{
|
||||||
deleteStream_ = true;
|
deleteStream_ = true;
|
||||||
|
@ -85,10 +87,10 @@ InputSourceResolver::URIResolver InputSourceResolver::findResolver(std::string m
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
std::istream* fileResolver(const std::string& fileURL)
|
std::istream* fileResolver(const std::string& fileURI)
|
||||||
{
|
{
|
||||||
int colon = fileURL.find("://");
|
Arabica::io::URI url(fileURI);
|
||||||
std::string fileName = fileURL.substr(colon+3);
|
std::string fileName = url.path();
|
||||||
|
|
||||||
std::ifstream* ifs = new std::ifstream(fileName.c_str());
|
std::ifstream* ifs = new std::ifstream(fileName.c_str());
|
||||||
if(ifs->is_open())
|
if(ifs->is_open())
|
||||||
|
@ -113,7 +115,7 @@ namespace
|
||||||
|
|
||||||
static bool fileReg = InputSourceResolver::registerResolver("file", fileResolver);
|
static bool fileReg = InputSourceResolver::registerResolver("file", fileResolver);
|
||||||
|
|
||||||
std::istream* httpResolver(const std::string& httpURL)
|
std::istream* httpResolver(const std::string& httpURI)
|
||||||
{
|
{
|
||||||
#ifdef ARABICA_USE_WINSOCK
|
#ifdef ARABICA_USE_WINSOCK
|
||||||
WORD wVersionRequested;
|
WORD wVersionRequested;
|
||||||
|
@ -126,23 +128,16 @@ namespace
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int colon1 = httpURL.find("://");
|
Arabica::io::URI url(httpURI);
|
||||||
colon1 += 3;
|
|
||||||
//int colon2 = httpURL.find("://", colon1);
|
|
||||||
int slash1 = httpURL.find("/", colon1);
|
|
||||||
|
|
||||||
std::string hostName = httpURL.substr(colon1, slash1 - (colon1));
|
Arabica::socketstream* ifs = new Arabica::socketstream(url.host().c_str(), std::atoi(url.port().c_str()));
|
||||||
std::string path = httpURL.substr(slash1);
|
|
||||||
|
|
||||||
Arabica::socketstream* ifs = new Arabica::socketstream(hostName.c_str(), 80);
|
|
||||||
if(!ifs->is_open())
|
if(!ifs->is_open())
|
||||||
return 0;
|
return 0;
|
||||||
*ifs << "GET " << path << " HTTP/1.0" << std::endl;
|
*ifs << "GET " << url.path() << " HTTP/1.0" << std::endl;
|
||||||
*ifs << "Host: " << hostName << std::endl;
|
*ifs << "Host: " << url.host() << std::endl;
|
||||||
*ifs << "Connection: close" << std::endl;
|
*ifs << "Connection: close" << std::endl;
|
||||||
*ifs << std::endl;
|
*ifs << std::endl;
|
||||||
|
|
||||||
|
|
||||||
char buffer[1024];
|
char buffer[1024];
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
|
|
@ -64,6 +64,12 @@ void lwit_processingInstruction(void *user_data, const xmlChar* target, const xm
|
||||||
p->SAXprocessingInstruction(target, data);
|
p->SAXprocessingInstruction(target, data);
|
||||||
} // lwit_processingInstruction
|
} // lwit_processingInstruction
|
||||||
|
|
||||||
|
void lwit_comment(void *user_data, const xmlChar* comment)
|
||||||
|
{
|
||||||
|
libxml2_base* p = reinterpret_cast<libxml2_base*>(user_data);
|
||||||
|
p->SAXcomment(comment);
|
||||||
|
} // lwit_comment
|
||||||
|
|
||||||
void lwit_warning(void *user_data, const char* fmt, ...)
|
void lwit_warning(void *user_data, const char* fmt, ...)
|
||||||
{
|
{
|
||||||
va_list arg;
|
va_list arg;
|
||||||
|
@ -168,7 +174,7 @@ static xmlSAXHandler saxHandler = {
|
||||||
lwit_characters, // charactersSAXFunc characters;
|
lwit_characters, // charactersSAXFunc characters;
|
||||||
lwit_ignorableWhitespace, // ignorableWhitespaceSAXFunc ignorableWhitespace;
|
lwit_ignorableWhitespace, // ignorableWhitespaceSAXFunc ignorableWhitespace;
|
||||||
lwit_processingInstruction, // processingInstructionSAXFunc processingInstruction;
|
lwit_processingInstruction, // processingInstructionSAXFunc processingInstruction;
|
||||||
0, // commentSAXFunc comment;
|
lwit_comment, // commentSAXFunc comment;
|
||||||
lwit_warning, // warningSAXFunc warning;
|
lwit_warning, // warningSAXFunc warning;
|
||||||
lwit_error, // errorSAXFunc error;
|
lwit_error, // errorSAXFunc error;
|
||||||
lwit_fatalError, // fatalErrorSAXFunc fatalError;
|
lwit_fatalError, // fatalErrorSAXFunc fatalError;
|
||||||
|
|
|
@ -82,7 +82,7 @@ std::codecvt_base::result base64codecvt::do_in(std::mbstate_t& state,
|
||||||
from_next = from;
|
from_next = from;
|
||||||
to_next = to;
|
to_next = to;
|
||||||
|
|
||||||
while((from_next != from_end) && (to != to_limit))
|
while((from_next != from_end) && (to_next != to_limit))
|
||||||
{
|
{
|
||||||
char b = *from_next++;
|
char b = *from_next++;
|
||||||
size_t i = base64_charset.find(b);
|
size_t i = base64_charset.find(b);
|
||||||
|
|
141
src/Utils/uri.cpp
Normal file
141
src/Utils/uri.cpp
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <Utils/uri.hpp>
|
||||||
|
|
||||||
|
using namespace Arabica::io;
|
||||||
|
|
||||||
|
namespace {
  // Port numbers, held as strings, that wellKnownPort() can hand back by reference.
  const std::string ZERO = "0";
  const std::string PORT_EIGHTY = "80";
  const std::string PORT_443 = "443";

  // Returns the default port for the given scheme: "80" for http, "443" for
  // https, and "0" for anything else, including an empty scheme.
  const std::string& wellKnownPort(const std::string& scheme)
  {
    if(scheme == "https")
      return PORT_443;

    return (scheme == "http") ? PORT_EIGHTY : ZERO;
  } // wellKnownPort
} // namespace
|
||||||
|
|
||||||
|
// Constructs a URI from its string form.  All members start out default
// constructed; parse() then fills in whichever pieces it finds in uri.
URI::URI(const std::string& uri)
{
  this->parse(uri);
} // URI
|
||||||
|
|
||||||
|
// Constructs a URI by resolving relativeUrl against base.  The new object
// starts as a copy of base's scheme, host, path and port; relativeUrl is
// parsed on its own and absolutise() then merges it in.
URI::URI(const URI& base, const std::string& relativeUrl) :
  scheme_(base.scheme_),
  host_(base.host_),
  path_(base.path_),
  port_(base.port_)
{
  URI reference(relativeUrl);
  absolutise(reference);
} // URI
|
||||||
|
|
||||||
|
const std::string& URI::port() const
|
||||||
|
{
|
||||||
|
if(port_.empty())
|
||||||
|
return wellKnownPort(scheme_);
|
||||||
|
return port_;
|
||||||
|
} // port()
|
||||||
|
|
||||||
|
std::string URI::as_string() const
|
||||||
|
{
|
||||||
|
std::string str;
|
||||||
|
if(!scheme_.empty())
|
||||||
|
str.append(scheme_).append("://");
|
||||||
|
if(!host_.empty())
|
||||||
|
{
|
||||||
|
str.append(host_);
|
||||||
|
if(!port_.empty())
|
||||||
|
str.append(":").append(port_);
|
||||||
|
}
|
||||||
|
str.append(path_);
|
||||||
|
return str;
|
||||||
|
} // as_string
|
||||||
|
|
||||||
|
|
||||||
|
void URI::parse(const std::string& uri)
|
||||||
|
{
|
||||||
|
// I'd like to use something a bit stronger - http://code.google.com/p/uri-grammar/
|
||||||
|
// but that would put a Boost Spirit dependence right in the core, which I'm not prepared to do at the moment
|
||||||
|
|
||||||
|
int d = uri.find_first_of(":");
|
||||||
|
if(d == std::string::npos)
|
||||||
|
{
|
||||||
|
path_ = uri;
|
||||||
|
return;
|
||||||
|
} // if ...
|
||||||
|
|
||||||
|
scheme_ = uri.substr(0, d);
|
||||||
|
|
||||||
|
std::string::const_iterator u = uri.begin() + d;
|
||||||
|
std::string::const_iterator ue = uri.end();
|
||||||
|
|
||||||
|
++u;
|
||||||
|
if(*u == '/' && *(u+1) == '/')
|
||||||
|
{
|
||||||
|
u += 2;
|
||||||
|
parseAuthority(u, ue);
|
||||||
|
} // if ...
|
||||||
|
|
||||||
|
path_.append(u, ue);
|
||||||
|
} // parse
|
||||||
|
|
||||||
|
void URI::parseAuthority(std::string::const_iterator& u, std::string::const_iterator& ue)
|
||||||
|
{
|
||||||
|
std::string::const_iterator slash = std::find(u, ue, '/');
|
||||||
|
if(slash == ue)
|
||||||
|
return;
|
||||||
|
|
||||||
|
std::string::const_iterator colon = std::find(u, slash, ':');
|
||||||
|
host_.append(u, colon);
|
||||||
|
|
||||||
|
if(colon != slash)
|
||||||
|
port_.append(colon+1, slash);
|
||||||
|
|
||||||
|
u = slash;
|
||||||
|
} // parseAuthority
|
||||||
|
|
||||||
|
// Resolves relative against this URI, which on entry holds the base.
//
// - If relative carries a scheme it is already absolute, so this object simply
//   swaps contents with it.
// - If relative's path starts with '/' it replaces this URI's path outright.
// - Otherwise the path is merged with the base path by combinePath().
void URI::absolutise(URI& relative)
{
  if(!relative.scheme().empty())
  {
    swap(relative);
    return;
  }

  const std::string& relPath = relative.path_;

  // check for empty first: indexing [0] on an empty non-const string is
  // undefined before C++11
  if(!relPath.empty() && relPath[0] == '/')
    path_ = relPath;
  else
    combinePath(relPath);
} // absolutise
|
||||||
|
|
||||||
|
void URI::combinePath(const std::string& relPath)
|
||||||
|
{
|
||||||
|
if(*(path_.rbegin()) != '/')
|
||||||
|
path_.erase(path_.rfind('/')+1);
|
||||||
|
|
||||||
|
path_.append(relPath);
|
||||||
|
|
||||||
|
int dots = path_.find("/../");
|
||||||
|
while(dots != std::string::npos)
|
||||||
|
{
|
||||||
|
int preceding_slash = (dots > 0) ? path_.rfind('/', dots-1) : 0;
|
||||||
|
path_.erase(preceding_slash, dots+3-preceding_slash);
|
||||||
|
dots = path_.find("/../");
|
||||||
|
} // while
|
||||||
|
|
||||||
|
int dot = path_.find("/./");
|
||||||
|
while(dot != std::string::npos)
|
||||||
|
{
|
||||||
|
path_.erase(dot, 2);
|
||||||
|
dot = path_.find("/./", dot);
|
||||||
|
}
|
||||||
|
} // combinePath
|
Loading…
Add table
Reference in a new issue