mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-29 08:36:45 +01:00
merged with mangle-dev branch
This commit is contained in:
parent
59dee666d8
commit
5cef2d9e75
5 changed files with 167 additions and 26 deletions
|
@ -13,15 +13,14 @@ cc_sources = arabica.cpp \
|
|||
Utils/utf16utf8codecvt.cpp \
|
||||
Utils/utf8iso88591codecvt.cpp \
|
||||
Utils/utf8ucs2codecvt.cpp \
|
||||
Utils/uri.cpp \
|
||||
XML/XMLCharacterClasses.cpp
|
||||
|
||||
library_includedir=$(includedir)
|
||||
|
||||
INCLUDES = -I$(top_srcdir)/include $(PARSER_HEADERS) $(BOOST_CPPFLAGS)
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/include @PARSER_HEADERS@ $(BOOST_CPPFLAGS)
|
||||
|
||||
lib_LTLIBRARIES = libarabica.la
|
||||
libarabica_la_SOURCES= $(cc_sources)
|
||||
libarabica_la_LDFLAGS= $(PARSER_LIBS)
|
||||
libarabica_la_LDFLAGS= @PARSER_LIBS@
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include <istream>
|
||||
#include <fstream>
|
||||
#include <Utils/socket_stream.h>
|
||||
#include <Utils/uri.hpp>
|
||||
#include <cmath>
|
||||
|
||||
InputSourceResolver::InputSourceResolver(const SAX::InputSource& inputSource) :
|
||||
deleteStream_(false),
|
||||
|
@ -30,11 +32,11 @@ void InputSourceResolver::open(const std::string& publicId,
|
|||
return;
|
||||
}
|
||||
|
||||
// does it look like a URL?
|
||||
std::string::size_type colonIndex = systemId.find("://");
|
||||
if(colonIndex != std::string::npos)
|
||||
// does it look like a URI?
|
||||
Arabica::io::URI url(systemId);
|
||||
if(!url.scheme().empty())
|
||||
{
|
||||
URIResolver res = findResolver(systemId.substr(0, colonIndex));
|
||||
URIResolver res = findResolver(url.scheme());
|
||||
if(res)
|
||||
byteStream_ = res(systemId);
|
||||
if(byteStream_)
|
||||
|
@ -45,7 +47,7 @@ void InputSourceResolver::open(const std::string& publicId,
|
|||
} // if ...
|
||||
|
||||
// try and open it as a file
|
||||
std::ifstream* ifs = new std::ifstream(systemId.c_str());
|
||||
std::ifstream* ifs = new std::ifstream(url.path().c_str());
|
||||
if(ifs->is_open())
|
||||
{
|
||||
deleteStream_ = true;
|
||||
|
@ -85,10 +87,10 @@ InputSourceResolver::URIResolver InputSourceResolver::findResolver(std::string m
|
|||
|
||||
namespace
|
||||
{
|
||||
std::istream* fileResolver(const std::string& fileURL)
|
||||
std::istream* fileResolver(const std::string& fileURI)
|
||||
{
|
||||
int colon = fileURL.find("://");
|
||||
std::string fileName = fileURL.substr(colon+3);
|
||||
Arabica::io::URI url(fileURI);
|
||||
std::string fileName = url.path();
|
||||
|
||||
std::ifstream* ifs = new std::ifstream(fileName.c_str());
|
||||
if(ifs->is_open())
|
||||
|
@ -113,7 +115,7 @@ namespace
|
|||
|
||||
static bool fileReg = InputSourceResolver::registerResolver("file", fileResolver);
|
||||
|
||||
std::istream* httpResolver(const std::string& httpURL)
|
||||
std::istream* httpResolver(const std::string& httpURI)
|
||||
{
|
||||
#ifdef ARABICA_USE_WINSOCK
|
||||
WORD wVersionRequested;
|
||||
|
@ -126,23 +128,16 @@ namespace
|
|||
return 0;
|
||||
#endif
|
||||
|
||||
int colon1 = httpURL.find("://");
|
||||
colon1 += 3;
|
||||
//int colon2 = httpURL.find("://", colon1);
|
||||
int slash1 = httpURL.find("/", colon1);
|
||||
Arabica::io::URI url(httpURI);
|
||||
|
||||
std::string hostName = httpURL.substr(colon1, slash1 - (colon1));
|
||||
std::string path = httpURL.substr(slash1);
|
||||
|
||||
Arabica::socketstream* ifs = new Arabica::socketstream(hostName.c_str(), 80);
|
||||
Arabica::socketstream* ifs = new Arabica::socketstream(url.host().c_str(), std::atoi(url.port().c_str()));
|
||||
if(!ifs->is_open())
|
||||
return 0;
|
||||
*ifs << "GET " << path << " HTTP/1.0" << std::endl;
|
||||
*ifs << "Host: " << hostName << std::endl;
|
||||
*ifs << "GET " << url.path() << " HTTP/1.0" << std::endl;
|
||||
*ifs << "Host: " << url.host() << std::endl;
|
||||
*ifs << "Connection: close" << std::endl;
|
||||
*ifs << std::endl;
|
||||
|
||||
|
||||
char buffer[1024];
|
||||
do
|
||||
{
|
||||
|
|
|
@ -64,6 +64,12 @@ void lwit_processingInstruction(void *user_data, const xmlChar* target, const xm
|
|||
p->SAXprocessingInstruction(target, data);
|
||||
} // lwit_processingInstruction
|
||||
|
||||
void lwit_comment(void *user_data, const xmlChar* comment)
|
||||
{
|
||||
libxml2_base* p = reinterpret_cast<libxml2_base*>(user_data);
|
||||
p->SAXcomment(comment);
|
||||
} // lwit_comment
|
||||
|
||||
void lwit_warning(void *user_data, const char* fmt, ...)
|
||||
{
|
||||
va_list arg;
|
||||
|
@ -168,7 +174,7 @@ static xmlSAXHandler saxHandler = {
|
|||
lwit_characters, // charactersSAXFunc characters;
|
||||
lwit_ignorableWhitespace, // ignorableWhitespaceSAXFunc ignorableWhitespace;
|
||||
lwit_processingInstruction, // processingInstructionSAXFunc processingInstruction;
|
||||
0, // commentSAXFunc comment;
|
||||
lwit_comment, // commentSAXFunc comment;
|
||||
lwit_warning, // warningSAXFunc warning;
|
||||
lwit_error, // errorSAXFunc error;
|
||||
lwit_fatalError, // fatalErrorSAXFunc fatalError;
|
||||
|
|
|
@ -82,7 +82,7 @@ std::codecvt_base::result base64codecvt::do_in(std::mbstate_t& state,
|
|||
from_next = from;
|
||||
to_next = to;
|
||||
|
||||
while((from_next != from_end) && (to != to_limit))
|
||||
while((from_next != from_end) && (to_next != to_limit))
|
||||
{
|
||||
char b = *from_next++;
|
||||
size_t i = base64_charset.find(b);
|
||||
|
|
141
src/Utils/uri.cpp
Normal file
141
src/Utils/uri.cpp
Normal file
|
@ -0,0 +1,141 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <Utils/uri.hpp>
|
||||
|
||||
using namespace Arabica::io;
|
||||
|
||||
namespace {
  // Default port numbers, held as strings so that URI::port() can hand
  // back a reference rather than building a temporary.
  const std::string ZERO = "0";
  const std::string PORT_EIGHTY = "80";
  const std::string PORT_443 = "443";

  // Returns the conventional port for the given scheme: "80" for http,
  // "443" for https, and "0" for an empty or unrecognised scheme.
  const std::string& wellKnownPort(const std::string& scheme)
  {
    if(scheme == "https")
      return PORT_443;

    return (scheme == "http") ? PORT_EIGHTY : ZERO;
  } // wellKnownPort
} // namespace
|
||||
|
||||
// Builds a URI by splitting the given string into its component parts.
URI::URI(const std::string& uri)
{
  this->parse(uri);
} // URI
|
||||
|
||||
// Builds a URI by resolving relativeUrl against base.  The new object
// starts out as a copy of base's components, which absolutise() then
// adjusts using the parsed relative reference.
URI::URI(const URI& base, const std::string& relativeUrl) :
  scheme_(base.scheme_),
  host_(base.host_),
  path_(base.path_),
  port_(base.port_)
{
  URI reference(relativeUrl);
  absolutise(reference);
} // URI
|
||||
|
||||
const std::string& URI::port() const
|
||||
{
|
||||
if(port_.empty())
|
||||
return wellKnownPort(scheme_);
|
||||
return port_;
|
||||
} // port()
|
||||
|
||||
std::string URI::as_string() const
|
||||
{
|
||||
std::string str;
|
||||
if(!scheme_.empty())
|
||||
str.append(scheme_).append("://");
|
||||
if(!host_.empty())
|
||||
{
|
||||
str.append(host_);
|
||||
if(!port_.empty())
|
||||
str.append(":").append(port_);
|
||||
}
|
||||
str.append(path_);
|
||||
return str;
|
||||
} // as_string
|
||||
|
||||
|
||||
void URI::parse(const std::string& uri)
|
||||
{
|
||||
// I'd like to use something a bit stronger - http://code.google.com/p/uri-grammar/
|
||||
// but that would put a Boost Spirit dependence right in the core, which I'm not prepared to do at the moment
|
||||
|
||||
int d = uri.find_first_of(":");
|
||||
if(d == std::string::npos)
|
||||
{
|
||||
path_ = uri;
|
||||
return;
|
||||
} // if ...
|
||||
|
||||
scheme_ = uri.substr(0, d);
|
||||
|
||||
std::string::const_iterator u = uri.begin() + d;
|
||||
std::string::const_iterator ue = uri.end();
|
||||
|
||||
++u;
|
||||
if(*u == '/' && *(u+1) == '/')
|
||||
{
|
||||
u += 2;
|
||||
parseAuthority(u, ue);
|
||||
} // if ...
|
||||
|
||||
path_.append(u, ue);
|
||||
} // parse
|
||||
|
||||
void URI::parseAuthority(std::string::const_iterator& u, std::string::const_iterator& ue)
|
||||
{
|
||||
std::string::const_iterator slash = std::find(u, ue, '/');
|
||||
if(slash == ue)
|
||||
return;
|
||||
|
||||
std::string::const_iterator colon = std::find(u, slash, ':');
|
||||
host_.append(u, colon);
|
||||
|
||||
if(colon != slash)
|
||||
port_.append(colon+1, slash);
|
||||
|
||||
u = slash;
|
||||
} // parseAuthority
|
||||
|
||||
// Resolves relative against this URI, which acts as the base.
//
// A reference with its own scheme is already absolute, so its contents
// are simply swapped in.  Otherwise a path starting with '/' replaces
// the base path, and any other path is merged into the base path by
// combinePath().
void URI::absolutise(URI& relative)
{
  const bool isAbsolute = !relative.scheme().empty();
  if(isAbsolute)
  {
    swap(relative);
    return;
  }

  const std::string& refPath = relative.path_;
  if(refPath.compare(0, 1, "/") != 0)
  {
    combinePath(refPath);
    return;
  }

  path_ = refPath;
} // absolutise
|
||||
|
||||
void URI::combinePath(const std::string& relPath)
|
||||
{
|
||||
if(*(path_.rbegin()) != '/')
|
||||
path_.erase(path_.rfind('/')+1);
|
||||
|
||||
path_.append(relPath);
|
||||
|
||||
int dots = path_.find("/../");
|
||||
while(dots != std::string::npos)
|
||||
{
|
||||
int preceding_slash = (dots > 0) ? path_.rfind('/', dots-1) : 0;
|
||||
path_.erase(preceding_slash, dots+3-preceding_slash);
|
||||
dots = path_.find("/../");
|
||||
} // while
|
||||
|
||||
int dot = path_.find("/./");
|
||||
while(dot != std::string::npos)
|
||||
{
|
||||
path_.erase(dot, 2);
|
||||
dot = path_.find("/./", dot);
|
||||
}
|
||||
} // combinePath
|
Loading…
Add table
Reference in a new issue