mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-17 18:12:04 +01:00
-
This commit is contained in:
parent
543f7357df
commit
bc0d1c9655
2 changed files with 289 additions and 159 deletions
|
@ -1,159 +0,0 @@
|
|||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable: 4786)
|
||||
#endif
|
||||
|
||||
#include <SAX/helpers/InputSourceResolver.h>
|
||||
#include <istream>
|
||||
#include <fstream>
|
||||
#include <Utils/socket_stream.h>
|
||||
|
||||
// Builds a resolver for the given input source.  Both members start out
// cleared (no stream, nothing to delete); open() then selects the stream
// from the source's byte stream or its system id.
InputSourceResolver::InputSourceResolver(const SAX::InputSource& inputSource)
  : deleteStream_(false)
  , byteStream_(0)
{
  open(inputSource.getPublicId(), inputSource.getSystemId(), inputSource.getByteStream());
} // InputSourceResolver
|
||||
|
||||
void InputSourceResolver::open(const std::string& publicId,
|
||||
const std::string& systemId,
|
||||
std::istream* byteStream)
|
||||
{
|
||||
if(byteStream != 0)
|
||||
{
|
||||
byteStream_ = byteStream;
|
||||
return;
|
||||
}
|
||||
|
||||
// does it look like a URL?
|
||||
std::string::size_type colonIndex = systemId.find("://");
|
||||
if(colonIndex != std::string::npos)
|
||||
{
|
||||
URIResolver res = findResolver(systemId.substr(0, colonIndex));
|
||||
if(res)
|
||||
byteStream_ = res(systemId);
|
||||
if(byteStream_)
|
||||
{
|
||||
deleteStream_ = true;
|
||||
return;
|
||||
} // if ...
|
||||
} // if ...
|
||||
|
||||
// try and open it as a file
|
||||
std::ifstream* ifs = new std::ifstream(systemId.c_str());
|
||||
if(ifs->is_open())
|
||||
{
|
||||
deleteStream_ = true;
|
||||
byteStream_ = ifs;
|
||||
}
|
||||
else
|
||||
delete ifs;
|
||||
} // InputSourceResolver
|
||||
|
||||
// Deletes the stream only when open() flagged it via deleteStream_;
// a stream handed in by the caller is left untouched.
InputSourceResolver::~InputSourceResolver()
{
  if(!deleteStream_)
    return;

  delete byteStream_;
} // ~InputSourceResolver
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// resolverMap register/unregister
|
||||
bool InputSourceResolver::registerResolver(const std::string& method, URIResolver resolver)
|
||||
{
|
||||
resolverMap()[method] = resolver;
|
||||
return true;
|
||||
} // registerResolver
|
||||
|
||||
bool InputSourceResolver::unRegisterResolver(const std::string& method)
|
||||
{
|
||||
resolverMapT::iterator i = resolverMap().find(method);
|
||||
if(i != resolverMap().end())
|
||||
resolverMap().erase(i);
|
||||
return true;
|
||||
} // unRegisterResolver
|
||||
|
||||
// Looks up the resolver stored under `method`.
// Returns 0 when nothing is stored under that name.
InputSourceResolver::URIResolver InputSourceResolver::findResolver(std::string method)
{
  resolverMapT& registry = resolverMap();
  resolverMapT::iterator entry = registry.find(method);
  if(entry == registry.end())
    return 0;

  return entry->second;
} // findResolver
|
||||
|
||||
namespace
|
||||
{
|
||||
// Resolver for "file://" URLs.
//
// Strips everything up to and including "://" and tries to open the rest as
// a path; input with no "://" is treated as a path in its entirety.  If the
// open fails, it retries with a Windows-style spelling of the same path:
// every '/' is turned into a backslash, and a leading backslash in front of
// a drive specifier (backslash, letter, colon) is dropped.
//
// Returns a heap-allocated std::ifstream that the caller must delete, or 0
// when neither spelling can be opened.
std::istream* fileResolver(const std::string& fileURL)
{
  // Keep the position as size_type: forcing npos into an int and adding 3
  // produced a meaningless offset for input without a scheme separator.
  const std::string::size_type colon = fileURL.find("://");
  std::string fileName = (colon != std::string::npos) ? fileURL.substr(colon + 3) : fileURL;

  std::ifstream* ifs = new std::ifstream(fileName.c_str());
  if(ifs->is_open())
    return ifs;
  delete ifs;

  // WIN32 specific stuff
  for(std::string::iterator i = fileName.begin(); i != fileName.end(); ++i)
    if(*i == '/')
      *i = '\\';

  // Only index [0] and [2] when they exist; a name shorter than three
  // characters cannot carry a drive prefix.
  if((fileName.size() >= 3) && (fileName[0] == '\\') && (fileName[2] == ':'))
    fileName.erase(0, 1);

  ifs = new std::ifstream(fileName.c_str());
  if(ifs->is_open())
    return ifs;
  delete ifs;

  return 0;
} // fileResolver
|
||||
|
||||
// Registers fileResolver under the "file" scheme when this translation unit's statics are initialised; the bool only exists to trigger the call.
static bool fileReg = InputSourceResolver::registerResolver("file", fileResolver);
|
||||
|
||||
std::istream* httpResolver(const std::string& httpURL)
|
||||
{
|
||||
#ifdef ARABICA_WINDOWS
|
||||
WORD wVersionRequested;
|
||||
WSADATA wsaData;
|
||||
int err;
|
||||
|
||||
wVersionRequested = MAKEWORD(1, 1);
|
||||
err = WSAStartup( wVersionRequested, &wsaData );
|
||||
if(err != 0)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
int colon1 = httpURL.find("://");
|
||||
colon1 += 3;
|
||||
//int colon2 = httpURL.find("://", colon1);
|
||||
int slash1 = httpURL.find("/", colon1);
|
||||
|
||||
std::string hostName = httpURL.substr(colon1, slash1 - (colon1));
|
||||
std::string path = httpURL.substr(slash1);
|
||||
|
||||
Arabica::socketstream* ifs = new Arabica::socketstream(hostName.c_str(), 80);
|
||||
if(!ifs->is_open())
|
||||
return 0;
|
||||
*ifs << "GET " << path << " HTTP/1.0" << std::endl;
|
||||
*ifs << "Host: " << hostName << std::endl;
|
||||
*ifs << "Connection: close" << std::endl;
|
||||
*ifs << std::endl;
|
||||
|
||||
|
||||
char buffer[1024];
|
||||
do
|
||||
{
|
||||
ifs->getline(buffer, sizeof(buffer));
|
||||
}
|
||||
while(buffer[0] != '\r');
|
||||
|
||||
return ifs;
|
||||
} // httpResolver
|
||||
|
||||
// Registers httpResolver under the "http" scheme when this translation unit's statics are initialised; the bool only exists to trigger the call.
static bool httpReg = InputSourceResolver::registerResolver("http", httpResolver);
|
||||
} // namespace
|
||||
|
||||
// end of file
|
289
include/XML
Normal file
289
include/XML
Normal file
|
@ -0,0 +1,289 @@
|
|||
#ifndef ARABICA_XML_UNICODE_CHARACTERS_H
|
||||
#define ARABICA_XML_UNICODE_CHARACTERS_H
|
||||
|
||||
namespace Arabica
{

// Named constants for a subset of the ASCII range, parameterised on the
// character type.  Each constant is declared here and given its numeric
// code value by the out-of-class definitions that follow the struct.
template<typename charT>
struct Unicode
{
  // control characters and space
  static const charT HORIZONTAL_TABULATION;
  static const charT LINE_FEED;
  static const charT CARRIAGE_RETURN;
  static const charT SPACE;

  // punctuation, 0x21 - 0x2F
  static const charT EXCLAMATION_MARK;
  static const charT QUOTATION_MARK;
  static const charT NUMBER_SIGN;
  static const charT PERCENT_SIGN;
  static const charT AMPERSAND;
  static const charT APOSTROPHE;
  static const charT LEFT_PARENTHESIS;
  static const charT RIGHT_PARENTHESIS;
  static const charT ASTERISK;
  static const charT PLUS_SIGN;
  static const charT COMMA;
  static const charT HYPHEN_MINUS;
  static const charT FULL_STOP;
  static const charT SLASH;

  // digits
  static const charT NUMBER_0;
  static const charT NUMBER_1;
  static const charT NUMBER_2;
  static const charT NUMBER_3;
  static const charT NUMBER_4;
  static const charT NUMBER_5;
  static const charT NUMBER_6;
  static const charT NUMBER_7;
  static const charT NUMBER_8;
  static const charT NUMBER_9;

  // punctuation, 0x3A - 0x3F
  static const charT COLON;
  static const charT SEMI_COLON;
  static const charT LESS_THAN_SIGN;
  static const charT EQUALS_SIGN;
  static const charT GREATER_THAN_SIGN;
  static const charT QUESTION_MARK;

  // upper-case letters
  static const charT CAPITAL_A;
  static const charT CAPITAL_B;
  static const charT CAPITAL_C;
  static const charT CAPITAL_D;
  static const charT CAPITAL_E;
  static const charT CAPITAL_F;
  static const charT CAPITAL_G;
  static const charT CAPITAL_H;
  static const charT CAPITAL_I;
  static const charT CAPITAL_J;
  static const charT CAPITAL_K;
  static const charT CAPITAL_L;
  static const charT CAPITAL_M;
  static const charT CAPITAL_N;
  static const charT CAPITAL_O;
  static const charT CAPITAL_P;
  static const charT CAPITAL_Q;
  static const charT CAPITAL_R;
  static const charT CAPITAL_S;
  static const charT CAPITAL_T;
  static const charT CAPITAL_U;
  static const charT CAPITAL_V;
  static const charT CAPITAL_W;
  static const charT CAPITAL_X;
  static const charT CAPITAL_Y;
  static const charT CAPITAL_Z;

  // punctuation, 0x5B - 0x5F
  static const charT LEFT_SQUARE_BRACKET;
  static const charT BACK_SLASH;
  static const charT RIGHT_SQUARE_BRACKET;
  static const charT LOW_LINE;

  // lower-case letters
  static const charT LOWERCASE_A;
  static const charT LOWERCASE_B;
  static const charT LOWERCASE_C;
  static const charT LOWERCASE_D;
  static const charT LOWERCASE_E;
  static const charT LOWERCASE_F;
  static const charT LOWERCASE_G;
  static const charT LOWERCASE_H;
  static const charT LOWERCASE_I;
  static const charT LOWERCASE_J;
  static const charT LOWERCASE_K;
  static const charT LOWERCASE_L;
  static const charT LOWERCASE_M;
  static const charT LOWERCASE_N;
  static const charT LOWERCASE_O;
  static const charT LOWERCASE_P;
  static const charT LOWERCASE_Q;
  static const charT LOWERCASE_R;
  static const charT LOWERCASE_S;
  static const charT LOWERCASE_T;
  static const charT LOWERCASE_U;
  static const charT LOWERCASE_V;
  static const charT LOWERCASE_W;
  static const charT LOWERCASE_X;
  static const charT LOWERCASE_Y;
  static const charT LOWERCASE_Z;

  static const charT VERTICAL_BAR;
}; // struct Unicode

// ---- control characters and space ----
template<typename charT> const charT Unicode<charT>::HORIZONTAL_TABULATION = 0x09;
template<typename charT> const charT Unicode<charT>::LINE_FEED = 0x0A;
template<typename charT> const charT Unicode<charT>::CARRIAGE_RETURN = 0x0D;
template<typename charT> const charT Unicode<charT>::SPACE = 0x20;

// ---- punctuation, 0x21 - 0x2F ----
template<typename charT> const charT Unicode<charT>::EXCLAMATION_MARK = 0x21;  // !
template<typename charT> const charT Unicode<charT>::QUOTATION_MARK = 0x22;    // "
template<typename charT> const charT Unicode<charT>::NUMBER_SIGN = 0x23;       // #
template<typename charT> const charT Unicode<charT>::PERCENT_SIGN = 0x25;      // %
template<typename charT> const charT Unicode<charT>::AMPERSAND = 0x26;         // &
template<typename charT> const charT Unicode<charT>::APOSTROPHE = 0x27;        // '
template<typename charT> const charT Unicode<charT>::LEFT_PARENTHESIS = 0x28;  // (
template<typename charT> const charT Unicode<charT>::RIGHT_PARENTHESIS = 0x29; // )
template<typename charT> const charT Unicode<charT>::ASTERISK = 0x2A;          // *
template<typename charT> const charT Unicode<charT>::PLUS_SIGN = 0x2B;         // +
template<typename charT> const charT Unicode<charT>::COMMA = 0x2C;             // ,
template<typename charT> const charT Unicode<charT>::HYPHEN_MINUS = 0x2D;      // -
template<typename charT> const charT Unicode<charT>::FULL_STOP = 0x2E;         // .
template<typename charT> const charT Unicode<charT>::SLASH = 0x2F;             // /

// ---- digits ----
template<typename charT> const charT Unicode<charT>::NUMBER_0 = 0x30;
template<typename charT> const charT Unicode<charT>::NUMBER_1 = 0x31;
template<typename charT> const charT Unicode<charT>::NUMBER_2 = 0x32;
template<typename charT> const charT Unicode<charT>::NUMBER_3 = 0x33;
template<typename charT> const charT Unicode<charT>::NUMBER_4 = 0x34;
template<typename charT> const charT Unicode<charT>::NUMBER_5 = 0x35;
template<typename charT> const charT Unicode<charT>::NUMBER_6 = 0x36;
template<typename charT> const charT Unicode<charT>::NUMBER_7 = 0x37;
template<typename charT> const charT Unicode<charT>::NUMBER_8 = 0x38;
template<typename charT> const charT Unicode<charT>::NUMBER_9 = 0x39;

// ---- punctuation, 0x3A - 0x3F ----
template<typename charT> const charT Unicode<charT>::COLON = 0x3A;             // :
template<typename charT> const charT Unicode<charT>::SEMI_COLON = 0x3B;        // ;
template<typename charT> const charT Unicode<charT>::LESS_THAN_SIGN = 0x3C;    // <
template<typename charT> const charT Unicode<charT>::EQUALS_SIGN = 0x3D;       // =
template<typename charT> const charT Unicode<charT>::GREATER_THAN_SIGN = 0x3E; // >
template<typename charT> const charT Unicode<charT>::QUESTION_MARK = 0x3F;     // ?

// ---- upper-case letters ----
template<typename charT> const charT Unicode<charT>::CAPITAL_A = 0x41;
template<typename charT> const charT Unicode<charT>::CAPITAL_B = 0x42;
template<typename charT> const charT Unicode<charT>::CAPITAL_C = 0x43;
template<typename charT> const charT Unicode<charT>::CAPITAL_D = 0x44;
template<typename charT> const charT Unicode<charT>::CAPITAL_E = 0x45;
template<typename charT> const charT Unicode<charT>::CAPITAL_F = 0x46;
template<typename charT> const charT Unicode<charT>::CAPITAL_G = 0x47;
template<typename charT> const charT Unicode<charT>::CAPITAL_H = 0x48;
template<typename charT> const charT Unicode<charT>::CAPITAL_I = 0x49;
template<typename charT> const charT Unicode<charT>::CAPITAL_J = 0x4A;
template<typename charT> const charT Unicode<charT>::CAPITAL_K = 0x4B;
template<typename charT> const charT Unicode<charT>::CAPITAL_L = 0x4C;
template<typename charT> const charT Unicode<charT>::CAPITAL_M = 0x4D;
template<typename charT> const charT Unicode<charT>::CAPITAL_N = 0x4E;
template<typename charT> const charT Unicode<charT>::CAPITAL_O = 0x4F;
template<typename charT> const charT Unicode<charT>::CAPITAL_P = 0x50;
template<typename charT> const charT Unicode<charT>::CAPITAL_Q = 0x51;
template<typename charT> const charT Unicode<charT>::CAPITAL_R = 0x52;
template<typename charT> const charT Unicode<charT>::CAPITAL_S = 0x53;
template<typename charT> const charT Unicode<charT>::CAPITAL_T = 0x54;
template<typename charT> const charT Unicode<charT>::CAPITAL_U = 0x55;
template<typename charT> const charT Unicode<charT>::CAPITAL_V = 0x56;
template<typename charT> const charT Unicode<charT>::CAPITAL_W = 0x57;
template<typename charT> const charT Unicode<charT>::CAPITAL_X = 0x58;
template<typename charT> const charT Unicode<charT>::CAPITAL_Y = 0x59;
template<typename charT> const charT Unicode<charT>::CAPITAL_Z = 0x5A;

// ---- punctuation, 0x5B - 0x5F ----
template<typename charT> const charT Unicode<charT>::LEFT_SQUARE_BRACKET = 0x5B;  // [
template<typename charT> const charT Unicode<charT>::BACK_SLASH = 0x5C;           // backslash
template<typename charT> const charT Unicode<charT>::RIGHT_SQUARE_BRACKET = 0x5D; // ]
template<typename charT> const charT Unicode<charT>::LOW_LINE = 0x5F;             // _

// ---- lower-case letters ----
template<typename charT> const charT Unicode<charT>::LOWERCASE_A = 0x61;
template<typename charT> const charT Unicode<charT>::LOWERCASE_B = 0x62;
template<typename charT> const charT Unicode<charT>::LOWERCASE_C = 0x63;
template<typename charT> const charT Unicode<charT>::LOWERCASE_D = 0x64;
template<typename charT> const charT Unicode<charT>::LOWERCASE_E = 0x65;
template<typename charT> const charT Unicode<charT>::LOWERCASE_F = 0x66;
template<typename charT> const charT Unicode<charT>::LOWERCASE_G = 0x67;
template<typename charT> const charT Unicode<charT>::LOWERCASE_H = 0x68;
template<typename charT> const charT Unicode<charT>::LOWERCASE_I = 0x69;
template<typename charT> const charT Unicode<charT>::LOWERCASE_J = 0x6A;
template<typename charT> const charT Unicode<charT>::LOWERCASE_K = 0x6B;
template<typename charT> const charT Unicode<charT>::LOWERCASE_L = 0x6C;
template<typename charT> const charT Unicode<charT>::LOWERCASE_M = 0x6D;
template<typename charT> const charT Unicode<charT>::LOWERCASE_N = 0x6E;
template<typename charT> const charT Unicode<charT>::LOWERCASE_O = 0x6F;
template<typename charT> const charT Unicode<charT>::LOWERCASE_P = 0x70;
template<typename charT> const charT Unicode<charT>::LOWERCASE_Q = 0x71;
template<typename charT> const charT Unicode<charT>::LOWERCASE_R = 0x72;
template<typename charT> const charT Unicode<charT>::LOWERCASE_S = 0x73;
template<typename charT> const charT Unicode<charT>::LOWERCASE_T = 0x74;
template<typename charT> const charT Unicode<charT>::LOWERCASE_U = 0x75;
template<typename charT> const charT Unicode<charT>::LOWERCASE_V = 0x76;
template<typename charT> const charT Unicode<charT>::LOWERCASE_W = 0x77;
template<typename charT> const charT Unicode<charT>::LOWERCASE_X = 0x78;
template<typename charT> const charT Unicode<charT>::LOWERCASE_Y = 0x79;
template<typename charT> const charT Unicode<charT>::LOWERCASE_Z = 0x7A;

template<typename charT> const charT Unicode<charT>::VERTICAL_BAR = 0x7C;      // |

} // namespace Arabica
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in a new issue