arabica/include/SAX/wrappers/saxexpat.hpp

1086 lines
40 KiB
C++
Raw Normal View History

2003-09-11 12:26:53 +02:00
#ifndef ARABICA_SAX_EXPAT_H
#define ARABICA_SAX_EXPAT_H
2002-06-21 13:16:28 +02:00
//---------------------------------------------------------------------------
// A SAX2 wrapper class for expat.
//---------------------------------------------------------------------------
2007-09-05 00:55:47 +02:00
#include <SAX/ArabicaConfig.hpp>
#include <SAX/XMLReader.hpp>
2002-06-21 13:16:28 +02:00
#include <expat.h>
#include <sstream>
2007-09-05 00:55:47 +02:00
#include <SAX/InputSource.hpp>
#include <SAX/ContentHandler.hpp>
#include <SAX/SAXParseException.hpp>
#include <SAX/SAXNotRecognizedException.hpp>
#include <SAX/SAXNotSupportedException.hpp>
#include <SAX/helpers/DefaultHandler.hpp>
#include <SAX/helpers/AttributesImpl.hpp>
#include <SAX/helpers/NamespaceSupport.hpp>
#include <SAX/helpers/InputSourceResolver.hpp>
#include <SAX/helpers/FeatureNames.hpp>
#include <SAX/helpers/PropertyNames.hpp>
#include <Arabica/StringAdaptor.hpp>
2007-09-05 00:55:47 +02:00
#include <SAX/helpers/AttributeDefaults.hpp>
2002-06-21 13:16:28 +02:00
#include <typeinfo>
#include <map>
#include <Arabica/getparam.hpp>
2002-06-21 13:16:28 +02:00
2007-09-05 11:49:18 +02:00
namespace Arabica
{
2002-06-21 13:16:28 +02:00
namespace SAX
{
////////////////////////////////////////////////////////////////////////////
// the callback functions for expat
namespace expat_wrapper_impl_mumbojumbo
{
extern "C"
{
void ewim_charHandler(void* data, const char* txt, int txtlen);
void ewim_startElement(void* data, const char* qName, const char** atts);
void ewim_endElement(void* data, const char* qName);
void ewim_processingInstruction(void* userData, const char* target, const char* data);
void ewim_elementDeclaration(void* userData,
const XML_Char* name,
XML_Content* model);
void ewim_attListDeclaration(void* userData,
const XML_Char* elname,
const XML_Char* attname,
const XML_Char* att_type,
const XML_Char* dflt,
int isrequired);
2002-06-21 13:16:28 +02:00
void ewim_entityDeclaration(void* userData,
const XML_Char* entityName,
int is_parameter_entity,
const XML_Char* value,
int value_length,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId,
const XML_Char* notationName);
2002-06-21 13:16:28 +02:00
void ewim_notationDeclaration(void* userData,
const XML_Char* notationName,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId);
2002-06-21 13:16:28 +02:00
void ewim_startDoctypeDecl(void* userData,
const XML_Char* doctypeName,
const XML_Char* sysid,
const XML_Char* pubid,
int has_internal_subset);
2002-06-21 13:16:28 +02:00
void ewim_endDoctypeDecl(void* userData);
void ewim_startCdataSection(void* userData);
void ewim_endCdataSection(void* userData);
void ewim_commentHandler(void* userData, const XML_Char* data);
void ewim_skippedEntityHandler(void* userData, const XML_Char* entityName, int is_parameter_entity);
2002-06-21 13:16:28 +02:00
int ewim_externalEntityRefHandler(XML_Parser parser,
const XML_Char* context,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId);
} // extern "C"
class expat2base
{
protected:
virtual ~expat2base() { }
2002-06-21 13:16:28 +02:00
private:
virtual void charHandler(const char* txt, int txtlen) = 0;
virtual void startElement(const char* qName, const char** atts) = 0;
virtual void endElement(const char* qName) = 0;
virtual void processingInstruction(const char* target, const char* data) = 0;
virtual void elementDeclaration(const XML_Char* name,
const XML_Content* model) = 0;
virtual void attListDeclaration(const XML_Char* elname,
const XML_Char* attname,
const XML_Char* att_type,
const XML_Char* dflt,
2004-09-14 22:51:36 +02:00
int isrequired) = 0;
2002-06-21 13:16:28 +02:00
virtual void entityDeclaration(const XML_Char* entityName,
int is_parameter_entity,
const XML_Char* value,
int value_length,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId,
const XML_Char* notationName) = 0;
2002-06-21 13:16:28 +02:00
virtual void notationDeclaration(const XML_Char* notationName,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId) = 0;
2002-06-21 13:16:28 +02:00
virtual void startDoctypeDecl(const XML_Char* doctypeName,
const XML_Char* sysid,
const XML_Char* pubid,
int has_internal_subset) = 0;
2002-06-21 13:16:28 +02:00
virtual void endDoctypeDecl() = 0;
virtual void startCdataSection() = 0;
virtual void endCdataSection() = 0;
virtual void commentHandler(const XML_Char* data) = 0;
virtual void skippedEntity(const XML_Char* entityName) = 0;
2002-06-21 13:16:28 +02:00
virtual int externalEntityRefHandler(XML_Parser parser,
const XML_Char* context,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId) = 0;
friend void ewim_charHandler(void*, const char*, int);
friend void ewim_startElement(void*, const char*, const char**);
friend void ewim_endElement(void*, const char*);
friend void ewim_processingInstruction(void*, const char*, const char*);
friend void ewim_elementDeclaration(void*, const XML_Char*, XML_Content*);
2004-09-14 22:51:36 +02:00
friend void ewim_attListDeclaration(void*, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*, int);
2002-06-21 13:16:28 +02:00
friend void ewim_entityDeclaration(void*, const XML_Char*, int, const XML_Char*, int, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*);
friend void ewim_notationDeclaration(void*, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*);
friend void ewim_startDoctypeDecl(void*, const XML_Char*, const XML_Char*, const XML_Char*, int);
friend void ewim_endDoctypeDecl(void*);
friend void ewim_startCdataSection(void*);
friend void ewim_endCdataSection(void*);
friend void ewim_commentHandler(void*, const XML_Char*);
friend void ewim_skippedEntityHandler(void*, const XML_Char*, int);
2002-06-21 13:16:28 +02:00
friend int ewim_externalEntityRefHandler(XML_Parser, const XML_Char*, const XML_Char*, const XML_Char*, const XML_Char*);
}; // class expat2base
} // namespace expat_wrapper_impl_mumbojumbo
////////////////////////////////////////////////////////////////////////////
// A SAX2 wrapper for expat. expat has an event-based interface of its own,
// so the SAX wrapper maps more or less directly to it.
/**
* expat_wrapper puts an {@link XMLReader XMLReader} interface
2003-03-20 17:31:09 +01:00
* around <a href='http://www.libexpat.org/'>Expat</a>.
2002-06-21 13:16:28 +02:00
* <p>
* For general usage:<br>
* <code>
* &nbsp;&nbsp;expat_wrapper<std::string> parser;<br>
* &nbsp;&nbsp;... <br>
* &nbsp;&nbsp;parser.parse(filename);<br>
* </code>
* expat_wrapper<std::string> will hand out std::strings containing UTF-8 encoded
* data (expat's native format).
* <p>
* If you need std::wstring containing UCS-2 or perhaps you want to use some custom
* string type, then you can simply instantiate expat_wrapper on your string type and
* a policy class which knows how to convert UTF-8 const char*s to your type.
* <p>
* The default policy is:
* <code>
* template<class stringT><br>
* class default_expat_string_convertor<br>
* {<br>
* public:<br>
* &nbsp;&nbsp;stringT makeStringT(const char* str) const<br>
* &nbsp;&nbsp;{<br>
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if(!str)<br>
* &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;return stringT();<br>
* &nbsp;&nbsp;&nbsp;&nbsp;return stringT(str);<br>
* &nbsp;&nbsp;} // makeStringT<br>
* &nbsp;&nbsp;stringT makeStringT(const char* str, int length) const<br>
* &nbsp;&nbsp;{<br>
* &nbsp;&nbsp;&nbsp;&nbsp;return stringT(str, length);<br>
* &nbsp;&nbsp;} // makeStringT<br>
* <br>
* &nbsp;&nbsp;std::string asStdString(const stringT& str) const<br>
* &nbsp;&nbsp;{<br>
* &nbsp;&nbsp;&nbsp;&nbsp;return str;<br>
* &nbsp;&nbsp;} // asStdString<br>
* }; // class default_expat_string_convertor<br>
* </code>
* <p>
* A custom policy needs to provide the equivalent makeStringT and asStdString functions.
* <p>
* @author Jez Higgins
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
* @version $Id$
* @see SAX::XMLReader
2002-06-21 13:16:28 +02:00
*/
template<class string_type,
class T0 = Arabica::nil_t,
class T1 = Arabica::nil_t>
2008-07-16 11:01:08 +02:00
class expat_wrapper :
public SAX::XMLReaderInterface<string_type,
typename Arabica::get_string_adaptor<string_type, T0, T1>::type>,
public SAX::Locator<string_type, typename Arabica::get_string_adaptor<string_type, T0, T1>::type>,
public expat_wrapper_impl_mumbojumbo::expat2base
2002-06-21 13:16:28 +02:00
{
2004-09-14 22:51:36 +02:00
public:
2008-07-16 11:01:08 +02:00
typedef SAX::XMLReaderInterface<string_type,
typename Arabica::get_string_adaptor<string_type, T0, T1>::type> XMLReaderT;
2007-09-06 11:04:01 +02:00
typedef typename XMLReaderT::string_adaptor string_adaptor;
typedef string_adaptor SA;
typedef SAX::EntityResolver<string_type, string_adaptor> entityResolverT;
typedef SAX::DTDHandler<string_type, string_adaptor> dtdHandlerT;
typedef SAX::ContentHandler<string_type, string_adaptor> contentHandlerT;
typedef SAX::Attributes<string_type, string_adaptor> attributesT;
typedef SAX::AttributeType<string_type, string_adaptor> attributeTypeT;
typedef SAX::DeclHandler<string_type, string_adaptor> declHandlerT;
typedef SAX::LexicalHandler<string_type, string_adaptor> lexicalHandlerT;
typedef SAX::InputSource<string_type, string_adaptor> inputSourceT;
typedef SAX::Locator<string_type, string_adaptor> locatorT;
typedef SAX::NamespaceSupport<string_type, string_adaptor> namespaceSupportT;
typedef SAX::ErrorHandler<string_type, string_adaptor> errorHandlerT;
typedef SAX::SAXParseException<string_type, string_adaptor> SAXParseExceptionT;
2004-09-14 22:51:36 +02:00
typedef typename XMLReaderT::PropertyBase PropertyBaseT;
typedef typename XMLReaderT::template Property<lexicalHandlerT*> getLexicalHandlerT;
typedef typename XMLReaderT::template Property<lexicalHandlerT&> setLexicalHandlerT;
typedef typename XMLReaderT::template Property<declHandlerT*> getDeclHandlerT;
typedef typename XMLReaderT::template Property<declHandlerT&> setDeclHandlerT;
typedef XML::QualifiedName<string_type, string_adaptor> qualifiedNameT;
2002-06-21 13:16:28 +02:00
2004-09-14 22:51:36 +02:00
expat_wrapper();
virtual ~expat_wrapper();
2002-06-21 13:16:28 +02:00
/////////////////////////////////////////////////
// Configuration
virtual bool getFeature(const string_type& name) const;
virtual void setFeature(const string_type& name, bool value);
2002-06-21 13:16:28 +02:00
/////////////////////////////////////////////////
// Event Handlers
virtual void setEntityResolver(entityResolverT& resolver) { entityResolver_ = &resolver; }
virtual entityResolverT* getEntityResolver() const { return entityResolver_; }
virtual void setDTDHandler(dtdHandlerT& handler) { dtdHandler_ = &handler; }
virtual dtdHandlerT* getDTDHandler() const { return dtdHandler_; }
virtual void setContentHandler(contentHandlerT& handler) { contentHandler_ = &handler; }
virtual contentHandlerT* getContentHandler() const { return contentHandler_; }
2003-09-10 10:47:37 +02:00
virtual void setErrorHandler(errorHandlerT& handler) { errorHandler_ = &handler; }
virtual errorHandlerT* getErrorHandler() const { return errorHandler_; }
virtual void setDeclHandler(declHandlerT& handler) { declHandler_ = &handler; }
virtual declHandlerT* getDeclHandler() const { return declHandler_; }
virtual void setLexicalHandler(lexicalHandlerT& handler) { lexicalHandler_ = &handler; }
virtual lexicalHandlerT* getLexicalHandler() const { return lexicalHandler_; }
2002-06-21 13:16:28 +02:00
//////////////////////////////////////////////////
// Parsing
virtual void parse(inputSourceT& input);
private:
bool do_parse(inputSourceT& source, XML_Parser parser);
2007-09-06 11:04:01 +02:00
public:
2002-06-21 13:16:28 +02:00
//////////////////////////////////////////////////
// Locator
2007-09-06 11:04:01 +02:00
virtual string_type getPublicId() const;
virtual string_type getSystemId() const;
virtual int getLineNumber() const;
virtual int getColumnNumber() const;
2002-06-21 13:16:28 +02:00
///////////////////////////////////////////////////
// properties
protected:
virtual std::auto_ptr<PropertyBaseT> doGetProperty(const string_type& name);
virtual void doSetProperty(const string_type& name, std::auto_ptr<PropertyBaseT> value);
2004-09-14 22:51:36 +02:00
private:
qualifiedNameT processName(const string_type& qName, bool isAttribute);
2002-06-21 13:16:28 +02:00
void reportError(const std::string& message, bool fatal = false);
void checkNotParsing(const string_type& type, const string_type& name) const;
2002-06-21 13:16:28 +02:00
private:
virtual void charHandler(const char* txt, int txtlen);
virtual void startElement(const char* qName, const char** atts);
virtual void endElement(const char* qName);
virtual void processingInstruction(const char* target, const char* data);
virtual void elementDeclaration(const XML_Char* name,
const XML_Content* model);
virtual void attListDeclaration(const XML_Char* elname,
const XML_Char* attname,
const XML_Char* att_type,
const XML_Char* dflt,
2004-09-14 22:51:36 +02:00
int isrequired);
2002-06-21 13:16:28 +02:00
virtual void entityDeclaration(const XML_Char* entityName,
int is_parameter_entity,
const XML_Char* value,
int value_length,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId,
const XML_Char* notationName);
2002-06-21 13:16:28 +02:00
virtual void notationDeclaration(const XML_Char* notationName,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId);
2002-06-21 13:16:28 +02:00
virtual void startDoctypeDecl(const XML_Char* doctypeName,
const XML_Char* sysid,
const XML_Char* pubid,
int has_internal_subset);
2002-06-21 13:16:28 +02:00
virtual void endDoctypeDecl();
virtual void startCdataSection();
virtual void endCdataSection();
virtual void commentHandler(const XML_Char* data);
virtual void skippedEntity(const XML_Char* entityName);
2002-06-21 13:16:28 +02:00
virtual int externalEntityRefHandler(XML_Parser parser,
const XML_Char* context,
const XML_Char* base,
const XML_Char* systemId,
const XML_Char* publicId);
private:
void setCallbacks();
2002-06-21 13:16:28 +02:00
void startElementNoNS(const char* qName, const char** atts);
void endElementNoNS(const char* qName);
void convertXML_Content(std::ostream& os, const XML_Content* model, bool isChild = false);
// member variables
entityResolverT* entityResolver_;
dtdHandlerT* dtdHandler_;
contentHandlerT* contentHandler_;
2003-09-10 10:47:37 +02:00
errorHandlerT* errorHandler_;
2002-06-21 13:16:28 +02:00
declHandlerT* declHandler_;
lexicalHandlerT* lexicalHandler_;
namespaceSupportT nsSupport_;
XML_Parser parser_;
string_type publicId_;
string_type systemId_;
2002-06-21 13:16:28 +02:00
bool parsing_;
// features
bool namespaces_;
bool prefixes_;
bool externalResolving_;
string_type emptyString_;
const SAX::FeatureNames<string_type, string_adaptor> features_;
const SAX::PropertyNames<string_type, string_adaptor> properties_;
const SAX::NamespaceConstants<string_type, string_adaptor> nsc_;
const SAX::AttributeDefaults<string_type, string_adaptor> attrDefaults_;
2002-06-21 13:16:28 +02:00
std::map<string_type, string_type> declaredExternalEnts_;
2002-06-21 13:16:28 +02:00
}; // class expat_wrapper
//////////////////////////////////////////////////////////////////
// expat wrapper definition
// Default construction: no handlers installed, a fresh expat parser using
// the document's own encoding (XML_ParserCreate(0)), namespace processing
// and prefix reporting on, external entity resolution off.
// NOTE(review): the result of XML_ParserCreate is not checked for 0 here.
template<class string_type, class T0, class T1>
expat_wrapper<string_type, T0, T1>::expat_wrapper() :
  // handlers
  entityResolver_(0),
  dtdHandler_(0),
  contentHandler_(0),
  errorHandler_(0),
  declHandler_(0),
  lexicalHandler_(0),
  // parser state
  parser_(XML_ParserCreate(0)),
  parsing_(false),
  // features
  namespaces_(true),
  prefixes_(true),
  externalResolving_(false)
{
} // expat_wrapper
// Releases the expat parser created by the constructor.
template<class string_type, class T0, class T1>
expat_wrapper<string_type, T0, T1>::~expat_wrapper()
{
  XML_ParserFree(this->parser_);
} // ~expat_wrapper
// Wires this object into parser_: registers this (as an expat2base*) as the
// parser's user data and installs every ewim_* C callback.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::setCallbacks()
{
  namespace ewim = expat_wrapper_impl_mumbojumbo;

  // The user data must be the expat2base sub-object, so convert to the
  // base pointer first and only then erase the type.
  ewim::expat2base* const self = this;
  XML_SetUserData(parser_, static_cast<void*>(self));

  // document content
  XML_SetCharacterDataHandler(parser_, ewim::ewim_charHandler);
  XML_SetElementHandler(parser_, ewim::ewim_startElement, ewim::ewim_endElement);

  // DTD declarations
  XML_SetElementDeclHandler(parser_, ewim::ewim_elementDeclaration);
  XML_SetAttlistDeclHandler(parser_, ewim::ewim_attListDeclaration);
  XML_SetEntityDeclHandler(parser_, ewim::ewim_entityDeclaration);
  XML_SetNotationDeclHandler(parser_, ewim::ewim_notationDeclaration);

  // lexical events
  XML_SetDoctypeDeclHandler(parser_, ewim::ewim_startDoctypeDecl, ewim::ewim_endDoctypeDecl);
  XML_SetCdataSectionHandler(parser_, ewim::ewim_startCdataSection, ewim::ewim_endCdataSection);
  XML_SetCommentHandler(parser_, ewim::ewim_commentHandler);
  XML_SetSkippedEntityHandler(parser_, ewim::ewim_skippedEntityHandler);

  // external entities and processing instructions
  XML_SetExternalEntityRefHandler(parser_, ewim::ewim_externalEntityRefHandler);
  XML_SetProcessingInstructionHandler(parser_, ewim::ewim_processingInstruction);
} // setCallbacks
2002-06-21 13:16:28 +02:00
// Sets a SAX2 feature.
//
// Recognised features: namespaces, namespace-prefixes, and the two
// external-entity features (which share a single flag).  None of them may
// be changed while a parse is in progress.
//
// The one combination that makes no sense is namespaces off AND
// namespace-prefixes off, since nothing would be left to report names with.
// Whichever of the two is being switched off, the other is switched on if
// that would otherwise be the result.
//
// Throws SAXNotSupportedException if the feature is changed mid-parse, or
// for the validation feature (expat does not validate), and
// SAXNotRecognizedException for any other name.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::setFeature(const string_type& name, bool value)
{
  if(name == features_.namespaces)
  {
    checkNotParsing(SA::construct_from_utf8("feature"), name);
    namespaces_ = value;
    if(!namespaces_ && !prefixes_)
      prefixes_ = true;
    return;
  } // namespaces

  if(name == features_.namespace_prefixes)
  {
    checkNotParsing(SA::construct_from_utf8("feature"), name);
    prefixes_ = value;
    // Mirror image of the namespaces case above.  This used to test
    // (prefixes_ && !namespaces_), which silently re-enabled namespace
    // processing whenever prefixes were switched on, and let both flags
    // end up false when prefixes were switched off.
    if(!prefixes_ && !namespaces_)
      namespaces_ = true;
    return;
  } // namespace prefixes

  if(name == features_.external_general || name == features_.external_parameter)
  {
    checkNotParsing(SA::construct_from_utf8("feature"), name);
    externalResolving_ = value;
    return;
  } // external entity resolution

#ifndef __BORLANDC__
  // this is a hack features validation problem with BCB6 => need to be investigated
  if(name == features_.validation)
  {
    std::ostringstream os;
    os << "Feature not supported " << SA::asStdString(name);
    throw SAX::SAXNotSupportedException(os.str());
  }
  else
  {
    std::ostringstream os;
    os << "Feature not recognized " << SA::asStdString(name);
    throw SAX::SAXNotRecognizedException(os.str());
  }
#endif
} // setFeature
// Reports the current value of a SAX2 feature.  Validation is always
// reported as off; an unknown feature name throws SAXNotRecognizedException.
template<class string_type, class T0, class T1>
bool expat_wrapper<string_type, T0, T1>::getFeature(const string_type& name) const
{
  if(name == features_.namespaces)
    return namespaces_;

  if(name == features_.namespace_prefixes)
    return prefixes_;

  // general and parameter entities are controlled by the same flag
  const bool isExternalFeature = (name == features_.external_general ||
                                  name == features_.external_parameter);
  if(isExternalFeature)
    return externalResolving_;

  if(name == features_.validation)
    return false;

  const std::string message = std::string("Feature not recognized ") + SA::asStdString(name);
  throw SAX::SAXNotRecognizedException(message);
} // getFeature
// Parses one document.
//
// Installs the expat callbacks, remembers the source's public and system
// ids for the Locator interface, then brackets the actual parse with the
// content handler's startDocument()/endDocument().  endDocument() is called
// whether or not do_parse() succeeded; parse failures are reported through
// the error handler rather than by return value.
//
// Handlers are free to throw.  If anything does, the parser is reset and the
// parsing flag cleared before the exception continues on its way, so this
// object can be reconfigured and reused afterwards.  Without that,
// parsing_ would stay true forever and every later setFeature() call
// would fail with "Can't change ... while parsing".
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::parse(inputSourceT& source)
{
  setCallbacks();

  publicId_ = source.getPublicId();
  systemId_ = source.getSystemId();

  if(contentHandler_)
    contentHandler_->setDocumentLocator(*this);

  parsing_ = true;
  try
  {
    if(contentHandler_)
      contentHandler_->startDocument();

    XML_SetParamEntityParsing(parser_, externalResolving_ ? XML_PARAM_ENTITY_PARSING_ALWAYS : XML_PARAM_ENTITY_PARSING_NEVER);
    do_parse(source, parser_);

    if(contentHandler_)
      contentHandler_->endDocument();
  }
  catch(...)
  {
    // same clean up as the normal exit below
    parsing_ = false;
    XML_ParserReset(parser_, 0);
    throw;
  } // catch

  parsing_ = false;

  XML_ParserReset(parser_, 0);
} // parse
// Pumps the resolved input stream through the given expat parser, one
// buffer at a time.
//
//   source  the input to read
//   parser  the parser to feed.  Not necessarily parser_: the signature
//           allows a different parser to be driven through here.
//
// Returns true if the whole input was parsed, false otherwise.  Every
// failure is also reported to the error handler as a fatal error.
template<class string_type, class T0, class T1>
bool expat_wrapper<string_type, T0, T1>::do_parse(inputSourceT& source, XML_Parser parser)
{
  InputSourceResolver is(source, string_adaptor());
  if(is.resolve() == 0)
  {
    reportError("Could not resolve XML document", true);
    return false;
  } // if(is.resolver() == 0)

  const int BUFF_SIZE = 10*1024;
  while(!is.resolve()->eof())
  {
    void* const buffer = XML_GetBuffer(parser, BUFF_SIZE);
    if(buffer == 0)
    {
      reportError("Could not acquire expat buffer", true);
      return false;
    } // if ...

    is.resolve()->read(static_cast<char*>(buffer), BUFF_SIZE);

    // A short read at end of input sets failbit together with eofbit, and
    // that is fine.  A failure without eof means the stream is broken: it
    // will never reach eof, so give up instead of spinning forever.
    if(is.resolve()->fail() && !is.resolve()->eof())
    {
      reportError("Could not read XML document", true);
      return false;
    } // if ...

    if(XML_ParseBuffer(parser, static_cast<int>(is.resolve()->gcount()), is.resolve()->eof()) == 0)
    {
      // Ask the parser that actually failed (parser, which need not be
      // parser_) what went wrong.  XML_ErrorString returns a null pointer
      // for codes it has no text for, which must not reach std::string.
      const XML_LChar* const text = XML_ErrorString(XML_GetErrorCode(parser));
      reportError(text ? std::string(text) : std::string("Unknown parse error"), true);
      return false;
    } // if ...
  } // while

  return true;
} // do_parse
// Looks up a SAX2 property.  The lexical handler and declaration handler
// are the only ones known; each is returned as a pointer wrapped in a
// Property object.  Any other name throws SAXNotRecognizedException.
template<class string_type, class T0, class T1>
std::auto_ptr<typename expat_wrapper<string_type, T0, T1>::PropertyBaseT> expat_wrapper<string_type, T0, T1>::doGetProperty(const string_type& name)
{
  typedef std::auto_ptr<PropertyBaseT> PropertyPtr;

  if(name == properties_.lexicalHandler)
    return PropertyPtr(new getLexicalHandlerT(lexicalHandler_));

  if(name == properties_.declHandler)
    return PropertyPtr(new getDeclHandlerT(declHandler_));

  const std::string message = std::string("Property not recognized ") + SA::asStdString(name);
  throw SAX::SAXNotRecognizedException(message);
} // doGetProperty
// Sets a SAX2 property.  The value must wrap a reference to a handler of the
// type matching the property name: a mismatch throws std::bad_cast, an
// unknown property name throws SAXNotRecognizedException.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::doSetProperty(const string_type& name, std::auto_ptr<PropertyBaseT> value)
{
  if(name == properties_.lexicalHandler)
  {
    setLexicalHandlerT* const wrapped = dynamic_cast<setLexicalHandlerT*>(value.get());
    if(wrapped == 0)
      throw std::bad_cast();

    lexicalHandler_ = &(wrapped->get());
    return;
  } // lexical handler

  if(name == properties_.declHandler)
  {
    setDeclHandlerT* const wrapped = dynamic_cast<setDeclHandlerT*>(value.get());
    if(wrapped == 0)
      throw std::bad_cast();

    declHandler_ = &(wrapped->get());
    return;
  } // declaration handler

  const std::string message = std::string("Property not recognized ") + SA::asStdString(name);
  throw SAX::SAXNotRecognizedException(message);
} // doSetProperty
// Locator implementation
// Locator: the public id recorded from the input source by parse().
template<class string_type, class T0, class T1>
string_type expat_wrapper<string_type, T0, T1>::getPublicId() const
{
  return this->publicId_;
} // getPublicId
// Locator: the system id recorded from the input source by parse().
template<class string_type, class T0, class T1>
string_type expat_wrapper<string_type, T0, T1>::getSystemId() const
{
  return this->systemId_;
} // getSystemId
// Locator: the line expat is currently at, narrowed to int.
template<class string_type, class T0, class T1>
int expat_wrapper<string_type, T0, T1>::getLineNumber() const
{
  return static_cast<int>(XML_GetCurrentLineNumber(parser_));
} // getLineNumber
// Locator: the column expat is currently at, narrowed to int.
template<class string_type, class T0, class T1>
int expat_wrapper<string_type, T0, T1>::getColumnNumber() const
{
  return static_cast<int>(XML_GetCurrentColumnNumber(parser_));
} // getColumnNumber
// Splits a raw qualified name using the current namespace context.  A name
// that carries a prefix but resolved to no namespace URI is reported as a
// (non-fatal) error; the name is returned regardless.
template<class string_type, class T0, class T1>
typename XML::QualifiedName<string_type, typename expat_wrapper<string_type, T0, T1>::string_adaptor> expat_wrapper<string_type, T0, T1>::processName(const string_type& qName, bool isAttribute)
{
  qualifiedNameT resolved = nsSupport_.processName(qName, isAttribute);

  if(!resolved.has_namespaceUri() && resolved.has_prefix())
  {
    const std::string message = std::string("Undeclared prefix ") + SA::asStdString(qName);
    reportError(message);
  } // if ...

  return resolved;
} // processName
// Passes a problem on to the error handler, stamped with the ids of the
// current input and the position parser_ has reached.  Does nothing when
// no error handler has been set.
//
//   message  text of the report
//   fatal    true routes to fatalError(), false to error()
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::reportError(const std::string& message, bool fatal)
{
  if(errorHandler_ == 0)
    return;

  SAXParseExceptionT problem(message,
                             publicId_,
                             systemId_,
                             XML_GetCurrentLineNumber(parser_),
                             XML_GetCurrentColumnNumber(parser_));

  if(!fatal)
  {
    errorHandler_->error(problem);
    return;
  } // if ...

  errorHandler_->fatalError(problem);
} // reportError
// Guard for settings that must not change mid-parse: throws
// SAXNotSupportedException if parse() is currently running.
//
//   type  what kind of setting it is (e.g. "feature"), for the message
//   name  the setting's name, for the message
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::checkNotParsing(const string_type& type, const string_type& name) const
{
  if(!parsing_)
    return;

  std::string message("Can't change ");
  message += SA::asStdString(type);
  message += " ";
  message += SA::asStdString(name);
  message += " while parsing";
  throw SAX::SAXNotSupportedException(message);
} // checkNotParsing
// Character data from expat: txtlen bytes at txt (the length is passed
// on explicitly, so no terminator is relied upon).  Converted and
// forwarded to the content handler, if there is one.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::charHandler(const char* txt, int txtlen)
{
  if(contentHandler_ != 0)
    contentHandler_->characters(SA::construct_from_utf8(txt, txtlen));
} // charHandler
namespace expat_wrapper_impl_mumbojumbo
{
// True if attQName is a namespace declaration: exactly "xmlns", or
// "xmlns" followed by a colon.  Looking for the xmlns text at the front
// of the name is not enough - "xmlnsfoo" is an ordinary attribute.
//
//   attQName  raw attribute name
//   nsc       supplies the xmlns and colon constants
template<class string_type, class string_adaptor, class constantsT>
bool isNamespaceDeclaration(const string_type& attQName, const constantsT& nsc)
{
  const typename string_adaptor::size_type colon = string_adaptor::find(attQName, nsc.colon);
  if(colon == string_adaptor::npos())
    return attQName == nsc.xmlns;

  return string_adaptor::construct(string_adaptor::begin(attQName),
                                   string_adaptor::begin(attQName) + colon) == nsc.xmlns;
} // isNamespaceDeclaration
} // namespace expat_wrapper_impl_mumbojumbo

// Start tag from expat.
//
//   qName  raw element name
//   atts   0-terminated array of alternating attribute names and values
//
// With namespace processing off this just defers to startElementNoNS().
// Otherwise a new namespace context is opened and the attributes are
// walked twice: first to register every xmlns declaration (each is
// announced with startPrefixMapping, and kept as an attribute only if
// prefixes are being reported), then to resolve all the remaining
// attributes against the now complete set of declarations.  The element
// itself is resolved and reported last.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::startElement(const char* qName, const char** atts)
{
  if(!contentHandler_)
    return;

  if(!namespaces_)
  {
    startElementNoNS(qName, atts);
    return;
  } // if(!namespaces)

  // OK we're doing Namespaces
  nsSupport_.pushContext();
  SAX::AttributesImpl<string_type, string_adaptor> attributes;

  if(atts && *atts != 0)
  {
    // first pass - namespace declarations only
    for(const char** a1 = atts; *a1 != 0; a1 += 2)
    {
      string_type attQName = SA::construct_from_utf8(a1[0]);
      string_type value = SA::construct_from_utf8(a1[1]);

      if(!expat_wrapper_impl_mumbojumbo::isNamespaceDeclaration<string_type, string_adaptor>(attQName, nsc_))
        continue;

      // plain "xmlns" declares the default namespace, ie the empty prefix
      string_type prefix;
      typename SA::size_type n = SA::find(attQName, nsc_.colon);
      if(n != SA::npos())
        prefix = SA::construct(SA::begin(attQName) + n + 1, SA::end(attQName));

      if(!nsSupport_.declarePrefix(prefix, value))
        reportError(std::string("Illegal Namespace prefix ") + SA::asStdString(prefix));
      contentHandler_->startPrefixMapping(prefix, value);

      if(prefixes_)
        attributes.addAttribute(emptyString_,
                                emptyString_,
                                attQName,
                                attributeTypeT::CDATA,
                                value);
    } // for ...

    // second pass - everything that is not a declaration
    for(const char** a2 = atts; *a2 != 0; a2 += 2)
    {
      string_type attQName = SA::construct_from_utf8(a2[0]);
      string_type value = SA::construct_from_utf8(a2[1]);

      if(expat_wrapper_impl_mumbojumbo::isNamespaceDeclaration<string_type, string_adaptor>(attQName, nsc_))
        continue;

      qualifiedNameT attName = processName(attQName, true);
      attributes.addAttribute(attName.namespaceUri(),
                              attName.localName(),
                              attName.rawName(),
                              attributeTypeT::CDATA,
                              value);
    } // for ...
  } // if ...

  // at last! report the event
  qualifiedNameT name = processName(SA::construct_from_utf8(qName), false);
  contentHandler_->startElement(name.namespaceUri(), name.localName(), name.rawName(), attributes);
} // startElement
// Start tag with namespace processing off: element and attributes are
// reported by raw name only, with empty namespace URI and local name.
// No check on contentHandler_ is made here.
//
//   qName  raw element name
//   atts   0-terminated array of alternating attribute names and values
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::startElementNoNS(const char* qName, const char** atts)
{
  SAX::AttributesImpl<string_type, string_adaptor> attributes;

  if(atts)
  {
    for(const char** pair = atts; *pair != 0; pair += 2)
    {
      string_type attQName = SA::construct_from_utf8(pair[0]);
      string_type value = SA::construct_from_utf8(pair[1]);

      attributes.addAttribute(emptyString_,
                              emptyString_,
                              attQName,
                              attributeTypeT::CDATA,
                              value);
    } // for ...
  } // if ...

  contentHandler_->startElement(emptyString_, emptyString_, SA::construct_from_utf8(qName), attributes);
} // startElementNoNS
// End tag from expat.  With namespace processing on, the element is
// reported first, then endPrefixMapping for each prefix declared in its
// namespace context, and finally that context is discarded.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::endElement(const char* qName)
{
  if(contentHandler_ == 0)
    return;

  if(!namespaces_)
  {
    endElementNoNS(qName);
    return;
  } // if(!namespaces_)

  qualifiedNameT name = processName(SA::construct_from_utf8(qName), false);
  contentHandler_->endElement(name.namespaceUri(), name.localName(), name.rawName());

  typename namespaceSupportT::stringListT declared = nsSupport_.getDeclaredPrefixes();
  const size_t count = declared.size();
  for(size_t p = 0; p < count; ++p)
    contentHandler_->endPrefixMapping(declared[p]);

  nsSupport_.popContext();
} // endElement
// End tag with namespace processing off: reported by raw name only.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::endElementNoNS(const char* qName)
{
  if(contentHandler_ == 0)
    return;

  contentHandler_->endElement(emptyString_, emptyString_, SA::construct_from_utf8(qName));
} // endElementNoNS
// Processing instruction from expat, forwarded to the content handler
// (if any) once target and data have been converted.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::processingInstruction(const char* target, const char* data)
{
  if(contentHandler_ == 0)
    return;

  contentHandler_->processingInstruction(SA::construct_from_utf8(target),
                                         SA::construct_from_utf8(data));
} // processingInstruction
// <!ELEMENT ...> declaration from expat.  The content model tree is
// rendered back to text and handed to the declaration handler, if one
// is set.
// NOTE(review): expat expects the model to be released by user code with
// XML_FreeContentModel.  That does not happen here - confirm the calling
// C callback takes care of it.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::elementDeclaration(const XML_Char* name,
                                                            const XML_Content* model)
{
  if(declHandler_ == 0)
    return;

  std::ostringstream rendered;
  convertXML_Content(rendered, model);

  const std::string modelText = rendered.str();
  declHandler_->elementDecl(SA::construct_from_utf8(name), SA::construct_from_utf8(modelText.c_str()));
} // elementDeclaration
// Writes an expat content model tree to os in DTD syntax, recursing
// through the children.
//
//   os       where the text goes
//   model    node to render
//   isChild  false for the root of the model, true when recursing.  Only
//            affects NAME nodes, which get wrapped in parentheses when
//            they are not somebody's child.
//
// What expat hands over (from expat.h):
//
//   type         EMPTY, ANY, MIXED, NAME, CHOICE or SEQ
//   quant        NONE, OPT (?), REP (*) or PLUS (+)
//   name         set for NAME nodes only
//   numchildren  number of entries in children
//   children     array of child nodes
//
//   EMPTY, ANY  no quantifier, no name, no children
//   MIXED       quantifier is NONE or REP; the children are the element
//               names that may be mixed in with the text, all plain NAME
//               nodes
//   NAME        no children; quant is the quantifier on the name
//   CHOICE,SEQ  no name; children are nested models
//
//   EMPTY, ANY and MIXED only ever occur at the top level.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::convertXML_Content(std::ostream& os, const XML_Content* model, bool isChild)
{
  char concatenator = ' ';

  switch(model->type)
  {
    case XML_CTYPE_EMPTY:
      os << "EMPTY";
      break;
    case XML_CTYPE_ANY:
      os << "ANY";
      break;
    case XML_CTYPE_MIXED:
      // with children, the closing bracket is written after the last one
      if(model->numchildren == 0)
        os << "(#PCDATA)";
      else
        os << "(#PCDATA";
      concatenator = '|';
      break;
    case XML_CTYPE_NAME:
      if(!isChild)
        os << '(' << model->name << ')';
      else
        os << model->name;
      break;
    case XML_CTYPE_CHOICE:
      concatenator = '|';
      break;
    case XML_CTYPE_SEQ:
      concatenator = ',';
      break;
  } // switch

  // do children here
  if(model->numchildren)
  {
    const bool mixed = (model->type == XML_CTYPE_MIXED);
    if(!mixed)
      os << '(';

    for(unsigned int i = 0; i < model->numchildren; ++i)
    {
      // In a mixed model #PCDATA has already been written as the first
      // alternative, so even the first child needs a separator in front
      // of it - (#PCDATA|a|b)* rather than (#PCDATAa|b)*
      if(mixed || i != 0)
        os << concatenator;
      convertXML_Content(os, &(model->children[i]), true);
    } // for ...

    os << ')';
  } // if ...

  switch(model->quant)
  {
    case XML_CQUANT_NONE:
      break;
    case XML_CQUANT_OPT:
      os << "?";
      break;
    case XML_CQUANT_REP:
      os << "*";
      break;
    case XML_CQUANT_PLUS:
      os << "+";
      break;
  } // switch
} // convertXML_Content
// Attribute-list declaration callback: forwards one attribute declaration to
// the DeclHandler.  Does nothing when no DeclHandler is installed.
//
// elname     - name of the element the attribute belongs to
// attname    - attribute name
// att_type   - attribute type text
// dflt       - default value, may be NULL (see below)
// isrequired - non-zero for "#REQUIRED" and "#FIXED" declarations
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::attListDeclaration(const XML_Char* elname,
                                                            const XML_Char* attname,
                                                            const XML_Char* att_type,
                                                            const XML_Char* dflt,
                                                            int isrequired)
{
  /* Expat invokes this handler once per attribute, so one Attlist
     declaration naming several attributes produces several calls.
     dflt may be NULL for the "#IMPLIED" and "#REQUIRED" keywords.
     "#REQUIRED" arrives as isrequired true with a NULL dflt;
     isrequired true with a non-NULL dflt is a "#FIXED" default. */
  if(!declHandler_)
    return;

  // NOTE(review): a non-NULL dflt with isrequired false (a plain default
  // value) is also reported with the implied mode - confirm that is intended.
  const string_type* mode;
  if(!isrequired)
    mode = &attrDefaults_.implied;
  else if(dflt)
    mode = &attrDefaults_.fixed;
  else
    mode = &attrDefaults_.required;

  declHandler_->attributeDecl(SA::construct_from_utf8(elname),
                              SA::construct_from_utf8(attname),
                              SA::construct_from_utf8(att_type),
                              *mode,
                              SA::construct_from_utf8(dflt));
} // attListDeclaration
// Entity-declaration callback.  Dispatches on the kind of entity:
//   internal  -> DeclHandler::internalEntityDecl
//   unparsed  -> DTDHandler::unparsedEntityDecl
//   external  -> DeclHandler::externalEntityDecl, and the entity name is
//                recorded in declaredExternalEnts_ under both its public
//                and system identifiers
// Each handler call is skipped when the corresponding handler is not set.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::entityDeclaration(const XML_Char* entityName,
                                                           int /*is_parameter_entity*/,
                                                           const XML_Char* value,
                                                           int value_length,
                                                           const XML_Char* /*base*/,
                                                           const XML_Char* systemId,
                                                           const XML_Char* publicId,
                                                           const XML_Char* notationName)
{
  /* Internal entities (<!ENTITY foo "bar">) have a non-null value and null
     systemId, publicId and notationName.  The value is NOT null terminated;
     its length is value_length.  Zero-length values are legal, so the
     length must not be used to detect internal entities.
     External entities have a null value and a non-null systemId; publicId
     is null unless a public identifier was given.  notationName is
     non-null only for unparsed entity declarations. */
  const string_type name(SA::construct_from_utf8(entityName));

  const bool isInternal = (systemId == 0) && (publicId == 0) && (notationName == 0);
  if(isInternal)
  {
    if(declHandler_)
      declHandler_->internalEntityDecl(name, SA::construct_from_utf8(value, value_length));
    return;
  } // if ...

  const string_type pubId(SA::construct_from_utf8(publicId));
  const string_type sysId(SA::construct_from_utf8(systemId));

  if(notationName != 0)
  {
    // unparsed entity
    if(dtdHandler_)
      dtdHandler_->unparsedEntityDecl(name, pubId, sysId, SA::construct_from_utf8(notationName));
    return;
  } // if ...

  // external parsed entity - remember it whether or not anyone is listening
  if(declHandler_)
    declHandler_->externalEntityDecl(name, pubId, sysId);
  declaredExternalEnts_.insert(std::make_pair(pubId, name));
  declaredExternalEnts_.insert(std::make_pair(sysId, name));
} // entityDeclaration
// Notation-declaration callback: passes the notation name and its public and
// system identifiers to the DTDHandler.  Does nothing when no DTDHandler is
// installed.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::notationDeclaration(const XML_Char* notationName,
                                                             const XML_Char* /*base*/,
                                                             const XML_Char* systemId,
                                                             const XML_Char* publicId)
{
  if(dtdHandler_)
  {
    const string_type name = SA::construct_from_utf8(notationName);
    const string_type pubId = SA::construct_from_utf8(publicId);
    const string_type sysId = SA::construct_from_utf8(systemId);

    dtdHandler_->notationDecl(name, pubId, sysId);
  } // if ...
} // notationDeclaration
// Start-of-DOCTYPE callback.
//
// Records the DTD's public and system identifiers in declaredExternalEnts_
// under the pseudo entity name "[dtd]", then reports startDTD to the
// LexicalHandler if one is installed.
//
// doctypeName - document type name
// systemId    - system identifier of the external subset
// publicId    - public identifier of the external subset
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::startDoctypeDecl(const XML_Char *doctypeName,
                                                          const XML_Char *systemId,
                                                          const XML_Char *publicId,
                                                          int /*has_internal_subset*/)
{
  const string_type s_publicId = SA::construct_from_utf8(publicId);
  const string_type s_systemId = SA::construct_from_utf8(systemId);
  const string_type dtd = SA::construct_from_utf8("[dtd]");

  // Register the DTD before looking at the lexical handler:
  // externalEntityRefHandler uses this map to name the entity it reports
  // to the content handler via skippedEntity, which must work even when no
  // lexical handler is installed.  entityDeclaration records its entries
  // unconditionally in the same way.
  declaredExternalEnts_.insert(std::make_pair(s_publicId, dtd));
  declaredExternalEnts_.insert(std::make_pair(s_systemId, dtd));

  if(!lexicalHandler_)
    return;

  lexicalHandler_->startDTD(SA::construct_from_utf8(doctypeName),
                            s_publicId,
                            s_systemId);
} // startDoctypeDecl
// End-of-DOCTYPE callback: reports endDTD to the LexicalHandler, if any.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::endDoctypeDecl()
{
  if(!lexicalHandler_)
    return;

  lexicalHandler_->endDTD();
} // endDoctypeDecl
// Start-of-CDATA callback: reports startCDATA to the LexicalHandler, if any.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::startCdataSection()
{
  if(!lexicalHandler_)
    return;

  lexicalHandler_->startCDATA();
} // startCdataSection
// End-of-CDATA callback: reports endCDATA to the LexicalHandler, if any.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::endCdataSection()
{
  if(!lexicalHandler_)
    return;

  lexicalHandler_->endCDATA();
} // endCdataSection
// Comment callback: converts the comment text and passes it to the
// LexicalHandler, if any.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::commentHandler(const XML_Char *data)
{
  if(!lexicalHandler_)
    return;

  lexicalHandler_->comment(SA::construct_from_utf8(data));
} // commentHandler
// Skipped-entity callback: converts the entity name and passes it to the
// ContentHandler, if any.
template<class string_type, class T0, class T1>
void expat_wrapper<string_type, T0, T1>::skippedEntity(const XML_Char *entityName)
{
  if(!contentHandler_)
    return;

  contentHandler_->skippedEntity(SA::construct_from_utf8(entityName));
} // skippedEntity
// External-entity reference callback.
//
// Looks up the entity name previously recorded for the system (or, failing
// that, public) identifier.  When external resolving is switched off the
// entity is reported to the ContentHandler as skipped and 1 is returned.
// Otherwise the entity is resolved (through the EntityResolver when one is
// set), parsed with a child expat parser, and bracketed by
// startEntity/endEntity on the LexicalHandler.
//
// parser   - the parser that encountered the reference
// context  - opaque expat context passed on to the child parser
// systemId - system identifier of the entity, may be NULL
// publicId - public identifier of the entity, may be NULL
//
// Returns non-zero if the entity was skipped or parsed successfully, and 0
// if parsing failed or the child parser could not be created.
template<class string_type, class T0, class T1>
int expat_wrapper<string_type, T0, T1>::externalEntityRefHandler(XML_Parser parser,
                                                                 const XML_Char* context,
                                                                 const XML_Char* /*base*/,
                                                                 const XML_Char* systemId,
                                                                 const XML_Char* publicId)
{
  string_type pubId(SA::construct_from_utf8(publicId));
  string_type sysId(SA::construct_from_utf8(systemId));

  string_type entityName;
  if(systemId)
    entityName = declaredExternalEnts_[sysId];
  else if(publicId)
    entityName = declaredExternalEnts_[pubId];

  if(!externalResolving_)
  {
    if(!SA::empty(entityName) && contentHandler_)
      contentHandler_->skippedEntity(entityName);
    return 1;
  } // if ...

  ////////////////////////////////////////////////////////
  // resolve external entity
  if(!SA::empty(entityName) && lexicalHandler_)
    lexicalHandler_->startEntity(entityName);

  inputSourceT source;
  if(entityResolver_)
  {
    source = entityResolver_->resolveEntity(pubId, sysId);
    // the resolver declined to supply a source, so fall back to the
    // identifiers from the document
    if(SA::empty(source.getPublicId()) && SA::empty(source.getSystemId()))
    {
      source.setPublicId(pubId);
      source.setSystemId(sysId);
    } // if ...
  }
  else
  {
    source.setPublicId(pubId);
    source.setSystemId(sysId);
  } // if ...

  bool ok = false;
  XML_Parser externalParser = XML_ExternalEntityParserCreate(parser, context, 0);
  if(externalParser)  // creation can fail; treat that as a parse failure
  {
    try
    {
      ok = do_parse(source, externalParser);
    }
    catch(...)
    {
      // don't leak the child parser if parsing throws
      XML_ParserFree(externalParser);
      throw;
    } // catch
    XML_ParserFree(externalParser);
  } // if ...

  if(!SA::empty(entityName) && lexicalHandler_)
    lexicalHandler_->endEntity(entityName);

  return ok ? 1 : 0;
} // externalEntityRefHandler
2007-09-05 11:49:18 +02:00
} // namespace SAX
} // namespace Arabica
2002-06-21 13:16:28 +02:00
#endif
// end of file