2002-06-21 13:16:28 +02:00
|
|
|
#ifndef JEZUK_SAX2DOM_PARSER_H
|
|
|
|
#define JEZUK_SAX2DOM_PARSER_H
|
|
|
|
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/XMLReader.hpp>
|
|
|
|
#include <SAX/helpers/DefaultHandler.hpp>
|
|
|
|
#include <SAX/helpers/AttributeTypes.hpp>
|
|
|
|
#include <DOM/Simple/DOMImplementation.hpp>
|
|
|
|
#include <DOM/Simple/NotationImpl.hpp>
|
|
|
|
#include <DOM/Simple/EntityImpl.hpp>
|
|
|
|
#include <DOM/Document.hpp>
|
|
|
|
#include <DOM/DOMException.hpp>
|
|
|
|
#include <DOM/SAX2DOM/DocumentTypeImpl.hpp>
|
2002-06-21 13:16:28 +02:00
|
|
|
#include <map>
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/helpers/FeatureNames.hpp>
|
|
|
|
#include <SAX/helpers/PropertyNames.hpp>
|
|
|
|
#include <SAX/SAXParseException.hpp>
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 13:47:13 +02:00
|
|
|
namespace Arabica
|
|
|
|
{
|
2002-06-21 13:16:28 +02:00
|
|
|
namespace SAX2DOM
|
|
|
|
{
|
|
|
|
|
|
|
|
template<class stringT,
|
2005-08-15 23:09:13 +02:00
|
|
|
class string_adaptorT = Arabica::default_string_adaptor<stringT>,
|
2007-09-05 11:49:18 +02:00
|
|
|
class SAX_parser = Arabica::SAX::XMLReader<stringT, string_adaptorT> >
|
2007-09-06 20:54:02 +02:00
|
|
|
class Parser : protected Arabica::SAX::DefaultHandler<stringT, string_adaptorT>
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
2007-09-06 20:54:02 +02:00
|
|
|
typedef Arabica::SAX::Attributes<stringT, string_adaptorT> AttributesT;
|
|
|
|
typedef Arabica::SAX::EntityResolver<stringT, string_adaptorT> EntityResolverT;
|
|
|
|
typedef Arabica::SAX::ErrorHandler<stringT, string_adaptorT> ErrorHandlerT;
|
|
|
|
typedef Arabica::SAX::LexicalHandler<stringT, string_adaptorT> LexicalHandlerT;
|
|
|
|
typedef Arabica::SAX::DeclHandler<stringT, string_adaptorT> DeclHandlerT;
|
|
|
|
typedef Arabica::SAX::InputSource<stringT, string_adaptorT> InputSourceT;
|
2007-09-05 13:47:13 +02:00
|
|
|
typedef Arabica::SimpleDOM::EntityImpl<stringT, string_adaptorT> EntityT;
|
|
|
|
typedef Arabica::SimpleDOM::NotationImpl<stringT, string_adaptorT> NotationT;
|
|
|
|
typedef Arabica::SimpleDOM::ElementImpl<stringT, string_adaptorT> ElementT;
|
2007-09-06 20:54:02 +02:00
|
|
|
typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
public:
|
|
|
|
Parser() :
|
|
|
|
entityResolver_(0),
|
2007-07-19 19:01:19 +02:00
|
|
|
errorHandler_(0),
|
|
|
|
documentType_(0)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
2007-09-05 11:49:18 +02:00
|
|
|
Arabica::SAX::FeatureNames<stringT, string_adaptorT> fNames;
|
2005-08-15 10:31:41 +02:00
|
|
|
features_.insert(std::make_pair(fNames.namespaces, true));
|
|
|
|
features_.insert(std::make_pair(fNames.namespace_prefixes, true));
|
2005-11-17 23:05:47 +01:00
|
|
|
features_.insert(std::make_pair(fNames.validation, false));
|
2002-06-21 13:16:28 +02:00
|
|
|
} // Parser
|
|
|
|
|
|
|
|
void setEntityResolver(EntityResolverT& resolver) { entityResolver_ = &resolver; }
|
|
|
|
EntityResolverT* getEntityResolver() const { return entityResolver_; }
|
|
|
|
|
2003-10-29 17:12:31 +01:00
|
|
|
void setErrorHandler(ErrorHandlerT& handler) { errorHandler_ = &handler; }
|
2003-08-28 00:11:10 +02:00
|
|
|
ErrorHandlerT* getErrorHandler() const { return errorHandler_; }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-08-15 10:44:31 +02:00
|
|
|
void setFeature(const stringT& name, bool value)
|
|
|
|
{
|
2005-08-21 14:48:00 +02:00
|
|
|
typename Features::iterator f = features_.find(name);
|
2005-08-15 10:44:31 +02:00
|
|
|
if(f == features_.end())
|
2006-08-04 11:49:58 +02:00
|
|
|
features_.insert(std::make_pair(name, value));
|
|
|
|
else
|
|
|
|
f->second = value;
|
2005-08-15 10:44:31 +02:00
|
|
|
} // setFeature
|
|
|
|
|
|
|
|
bool getFeature(const stringT& name) const
|
|
|
|
{
|
2005-08-21 14:48:00 +02:00
|
|
|
typename Features::const_iterator f = features_.find(name);
|
2005-08-15 10:44:31 +02:00
|
|
|
if(f == features_.end())
|
2007-09-05 11:49:18 +02:00
|
|
|
throw Arabica::SAX::SAXNotRecognizedException(std::string("Feature not recognized ") + string_adaptorT::asStdString(name));
|
2005-08-15 10:44:31 +02:00
|
|
|
return f->second;
|
|
|
|
} // getFeature
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
bool parse(const stringT& systemId)
|
|
|
|
{
|
2006-06-08 11:51:18 +02:00
|
|
|
InputSourceT is(systemId);
|
2002-06-21 13:16:28 +02:00
|
|
|
return parse(is);
|
|
|
|
} // loadDOM
|
|
|
|
|
2006-06-08 11:51:18 +02:00
|
|
|
bool parse(InputSourceT& source)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
2007-09-05 11:49:18 +02:00
|
|
|
Arabica::SAX::PropertyNames<stringT, string_adaptorT> pNames;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 13:47:13 +02:00
|
|
|
DOM::DOMImplementation<stringT> di = Arabica::SimpleDOM::DOMImplementation<stringT, string_adaptorT>::getDOMImplementation();
|
2005-09-30 23:36:11 +02:00
|
|
|
document_ = di.createDocument(string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), 0);
|
2002-06-21 13:16:28 +02:00
|
|
|
currentNode_ = document_;
|
|
|
|
inCDATA_ = false;
|
2007-08-27 01:12:49 +02:00
|
|
|
inDTD_ = false;
|
2002-06-21 13:16:28 +02:00
|
|
|
inEntity_ = 0;
|
|
|
|
|
|
|
|
SAX_parser parser;
|
|
|
|
parser.setContentHandler(*this);
|
|
|
|
parser.setErrorHandler(*this);
|
|
|
|
if(entityResolver_)
|
|
|
|
parser.setEntityResolver(*entityResolver_);
|
2005-08-15 10:13:45 +02:00
|
|
|
|
2007-09-06 20:54:02 +02:00
|
|
|
parser.setLexicalHandler(*this);
|
|
|
|
parser.setDeclHandler(*this);
|
2005-08-15 10:13:45 +02:00
|
|
|
|
2005-08-15 10:31:41 +02:00
|
|
|
setParserFeatures(parser);
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2003-10-29 17:12:31 +01:00
|
|
|
try
|
|
|
|
{
|
|
|
|
parser.parse(source);
|
|
|
|
}
|
|
|
|
catch(const DOM::DOMException& de)
|
|
|
|
{
|
|
|
|
document_ = 0;
|
|
|
|
|
|
|
|
if(errorHandler_)
|
|
|
|
{
|
|
|
|
SAXParseExceptionT pe(de.what());
|
|
|
|
errorHandler_->fatalError(pe);
|
|
|
|
} // if ...
|
|
|
|
} // catch
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
return (document_ != 0);
|
|
|
|
} // loadDOM
|
|
|
|
|
|
|
|
DOM::Document<stringT> getDocument() const
|
|
|
|
{
|
|
|
|
return document_;
|
|
|
|
} // getDocument
|
|
|
|
|
|
|
|
void reset()
|
|
|
|
{
|
|
|
|
currentNode_ = 0;
|
|
|
|
document_ = 0;
|
|
|
|
} // reset
|
|
|
|
|
2005-12-01 17:13:42 +01:00
|
|
|
protected:
|
|
|
|
DOM::Node<stringT>& currentNode() { return currentNode_; }
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
private:
|
|
|
|
// no implementations
|
|
|
|
Parser(const Parser&);
|
|
|
|
bool operator==(const Parser&) const;
|
|
|
|
Parser& operator=(const Parser&);
|
|
|
|
|
|
|
|
// instance variables
|
|
|
|
DOM::Document<stringT> document_;
|
|
|
|
DocumentType<stringT, string_adaptorT >* documentType_;
|
|
|
|
DOM::Node<stringT> currentNode_;
|
|
|
|
DOM::Node<stringT> cachedCurrent_;
|
|
|
|
|
2005-08-15 10:31:41 +02:00
|
|
|
typedef std::map<stringT, bool> Features;
|
|
|
|
Features features_;
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
bool inCDATA_;
|
2007-08-27 01:12:49 +02:00
|
|
|
bool inDTD_;
|
2002-06-21 13:16:28 +02:00
|
|
|
int inEntity_;
|
2005-08-15 10:31:41 +02:00
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
std::map<stringT, EntityT*> declaredEntities_;
|
|
|
|
|
|
|
|
EntityResolverT* entityResolver_;
|
2003-08-28 00:11:10 +02:00
|
|
|
ErrorHandlerT* errorHandler_;
|
2007-09-05 11:49:18 +02:00
|
|
|
Arabica::SAX::AttributeTypes<stringT, string_adaptorT> attributeTypes_;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-12-01 17:13:42 +01:00
|
|
|
protected:
|
2005-08-15 10:31:41 +02:00
|
|
|
void setParserFeatures(SAX_parser& parser) const
|
|
|
|
{
|
2005-08-21 14:48:00 +02:00
|
|
|
for(typename Features::const_iterator f = features_.begin(), e = features_.end(); f != e; ++f)
|
2005-11-09 22:13:22 +01:00
|
|
|
try {
|
|
|
|
parser.setFeature(f->first, f->second);
|
|
|
|
}
|
2007-09-05 11:49:18 +02:00
|
|
|
catch(const Arabica::SAX::SAXException&) { }
|
2005-08-15 10:31:41 +02:00
|
|
|
} // setParserFeatures
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
///////////////////////////////////////////////////////////
|
|
|
|
// ContentHandler
|
|
|
|
virtual void endDocument()
|
|
|
|
{
|
|
|
|
currentNode_ = 0;
|
|
|
|
} // endDocument
|
|
|
|
|
|
|
|
virtual void startElement(const stringT& namespaceURI, const stringT& localName,
|
2007-09-06 20:54:02 +02:00
|
|
|
const stringT& qName, const AttributesT& atts)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
2003-10-29 17:12:31 +01:00
|
|
|
try
|
|
|
|
{
|
|
|
|
DOM::Element<stringT> elem = document_.createElementNS(namespaceURI, qName);
|
|
|
|
currentNode_.appendChild(elem);
|
|
|
|
|
|
|
|
// attributes here
|
|
|
|
for(int i = 0; i < atts.getLength(); ++i)
|
2007-06-18 14:01:47 +02:00
|
|
|
{
|
|
|
|
stringT qName = atts.getQName(i);
|
|
|
|
if(string_adaptorT::empty(qName))
|
|
|
|
qName = atts.getLocalName(i);
|
|
|
|
elem.setAttributeNS(atts.getURI(i), qName, atts.getValue(i));
|
|
|
|
}
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2003-10-29 17:12:31 +01:00
|
|
|
currentNode_ = elem;
|
|
|
|
}
|
|
|
|
catch(const DOM::DOMException& de)
|
|
|
|
{
|
|
|
|
reset();
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2003-10-29 17:12:31 +01:00
|
|
|
if(errorHandler_)
|
|
|
|
{
|
|
|
|
SAXParseExceptionT pe(de.what());
|
|
|
|
errorHandler_->fatalError(pe);
|
|
|
|
} // if ...
|
|
|
|
} // catch
|
2002-06-21 13:16:28 +02:00
|
|
|
} // startElement
|
|
|
|
|
|
|
|
virtual void endElement(const stringT& namespaceURI, const stringT& localName,
|
|
|
|
const stringT& qName)
|
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
currentNode_ = currentNode_.getParentNode();
|
|
|
|
} // endElement
|
|
|
|
|
|
|
|
virtual void characters(const stringT& ch)
|
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if(!inCDATA_)
|
|
|
|
currentNode_.appendChild(document_.createTextNode(ch));
|
|
|
|
else
|
|
|
|
currentNode_.appendChild(document_.createCDATASection(ch));
|
|
|
|
} // characters
|
|
|
|
|
|
|
|
virtual void processingInstruction(const stringT& target, const stringT& data)
|
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
currentNode_.appendChild(document_.createProcessingInstruction(target, data));
|
|
|
|
} // processingInstruction
|
|
|
|
|
|
|
|
virtual void skippedEntity(const stringT& name)
|
|
|
|
{
|
2007-08-27 01:12:49 +02:00
|
|
|
if(currentNode_ == 0 || inDTD_ == true)
|
2002-06-21 13:16:28 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
currentNode_.appendChild(document_.createEntityReference(name));
|
|
|
|
} // skippedEntity
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////
|
|
|
|
// ErrorHandler
|
2003-08-28 00:59:01 +02:00
|
|
|
virtual void warning(const SAXParseExceptionT& e)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
if(errorHandler_)
|
|
|
|
errorHandler_->warning(e);
|
|
|
|
} // warning
|
|
|
|
|
2003-08-28 00:59:01 +02:00
|
|
|
virtual void error(const SAXParseExceptionT& e)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
if(errorHandler_)
|
|
|
|
errorHandler_->error(e);
|
|
|
|
reset();
|
|
|
|
} // error
|
|
|
|
|
2003-08-28 00:59:01 +02:00
|
|
|
virtual void fatalError(const SAXParseExceptionT& e)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
if(errorHandler_)
|
|
|
|
errorHandler_->fatalError(e);
|
|
|
|
reset();
|
|
|
|
} // fatalError
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////
|
|
|
|
// LexicalHandler
|
|
|
|
virtual void startDTD(const stringT& name, const stringT& publicId, const stringT& systemId)
|
|
|
|
{
|
|
|
|
documentType_ = new DocumentType<stringT, string_adaptorT >(name, publicId, systemId);
|
|
|
|
document_.insertBefore(documentType_, 0);
|
2007-08-27 01:12:49 +02:00
|
|
|
inDTD_ = true;
|
2002-06-21 13:16:28 +02:00
|
|
|
} // startDTD
|
|
|
|
|
|
|
|
virtual void endDTD()
|
|
|
|
{
|
|
|
|
documentType_->setReadOnly(true);
|
2007-08-27 01:12:49 +02:00
|
|
|
inDTD_ = false;
|
2002-06-21 13:16:28 +02:00
|
|
|
} // endDTD
|
|
|
|
|
|
|
|
virtual void startEntity(const stringT& name)
|
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if(++inEntity_ == 1)
|
|
|
|
{
|
|
|
|
cachedCurrent_ = currentNode_;
|
|
|
|
currentNode_ = declaredEntities_[name];
|
2003-04-28 16:45:58 +02:00
|
|
|
if(currentNode_ != 0 && currentNode_.hasChildNodes() == true) // already populated
|
2002-06-21 13:16:28 +02:00
|
|
|
currentNode_ = 0;
|
|
|
|
}
|
|
|
|
} // startEntity
|
|
|
|
|
|
|
|
virtual void endEntity(const stringT& name)
|
|
|
|
{
|
|
|
|
if(--inEntity_ == 0)
|
|
|
|
currentNode_ = cachedCurrent_;
|
|
|
|
|
|
|
|
currentNode_.appendChild(document_.createEntityReference(name));
|
|
|
|
} // endEntity
|
|
|
|
|
|
|
|
virtual void startCDATA()
|
|
|
|
{
|
|
|
|
inCDATA_ = true;
|
|
|
|
} // startCDATA
|
|
|
|
|
|
|
|
virtual void endCDATA()
|
|
|
|
{
|
|
|
|
inCDATA_ = false;
|
|
|
|
} // endCDATA
|
|
|
|
|
|
|
|
virtual void comment(const stringT& text)
|
|
|
|
{
|
|
|
|
if(currentNode_ == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
currentNode_.appendChild(document_.createComment(text));
|
|
|
|
} // comment
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// DeclHandler
|
|
|
|
virtual void elementDecl(const stringT& name, const stringT& model)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2006-08-04 11:49:58 +02:00
|
|
|
documentType_->addElement(name);
|
2002-06-21 13:16:28 +02:00
|
|
|
} // elementDecl
|
|
|
|
|
|
|
|
virtual void attributeDecl(const stringT& elementName,
|
|
|
|
const stringT& attributeName,
|
|
|
|
const stringT& type,
|
|
|
|
const stringT& valueDefault,
|
|
|
|
const stringT& value)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2005-11-17 12:38:21 +01:00
|
|
|
if(!string_adaptorT::empty(value))
|
2002-06-21 13:16:28 +02:00
|
|
|
documentType_->addDefaultAttr(elementName, attributeName, value);
|
|
|
|
if(type == attributeTypes_.id)
|
|
|
|
documentType_->addElementId(attributeName);
|
|
|
|
} // attributeDecl
|
|
|
|
|
|
|
|
virtual void internalEntityDecl(const stringT& name, const stringT& value)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2005-10-03 14:40:44 +02:00
|
|
|
EntityT* entity = new EntityT(0, name, string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""));
|
2002-06-21 13:16:28 +02:00
|
|
|
declaredEntities_.insert(std::make_pair(name, entity));
|
|
|
|
documentType_->addEntity(entity);
|
2004-01-28 22:40:28 +01:00
|
|
|
DOM::Node<stringT> n = entity;
|
|
|
|
n.appendChild(document_.createTextNode(value));
|
2002-06-21 13:16:28 +02:00
|
|
|
} // internalEntityDecl
|
|
|
|
|
|
|
|
virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2005-10-03 14:40:44 +02:00
|
|
|
EntityT* entity = new EntityT(0, name, publicId, systemId, string_adaptorT::construct_from_utf8(""));
|
2002-06-21 13:16:28 +02:00
|
|
|
declaredEntities_.insert(std::make_pair(name, entity)); // we'll populate it later
|
|
|
|
documentType_->addEntity(entity);
|
|
|
|
} // externalEntityDecl
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// DTDHandler
|
|
|
|
virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2002-06-21 13:16:28 +02:00
|
|
|
documentType_->addNotation(new NotationT(0, name, publicId, systemId));
|
|
|
|
} // notationDecl
|
|
|
|
|
|
|
|
virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName)
|
|
|
|
{
|
2007-07-19 19:01:19 +02:00
|
|
|
if(!documentType_)
|
|
|
|
return;
|
2002-06-21 13:16:28 +02:00
|
|
|
documentType_->addEntity(new EntityT(0, name, publicId, systemId, notationName));
|
|
|
|
} // unparsedEntityDecl
|
|
|
|
}; // class Parser
|
|
|
|
|
2007-09-05 13:47:13 +02:00
|
|
|
} // namespace SAX2DOM
|
|
|
|
} // namespace Arabica
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|