arabica/include/DOM/SAX2DOM/SAX2DOM.hpp

441 lines
14 KiB
C++
Raw Normal View History

2002-06-21 13:16:28 +02:00
#ifndef JEZUK_SAX2DOM_PARSER_H
#define JEZUK_SAX2DOM_PARSER_H
2007-09-05 00:55:47 +02:00
#include <SAX/XMLReader.hpp>
#include <SAX/helpers/DefaultHandler.hpp>
#include <SAX/helpers/AttributeTypes.hpp>
#include <SAX/filter/TextCoalescer.hpp>
2007-09-05 00:55:47 +02:00
#include <DOM/Simple/DOMImplementation.hpp>
#include <DOM/Simple/NotationImpl.hpp>
#include <DOM/Simple/EntityImpl.hpp>
#include <DOM/Document.hpp>
#include <DOM/DOMException.hpp>
#include <DOM/SAX2DOM/DocumentTypeImpl.hpp>
2002-06-21 13:16:28 +02:00
#include <map>
2007-09-05 00:55:47 +02:00
#include <SAX/helpers/FeatureNames.hpp>
#include <SAX/helpers/PropertyNames.hpp>
#include <SAX/SAXParseException.hpp>
2002-06-21 13:16:28 +02:00
2007-09-05 13:47:13 +02:00
namespace Arabica
{
2002-06-21 13:16:28 +02:00
namespace SAX2DOM
{
2008-04-07 17:37:47 +02:00
// Resolves the two optional template arguments of Parser (T0, T1) into the
// concrete string adaptor type and SAX parser type, using Arabica::get_param.
// Arabica::default_string_adaptor<string_type> and
// Arabica::SAX::XMLReader<string_type, string_adaptor> are the types passed
// in the "default" position of the respective get_param lookups.
// NOTE(review): presumably get_param selects whichever of T0/T1 carries the
// requested tag, so the two arguments can be given in either order - confirm
// against the definition of get_param.
template<class string_type, class T0, class T1>
struct ParserTypes
{
// string adaptor: looked up with (T0, T1)
typedef typename Arabica::get_param<Arabica::string_adaptor_tag,
Arabica::default_string_adaptor<string_type>,
T0,
T1>::type string_adaptor;
// SAX parser: looked up with the arguments in the opposite order (T1, T0)
typedef typename Arabica::get_param<Arabica::SAX::XMLReaderInterface_tag,
Arabica::SAX::XMLReader<string_type, string_adaptor>,
T1,
T0>::type SAX_parser_type;
};
2002-06-21 13:16:28 +02:00
template<class stringT,
2008-04-07 17:37:47 +02:00
class T0 = Arabica::nil_t,
class T1 = Arabica::nil_t>
class Parser : protected Arabica::SAX::DefaultHandler<stringT, typename ParserTypes<stringT, T0, T1>::string_adaptor>
2002-06-21 13:16:28 +02:00
{
2008-04-07 17:37:47 +02:00
// Types resolved from the template arguments via ParserTypes
typedef typename ParserTypes<stringT, T0, T1>::string_adaptor string_adaptorT;
typedef typename ParserTypes<stringT, T0, T1>::SAX_parser_type SAX_parser_type;
// Shorthand for the SAX interfaces, all parameterised on the same
// string type and string adaptor
typedef Arabica::SAX::XMLReaderInterface<stringT, string_adaptorT> XMLReaderInterfaceT;
typedef Arabica::SAX::TextCoalescer<stringT, string_adaptorT> TextCoalescerT;
typedef Arabica::SAX::Attributes<stringT, string_adaptorT> AttributesT;
typedef Arabica::SAX::EntityResolver<stringT, string_adaptorT> EntityResolverT;
typedef Arabica::SAX::ErrorHandler<stringT, string_adaptorT> ErrorHandlerT;
typedef Arabica::SAX::LexicalHandler<stringT, string_adaptorT> LexicalHandlerT;
typedef Arabica::SAX::DeclHandler<stringT, string_adaptorT> DeclHandlerT;
typedef Arabica::SAX::InputSource<stringT, string_adaptorT> InputSourceT;
2007-09-05 13:47:13 +02:00
// Shorthand for the SimpleDOM node implementations this class names
typedef Arabica::SimpleDOM::EntityImpl<stringT, string_adaptorT> EntityT;
typedef Arabica::SimpleDOM::NotationImpl<stringT, string_adaptorT> NotationT;
typedef Arabica::SimpleDOM::ElementImpl<stringT, string_adaptorT> ElementT;
// Exception type passed to the ErrorHandler callbacks
typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT;
2002-06-21 13:16:28 +02:00
public:
Parser() :
documentType_(0),
2002-06-21 13:16:28 +02:00
entityResolver_(0),
errorHandler_(0)
2002-06-21 13:16:28 +02:00
{
2007-09-05 11:49:18 +02:00
Arabica::SAX::FeatureNames<stringT, string_adaptorT> fNames;
2005-08-15 10:31:41 +02:00
features_.insert(std::make_pair(fNames.namespaces, true));
features_.insert(std::make_pair(fNames.namespace_prefixes, true));
features_.insert(std::make_pair(fNames.validation, false));
2002-06-21 13:16:28 +02:00
} // Parser
void setEntityResolver(EntityResolverT& resolver) { entityResolver_ = &resolver; }
EntityResolverT* getEntityResolver() const { return entityResolver_; }
void setErrorHandler(ErrorHandlerT& handler) { errorHandler_ = &handler; }
2003-08-28 00:11:10 +02:00
ErrorHandlerT* getErrorHandler() const { return errorHandler_; }
2002-06-21 13:16:28 +02:00
2005-08-15 10:44:31 +02:00
void setFeature(const stringT& name, bool value)
{
2005-08-21 14:48:00 +02:00
typename Features::iterator f = features_.find(name);
2005-08-15 10:44:31 +02:00
if(f == features_.end())
2006-08-04 11:49:58 +02:00
features_.insert(std::make_pair(name, value));
else
f->second = value;
2005-08-15 10:44:31 +02:00
} // setFeature
bool getFeature(const stringT& name) const
{
2005-08-21 14:48:00 +02:00
typename Features::const_iterator f = features_.find(name);
2005-08-15 10:44:31 +02:00
if(f == features_.end())
2007-09-05 11:49:18 +02:00
throw Arabica::SAX::SAXNotRecognizedException(std::string("Feature not recognized ") + string_adaptorT::asStdString(name));
2005-08-15 10:44:31 +02:00
return f->second;
} // getFeature
2002-06-21 13:16:28 +02:00
bool parse(const stringT& systemId)
{
2006-06-08 11:51:18 +02:00
InputSourceT is(systemId);
2002-06-21 13:16:28 +02:00
return parse(is);
} // loadDOM
2006-06-08 11:51:18 +02:00
// Parses the given input into a fresh DOM document.
//
// A new empty document is created, all per-parse state is reset, and a
// SAX_parser_type wrapped in a TextCoalescer is run with this object as
// content, error, lexical and decl handler. The user's entity resolver is
// passed on when one has been set.
//
// Returns true if a document is available from getDocument() afterwards.
// A DOM::DOMException raised while building the tree discards the document
// and is reported to the user's error handler (if any) as a fatal error.
// Other exception types are not caught here.
bool parse(InputSourceT& source)
{
  DOM::DOMImplementation<stringT, string_adaptorT> di = Arabica::SimpleDOM::DOMImplementation<stringT, string_adaptorT>::getDOMImplementation();
  document_ = di.createDocument(string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), 0);
  currentNode_ = document_;

  // Drop everything that refers to a previously parsed document. The
  // doctype and the entity nodes are handed to that document's doctype,
  // so (assuming the old document owns them) keeping the raw pointers
  // across parses could leave them dangling, and a stale entry in
  // declaredEntities_ would also block insert() for a same-named entity
  // in the new document.
  cachedCurrent_ = 0;
  documentType_ = 0;
  declaredEntities_.clear();

  inCDATA_ = false;
  inDTD_ = false;
  inEntity_ = 0;

  SAX_parser_type base_parser;
  TextCoalescerT parser(base_parser);
  parser.setContentHandler(*this);
  parser.setErrorHandler(*this);
  if(entityResolver_)
    parser.setEntityResolver(*entityResolver_);
  parser.setLexicalHandler(*this);
  parser.setDeclHandler(*this);

  setParserFeatures(parser);

  try
  {
    parser.parse(source);
  }
  catch(const DOM::DOMException& de)
  {
    // same clean-up as the catch block in startElement(): discard both the
    // document and the current node
    reset();

    if(errorHandler_)
    {
      SAXParseExceptionT pe(de.what());
      errorHandler_->fatalError(pe);
    } // if ...
  } // catch

  return (document_ != 0);
} // parse
// Returns the document produced by the most recent parse. The handle is
// null after reset() or after a parse in which an error was reported.
DOM::Document<stringT, string_adaptorT> getDocument() const
{
  return document_;
} // getDocument
void reset()
{
currentNode_ = 0;
document_ = 0;
} // reset
protected:
// Gives derived classes access to the node that new children are
// currently being appended to.
DOM::Node<stringT, string_adaptorT>& currentNode()
{
  return currentNode_;
} // currentNode
2002-06-21 13:16:28 +02:00
private:
// Declared private and deliberately given no implementations, so a Parser
// cannot be copied, assigned or compared from outside the class
Parser(const Parser&);
bool operator==(const Parser&) const;
Parser& operator=(const Parser&);
// instance variables
// document being built by the current/most recent parse; null after reset()
DOM::Document<stringT, string_adaptorT> document_;
2002-06-21 13:16:28 +02:00
// doctype node created in startDTD() and inserted into document_
DocumentType<stringT, string_adaptorT >* documentType_;
// node that the content handlers append new children to; null means
// "do not build anything"
DOM::Node<stringT, string_adaptorT> currentNode_;
// value of currentNode_ saved by startEntity() and restored by endEntity()
DOM::Node<stringT, string_adaptorT> cachedCurrent_;
2002-06-21 13:16:28 +02:00
2005-08-15 10:31:41 +02:00
// feature name -> requested value, applied in setParserFeatures()
typedef std::map<stringT, bool> Features;
Features features_;
2002-06-21 13:16:28 +02:00
// true between startCDATA() and endCDATA()
bool inCDATA_;
// true between startDTD() and endDTD()
bool inDTD_;
2002-06-21 13:16:28 +02:00
// entity nesting depth, maintained by startEntity()/endEntity()
int inEntity_;
2005-08-15 10:31:41 +02:00
2002-06-21 13:16:28 +02:00
// entity name -> entity node, filled by internalEntityDecl() and
// externalEntityDecl(), consulted by startEntity()
std::map<stringT, EntityT*> declaredEntities_;
// optional user-supplied resolver; null when none has been set
EntityResolverT* entityResolver_;
2003-08-28 00:11:10 +02:00
// optional user-supplied error handler; null when none has been set
ErrorHandlerT* errorHandler_;
2007-09-05 11:49:18 +02:00
// attribute type names; attributeDecl() compares against its id member
Arabica::SAX::AttributeTypes<stringT, string_adaptorT> attributeTypes_;
2002-06-21 13:16:28 +02:00
protected:
// Applies every stored feature setting to the given parser. A setting that
// the parser rejects with a SAXException is skipped, so the remaining
// settings are still applied.
void setParserFeatures(XMLReaderInterfaceT& parser) const
{
  typename Features::const_iterator setting = features_.begin();
  const typename Features::const_iterator last = features_.end();
  for( ; setting != last; ++setting)
  {
    try
    {
      parser.setFeature(setting->first, setting->second);
    }
    catch(const Arabica::SAX::SAXException&)
    {
      // best effort - ignore features this parser does not accept
    }
  } // for ...
} // setParserFeatures
2002-06-21 13:16:28 +02:00
///////////////////////////////////////////////////////////
// ContentHandler
// End of input: nothing more is appended, so the current node is released.
// The document itself is kept for getDocument().
virtual void endDocument()
{
  currentNode_ = 0;
} // endDocument
virtual void startElement(const stringT& namespaceURI, const stringT& /*localName*/,
const stringT& qName, const AttributesT& atts)
2002-06-21 13:16:28 +02:00
{
if(currentNode_ == 0)
return;
try
{
DOM::Element<stringT, string_adaptorT> elem = document_.createElementNS(namespaceURI, qName);
currentNode_.appendChild(elem);
// attributes here
for(int i = 0; i < atts.getLength(); ++i)
2007-06-18 14:01:47 +02:00
{
stringT qName = atts.getQName(i);
if(string_adaptorT::empty(qName))
qName = atts.getLocalName(i);
elem.setAttributeNS(atts.getURI(i), qName, atts.getValue(i));
}
2002-06-21 13:16:28 +02:00
currentNode_ = elem;
}
catch(const DOM::DOMException& de)
{
reset();
2002-06-21 13:16:28 +02:00
if(errorHandler_)
{
SAXParseExceptionT pe(de.what());
errorHandler_->fatalError(pe);
} // if ...
} // catch
2002-06-21 13:16:28 +02:00
} // startElement
virtual void endElement(const stringT& /*namespaceURI*/, const stringT& /*localName*/,
const stringT& /*qName*/)
2002-06-21 13:16:28 +02:00
{
if(currentNode_ == 0)
return;
currentNode_ = currentNode_.getParentNode();
} // endElement
// Appends character data to the current node: as a CDATA section while
// inside startCDATA()/endCDATA(), as a text node otherwise.
virtual void characters(const stringT& ch)
{
  if(currentNode_ != 0)
  {
    if(inCDATA_)
      currentNode_.appendChild(document_.createCDATASection(ch));
    else
      currentNode_.appendChild(document_.createTextNode(ch));
  } // if ...
} // characters
// Appends a processing instruction node to the current node.
virtual void processingInstruction(const stringT& target, const stringT& data)
{
  if(currentNode_ != 0)
    currentNode_.appendChild(document_.createProcessingInstruction(target, data));
} // processingInstruction
// Records an entity the parser did not expand as an entity reference node.
// Entities skipped while inside the DTD are ignored.
virtual void skippedEntity(const stringT& name)
{
  if(currentNode_ != 0 && !inDTD_)
    currentNode_.appendChild(document_.createEntityReference(name));
} // skippedEntity
////////////////////////////////////////////////////
// ErrorHandler
2003-08-28 00:59:01 +02:00
virtual void warning(const SAXParseExceptionT& e)
2002-06-21 13:16:28 +02:00
{
if(errorHandler_)
errorHandler_->warning(e);
} // warning
2003-08-28 00:59:01 +02:00
virtual void error(const SAXParseExceptionT& e)
2002-06-21 13:16:28 +02:00
{
if(errorHandler_)
errorHandler_->error(e);
reset();
} // error
2003-08-28 00:59:01 +02:00
virtual void fatalError(const SAXParseExceptionT& e)
2002-06-21 13:16:28 +02:00
{
if(errorHandler_)
errorHandler_->fatalError(e);
reset();
} // fatalError
/////////////////////////////////////////////////////
// LexicalHandler
// Start of the DOCTYPE declaration: creates the doctype node, inserts it
// into the document and notes that DTD content follows.
virtual void startDTD(const stringT& name, const stringT& publicId, const stringT& systemId)
{
  documentType_ = new DocumentType<stringT, string_adaptorT >(name, publicId, systemId);
  document_.insertBefore(documentType_, 0);

  inDTD_ = true;
} // startDTD
// End of the DOCTYPE declaration: marks the doctype node read-only and
// leaves DTD mode.
virtual void endDTD()
{
  // guarded like the Decl/DTD handlers, which also tolerate a missing
  // doctype node
  if(documentType_)
    documentType_->setReadOnly(true);

  inDTD_ = false;
} // endDTD
virtual void startEntity(const stringT& name)
{
if(currentNode_ == 0)
return;
if(++inEntity_ == 1)
{
cachedCurrent_ = currentNode_;
currentNode_ = declaredEntities_[name];
if(currentNode_ != 0 && currentNode_.hasChildNodes() == true) // already populated
2002-06-21 13:16:28 +02:00
currentNode_ = 0;
}
} // startEntity
virtual void endEntity(const stringT& name)
{
if(--inEntity_ == 0)
currentNode_ = cachedCurrent_;
currentNode_.appendChild(document_.createEntityReference(name));
} // endEntity
// From here on characters() creates CDATA sections instead of text nodes.
virtual void startCDATA()
{
  inCDATA_ = true;
} // startCDATA
// From here on characters() creates text nodes again.
virtual void endCDATA()
{
  inCDATA_ = false;
} // endCDATA
// Appends a comment node to the current node.
virtual void comment(const stringT& text)
{
  if(currentNode_ != 0)
    currentNode_.appendChild(document_.createComment(text));
} // comment
//////////////////////////////////////////////////////////////////////
// DeclHandler
virtual void elementDecl(const stringT& name, const stringT& /*model*/)
2002-06-21 13:16:28 +02:00
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
2006-08-04 11:49:58 +02:00
documentType_->addElement(name);
2002-06-21 13:16:28 +02:00
} // elementDecl
virtual void attributeDecl(const stringT& elementName,
const stringT& attributeName,
const stringT& type,
const stringT& /*valueDefault*/,
2002-06-21 13:16:28 +02:00
const stringT& value)
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
2005-11-17 12:38:21 +01:00
if(!string_adaptorT::empty(value))
2002-06-21 13:16:28 +02:00
documentType_->addDefaultAttr(elementName, attributeName, value);
if(type == attributeTypes_.id)
documentType_->addElementId(attributeName);
} // attributeDecl
virtual void internalEntityDecl(const stringT& name, const stringT& value)
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
static const stringT LEFT_ANGLE_BRACKET = string_adaptorT::construct_from_utf8("<");
2005-10-03 14:40:44 +02:00
EntityT* entity = new EntityT(0, name, string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""));
2002-06-21 13:16:28 +02:00
declaredEntities_.insert(std::make_pair(name, entity));
documentType_->addEntity(entity);
DOM::Node<stringT, string_adaptorT> n = entity;
if(string_adaptorT::find(value, LEFT_ANGLE_BRACKET) == string_adaptorT::npos())
{
n.appendChild(document_.createTextNode(value));
return;
} // if ...
// parse the value into a Document
// this may not quite do the right thing for some custom strug types,
// but at the time I've writing this, the code has been missing this
// stuff for something like 8 years and nobody's noticed so it's not
// massively used.
// I only noticed myself when I started running the DOM conformance tests
std::stringstream ss;
ss << "<wrapper>" << string_adaptorT::asStdString(value) << "</wrapper>";
Arabica::SAX::InputSource<stringT, string_adaptorT> is(ss);
Arabica::SAX2DOM::Parser<stringT, string_adaptorT> parser;
parser.parse(is);
DOM::Document<stringT, string_adaptorT> entityDoc = parser.getDocument();
DOM::Element<stringT, string_adaptorT> entityElem = entityDoc.getDocumentElement();
DOM::Node<stringT, string_adaptorT> child = entityElem.getFirstChild();
while(child != 0)
{
// import the contents thereof
DOM::Node<stringT, string_adaptorT> imported = document_.importNode(child, true);
// append to entity
n.appendChild(imported);
child = child.getNextSibling();
} // while
2002-06-21 13:16:28 +02:00
} // internalEntityDecl
virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
2005-10-03 14:40:44 +02:00
EntityT* entity = new EntityT(0, name, publicId, systemId, string_adaptorT::construct_from_utf8(""));
2002-06-21 13:16:28 +02:00
declaredEntities_.insert(std::make_pair(name, entity)); // we'll populate it later
documentType_->addEntity(entity);
} // externalEntityDecl
/////////////////////////////////////////////////////////////////////////
// DTDHandler
virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
2002-06-21 13:16:28 +02:00
documentType_->addNotation(new NotationT(0, name, publicId, systemId));
} // notationDecl
virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName)
{
2007-07-19 19:01:19 +02:00
if(!documentType_)
return;
2002-06-21 13:16:28 +02:00
documentType_->addEntity(new EntityT(0, name, publicId, systemId, notationName));
} // unparsedEntityDecl
}; // class Parser
2007-09-05 13:47:13 +02:00
} // namespace SAX2DOM
} // namespace Arabica
2002-06-21 13:16:28 +02:00
#endif