// SAX2DOM parser: builds an Arabica DOM document from SAX events.
//
// NOTE(review): this copy of the header looks damaged by extraction. Line
// breaks are collapsed, every #include has lost its target, and template
// parameter/argument lists are missing (e.g. "template struct ParserTypes",
// "std::map Features"). Restore it from the upstream source before trying to
// compile; the code below is left exactly as found.
//
// ParserTypes
//   Uses Arabica::get_param over T0/T1 to pick the string_adaptor and
//   SAX_parser_type typedefs that Parser uses.
//
// Parser (derives, protected, from Arabica::SAX::DefaultHandler)
//   Copy construction, assignment and operator== are declared private and
//   have no implementation.
//   Parser()                  nulls documentType_, entityResolver_ and
//                             errorHandler_, and registers three default
//                             features: namespaces = true,
//                             namespace_prefixes = true, validation = false.
//   setEntityResolver / setErrorHandler
//                             store the address of the caller's object; the
//                             getters return that pointer (0 if never set).
//   setFeature(name, value)   adds the feature or overwrites its value.
//   getFeature(name)          returns the stored value; throws
//                             Arabica::SAX::SAXNotRecognizedException when
//                             the name was never registered.
//   parse(systemId)           wraps systemId in an InputSourceT and forwards
//                             to parse(InputSourceT&).
//   parse(source)             creates a new document, resets currentNode_,
//                             inCDATA_, inDTD_ and inEntity_, then runs a
//                             SAX_parser_type wrapped in a TextCoalescerT with
//                             *this installed as content, error, lexical and
//                             decl handler. A DOM::DOMException escaping the
//                             parse nulls document_ and is passed to
//                             errorHandler_->fatalError() if one is set.
//                             Returns true when document_ is non-null
//                             afterwards.
//   getDocument()             returns document_.
//   reset()                   nulls currentNode_ and document_.
//   setParserFeatures(parser) copies every entry of features_ to parser,
//                             ignoring Arabica::SAX::SAXException thrown by
//                             setFeature.
//
// Handler overrides
//   Most content handlers do nothing while currentNode_ is null, which is the
//   state after reset() or endDocument().
//   startElement   creates the element with createElementNS(namespaceURI,
//                  qName), appends it, copies the attributes (qualified name,
//                  or local name when the qualified name is empty) and makes
//                  the element current. A DOM::DOMException calls reset() and
//                  is reported through errorHandler_->fatalError().
//   endElement     makes the parent node current.
//   characters     appends a CDATA section while inCDATA_ is set, otherwise
//                  a text node.
//   processingInstruction / comment
//                  append the matching node to currentNode_.
//   skippedEntity  appends an entity reference unless inDTD_ is set.
//   warning        forwards to errorHandler_ if set.
//   error / fatalError
//                  forward to errorHandler_ if set, then call reset(), so the
//                  document is dropped in both cases.
//   startDTD       allocates a DocumentType, inserts it into document_ and
//                  sets inDTD_; endDTD marks it read-only and clears inDTD_.
//   startEntity    at nesting depth 1 saves currentNode_ in cachedCurrent_
//                  and redirects output to the declared entity node, or to
//                  null when that node already has children.
//   endEntity      restores currentNode_ when the depth returns to 0; unless
//                  name equals dtd_pseudo_entity it then appends clones of the
//                  declared entity's children, or an entity reference when
//                  the entity is undeclared or empty.
//   elementDecl / attributeDecl / externalEntityDecl / notationDecl /
//   unparsedEntityDecl
//                  record the declaration on documentType_; all return early
//                  when documentType_ is null. attributeDecl stores a default
//                  only for a non-empty value and registers the attribute as
//                  an ID when type equals attributeTypes_.id.
//   internalEntityDecl
//                  registers a new entity; a value without "<" becomes a
//                  single text node, any other value is parsed by a nested
//                  Parser and the children of its document element are
//                  imported into the entity.
//
// NOTE(review), to confirm before relying on this class:
//   - parse() does not reset documentType_ or declaredEntities_, so both
//     keep whatever an earlier parse() on the same object left in them.
//   - startEntity returns before ++inEntity_ when currentNode_ is null, but
//     endEntity always runs --inEntity_; the counter can get out of step.
//   - endEntity calls currentNode_.appendChild() without the null check the
//     other handlers use, and endDTD dereferences documentType_ unchecked.
//   - declaredEntities_[name] is used for lookups; if that member is a
//     std::map (its arguments are missing here) each unknown name inserts an
//     entry.
//   - internalEntityDecl ignores the result of the nested parser.parse() and
//     uses the returned document directly.
//   - the two string literals streamed around the entity value in
//     internalEntityDecl are empty in this copy; presumably they held the
//     start and end tags of a wrapper element - TODO confirm upstream.
#ifndef JEZUK_SAX2DOM_PARSER_H #define JEZUK_SAX2DOM_PARSER_H #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Arabica { namespace SAX2DOM { template struct ParserTypes { typedef typename Arabica::get_param, T0, T1>::type string_adaptor; typedef typename Arabica::get_param, T1, T0>::type SAX_parser_type; }; template class Parser : protected Arabica::SAX::DefaultHandler::string_adaptor> { typedef typename ParserTypes::string_adaptor string_adaptorT; typedef typename ParserTypes::SAX_parser_type SAX_parser_type; typedef Arabica::SAX::XMLReaderInterface XMLReaderInterfaceT; typedef Arabica::SAX::TextCoalescer TextCoalescerT; typedef Arabica::SAX::Attributes AttributesT; typedef Arabica::SAX::EntityResolver EntityResolverT; typedef Arabica::SAX::ErrorHandler ErrorHandlerT; typedef Arabica::SAX::LexicalHandler LexicalHandlerT; typedef Arabica::SAX::DeclHandler DeclHandlerT; typedef Arabica::SAX::InputSource InputSourceT; typedef Arabica::SimpleDOM::EntityImpl EntityT; typedef Arabica::SimpleDOM::NotationImpl NotationT; typedef Arabica::SimpleDOM::ElementImpl ElementT; typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT; public: Parser() : documentType_(0), entityResolver_(0), errorHandler_(0) { Arabica::SAX::FeatureNames fNames; features_.insert(std::make_pair(fNames.namespaces, true)); features_.insert(std::make_pair(fNames.namespace_prefixes, true)); features_.insert(std::make_pair(fNames.validation, false)); } // Parser void setEntityResolver(EntityResolverT& resolver) { entityResolver_ = &resolver; } EntityResolverT* getEntityResolver() const { return entityResolver_; } void setErrorHandler(ErrorHandlerT& handler) { errorHandler_ = &handler; } ErrorHandlerT* getErrorHandler() const { return errorHandler_; } void setFeature(const stringT& name, bool value) { typename Features::iterator f = features_.find(name); if(f == features_.end()) features_.insert(std::make_pair(name, 
value)); else f->second = value; } // setFeature bool getFeature(const stringT& name) const { typename Features::const_iterator f = features_.find(name); if(f == features_.end()) throw Arabica::SAX::SAXNotRecognizedException(std::string("Feature not recognized ") + string_adaptorT::asStdString(name)); return f->second; } // getFeature bool parse(const stringT& systemId) { InputSourceT is(systemId); return parse(is); } // loadDOM bool parse(InputSourceT& source) { DOM::DOMImplementation di = Arabica::SimpleDOM::DOMImplementation::getDOMImplementation(); document_ = di.createDocument(string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), 0); currentNode_ = document_; inCDATA_ = false; inDTD_ = false; inEntity_ = 0; SAX_parser_type base_parser; TextCoalescerT parser(base_parser); parser.setContentHandler(*this); parser.setErrorHandler(*this); if(entityResolver_) parser.setEntityResolver(*entityResolver_); parser.setLexicalHandler(*this); parser.setDeclHandler(*this); setParserFeatures(parser); try { parser.parse(source); } catch(const DOM::DOMException& de) { document_ = 0; if(errorHandler_) { SAXParseExceptionT pe(de.what()); errorHandler_->fatalError(pe); } // if ... 
} // catch return (document_ != 0); } // loadDOM DOM::Document getDocument() const { return document_; } // getDocument void reset() { currentNode_ = 0; document_ = 0; } // reset protected: DOM::Node& currentNode() { return currentNode_; } private: // no implementations Parser(const Parser&); bool operator==(const Parser&) const; Parser& operator=(const Parser&); // instance variables DOM::Document document_; DocumentType* documentType_; DOM::Node currentNode_; DOM::Node cachedCurrent_; typedef std::map Features; Features features_; bool inCDATA_; bool inDTD_; int inEntity_; std::map declaredEntities_; EntityResolverT* entityResolver_; ErrorHandlerT* errorHandler_; Arabica::SAX::AttributeTypes attributeTypes_; protected: void setParserFeatures(XMLReaderInterfaceT& parser) const { for(typename Features::const_iterator f = features_.begin(), e = features_.end(); f != e; ++f) try { parser.setFeature(f->first, f->second); } catch(const Arabica::SAX::SAXException&) { } } // setParserFeatures /////////////////////////////////////////////////////////// // ContentHandler virtual void endDocument() { currentNode_ = 0; } // endDocument virtual void startElement(const stringT& namespaceURI, const stringT& /*localName*/, const stringT& qName, const AttributesT& atts) { if(currentNode_ == 0) return; try { DOM::Element elem = document_.createElementNS(namespaceURI, qName); currentNode_.appendChild(elem); // attributes here for(int i = 0; i < atts.getLength(); ++i) { stringT attName = atts.getQName(i); if(string_adaptorT::empty(attName)) attName = atts.getLocalName(i); elem.setAttributeNS(atts.getURI(i), attName, atts.getValue(i)); } currentNode_ = elem; } catch(const DOM::DOMException& de) { reset(); if(errorHandler_) { SAXParseExceptionT pe(de.what()); errorHandler_->fatalError(pe); } // if ... 
} // catch } // startElement virtual void endElement(const stringT& /*namespaceURI*/, const stringT& /*localName*/, const stringT& /*qName*/) { if(currentNode_ == 0) return; currentNode_ = currentNode_.getParentNode(); } // endElement virtual void characters(const stringT& ch) { if(currentNode_ == 0) return; if(!inCDATA_) currentNode_.appendChild(document_.createTextNode(ch)); else currentNode_.appendChild(document_.createCDATASection(ch)); } // characters virtual void processingInstruction(const stringT& target, const stringT& data) { if(currentNode_ == 0) return; currentNode_.appendChild(document_.createProcessingInstruction(target, data)); } // processingInstruction virtual void skippedEntity(const stringT& name) { if(currentNode_ == 0 || inDTD_ == true) return; currentNode_.appendChild(document_.createEntityReference(name)); } // skippedEntity //////////////////////////////////////////////////// // ErrorHandler virtual void warning(const SAXParseExceptionT& e) { if(errorHandler_) errorHandler_->warning(e); } // warning virtual void error(const SAXParseExceptionT& e) { if(errorHandler_) errorHandler_->error(e); reset(); } // error virtual void fatalError(const SAXParseExceptionT& e) { if(errorHandler_) errorHandler_->fatalError(e); reset(); } // fatalError ///////////////////////////////////////////////////// // LexicalHandler virtual void startDTD(const stringT& name, const stringT& publicId, const stringT& systemId) { documentType_ = new DocumentType(name, publicId, systemId); document_.insertBefore(documentType_, 0); inDTD_ = true; } // startDTD virtual void endDTD() { documentType_->setReadOnly(true); inDTD_ = false; } // endDTD virtual void startEntity(const stringT& name) { if(currentNode_ == 0) return; if(++inEntity_ == 1) { cachedCurrent_ = currentNode_; currentNode_ = declaredEntities_[name]; if(currentNode_ != 0 && currentNode_.hasChildNodes() == true) // already populated currentNode_ = 0; } } // startEntity virtual void endEntity(const stringT& name) 
{ if(--inEntity_ == 0) currentNode_ = cachedCurrent_; if(this->dtd_pseudo_entity == name) return; const EntityT* declared = declaredEntities_[name]; if(declared != 0 && declared->getLength() != 0) { for(DOM::Node child = declared->getFirstChild(); child != 0; child = child.getNextSibling()) currentNode_.appendChild(child.cloneNode(true)); } else currentNode_.appendChild(document_.createEntityReference(name)); } // endEntity virtual void startCDATA() { inCDATA_ = true; } // startCDATA virtual void endCDATA() { inCDATA_ = false; } // endCDATA virtual void comment(const stringT& text) { if(currentNode_ == 0) return; currentNode_.appendChild(document_.createComment(text)); } // comment ////////////////////////////////////////////////////////////////////// // DeclHandler virtual void elementDecl(const stringT& name, const stringT& /*model*/) { if(!documentType_) return; documentType_->addElement(name); } // elementDecl virtual void attributeDecl(const stringT& elementName, const stringT& attributeName, const stringT& type, const stringT& /*valueDefault*/, const stringT& value) { if(!documentType_) return; if(!string_adaptorT::empty(value)) documentType_->addDefaultAttr(elementName, attributeName, value); if(type == attributeTypes_.id) documentType_->addElementId(attributeName); } // attributeDecl virtual void internalEntityDecl(const stringT& name, const stringT& value) { if(!documentType_) return; static const stringT LEFT_ANGLE_BRACKET = string_adaptorT::construct_from_utf8("<"); EntityT* entity = new EntityT(0, name, string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8("")); declaredEntities_.insert(std::make_pair(name, entity)); documentType_->addEntity(entity); DOM::Node n = entity; if(string_adaptorT::find(value, LEFT_ANGLE_BRACKET) == string_adaptorT::npos()) { n.appendChild(document_.createTextNode(value)); return; } // if ... 
// parse the value into a Document // this may not quite do the right thing for some custom string types, // but at the time I'm writing this, the code has been missing this // stuff for something like 8 years and nobody's noticed so it's not // massively used. // I only noticed myself when I started running the DOM conformance tests std::stringstream ss; ss << "" << string_adaptorT::asStdString(value) << ""; Arabica::SAX::InputSource is(ss); Arabica::SAX2DOM::Parser parser; parser.parse(is); DOM::Document entityDoc = parser.getDocument(); DOM::Element entityElem = entityDoc.getDocumentElement(); DOM::Node child = entityElem.getFirstChild(); while(child != 0) { // import the contents thereof DOM::Node imported = document_.importNode(child, true); // append to entity n.appendChild(imported); child = child.getNextSibling(); } // while } // internalEntityDecl virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId) { if(!documentType_) return; EntityT* entity = new EntityT(0, name, publicId, systemId, string_adaptorT::construct_from_utf8("")); declaredEntities_.insert(std::make_pair(name, entity)); // we'll populate it later documentType_->addEntity(entity); } // externalEntityDecl ///////////////////////////////////////////////////////////////////////// // DTDHandler virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId) { if(!documentType_) return; documentType_->addNotation(new NotationT(0, name, publicId, systemId)); } // notationDecl virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName) { if(!documentType_) return; documentType_->addEntity(new EntityT(0, name, publicId, systemId, notationName)); } // unparsedEntityDecl }; // class Parser } // namespace SAX2DOM } // namespace Arabica #endif