mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-17 18:12:04 +01:00
Deal properly with the [dtd] pseudo-entity
Some parsers report the [dtd] pseudo entity through the LexicalHandler. Arabica was incorrectly trying to create an Entity Reference for it.
This commit is contained in:
parent
0c37303b50
commit
2f49994b09
2 changed files with 650 additions and 639 deletions
|
@ -1,445 +1,448 @@
|
|||
#ifndef JEZUK_SAX2DOM_PARSER_H
|
||||
#define JEZUK_SAX2DOM_PARSER_H
|
||||
|
||||
#include <SAX/XMLReader.hpp>
|
||||
#include <SAX/helpers/DefaultHandler.hpp>
|
||||
#include <SAX/helpers/AttributeTypes.hpp>
|
||||
#include <SAX/filter/TextCoalescer.hpp>
|
||||
#include <DOM/Simple/DOMImplementation.hpp>
|
||||
#include <DOM/Simple/NotationImpl.hpp>
|
||||
#include <DOM/Simple/EntityImpl.hpp>
|
||||
#include <DOM/Document.hpp>
|
||||
#include <DOM/DOMException.hpp>
|
||||
#include <DOM/SAX2DOM/DocumentTypeImpl.hpp>
|
||||
#include <map>
|
||||
#include <SAX/helpers/FeatureNames.hpp>
|
||||
#include <SAX/helpers/PropertyNames.hpp>
|
||||
#include <SAX/SAXParseException.hpp>
|
||||
|
||||
namespace Arabica
|
||||
{
|
||||
namespace SAX2DOM
|
||||
{
|
||||
|
||||
template<class string_type, class T0, class T1>
|
||||
struct ParserTypes
|
||||
{
|
||||
typedef typename Arabica::get_param<Arabica::string_adaptor_tag,
|
||||
Arabica::default_string_adaptor<string_type>,
|
||||
T0,
|
||||
T1>::type string_adaptor;
|
||||
typedef typename Arabica::get_param<Arabica::SAX::XMLReaderInterface_tag,
|
||||
Arabica::SAX::XMLReader<string_type, string_adaptor>,
|
||||
T1,
|
||||
T0>::type SAX_parser_type;
|
||||
};
|
||||
|
||||
template<class stringT,
|
||||
class T0 = Arabica::nil_t,
|
||||
class T1 = Arabica::nil_t>
|
||||
class Parser : protected Arabica::SAX::DefaultHandler<stringT, typename ParserTypes<stringT, T0, T1>::string_adaptor>
|
||||
{
|
||||
typedef typename ParserTypes<stringT, T0, T1>::string_adaptor string_adaptorT;
|
||||
typedef typename ParserTypes<stringT, T0, T1>::SAX_parser_type SAX_parser_type;
|
||||
typedef Arabica::SAX::XMLReaderInterface<stringT, string_adaptorT> XMLReaderInterfaceT;
|
||||
typedef Arabica::SAX::TextCoalescer<stringT, string_adaptorT> TextCoalescerT;
|
||||
typedef Arabica::SAX::Attributes<stringT, string_adaptorT> AttributesT;
|
||||
typedef Arabica::SAX::EntityResolver<stringT, string_adaptorT> EntityResolverT;
|
||||
typedef Arabica::SAX::ErrorHandler<stringT, string_adaptorT> ErrorHandlerT;
|
||||
typedef Arabica::SAX::LexicalHandler<stringT, string_adaptorT> LexicalHandlerT;
|
||||
typedef Arabica::SAX::DeclHandler<stringT, string_adaptorT> DeclHandlerT;
|
||||
typedef Arabica::SAX::InputSource<stringT, string_adaptorT> InputSourceT;
|
||||
typedef Arabica::SimpleDOM::EntityImpl<stringT, string_adaptorT> EntityT;
|
||||
typedef Arabica::SimpleDOM::NotationImpl<stringT, string_adaptorT> NotationT;
|
||||
typedef Arabica::SimpleDOM::ElementImpl<stringT, string_adaptorT> ElementT;
|
||||
typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT;
|
||||
|
||||
public:
|
||||
Parser() :
|
||||
documentType_(0),
|
||||
entityResolver_(0),
|
||||
errorHandler_(0)
|
||||
{
|
||||
Arabica::SAX::FeatureNames<stringT, string_adaptorT> fNames;
|
||||
features_.insert(std::make_pair(fNames.namespaces, true));
|
||||
features_.insert(std::make_pair(fNames.namespace_prefixes, true));
|
||||
features_.insert(std::make_pair(fNames.validation, false));
|
||||
} // Parser
|
||||
|
||||
void setEntityResolver(EntityResolverT& resolver) { entityResolver_ = &resolver; }
|
||||
EntityResolverT* getEntityResolver() const { return entityResolver_; }
|
||||
|
||||
void setErrorHandler(ErrorHandlerT& handler) { errorHandler_ = &handler; }
|
||||
ErrorHandlerT* getErrorHandler() const { return errorHandler_; }
|
||||
|
||||
void setFeature(const stringT& name, bool value)
|
||||
{
|
||||
typename Features::iterator f = features_.find(name);
|
||||
if(f == features_.end())
|
||||
features_.insert(std::make_pair(name, value));
|
||||
else
|
||||
f->second = value;
|
||||
} // setFeature
|
||||
|
||||
bool getFeature(const stringT& name) const
|
||||
{
|
||||
typename Features::const_iterator f = features_.find(name);
|
||||
if(f == features_.end())
|
||||
throw Arabica::SAX::SAXNotRecognizedException(std::string("Feature not recognized ") + string_adaptorT::asStdString(name));
|
||||
return f->second;
|
||||
} // getFeature
|
||||
|
||||
bool parse(const stringT& systemId)
|
||||
{
|
||||
InputSourceT is(systemId);
|
||||
return parse(is);
|
||||
} // loadDOM
|
||||
|
||||
bool parse(InputSourceT& source)
|
||||
{
|
||||
Arabica::SAX::PropertyNames<stringT, string_adaptorT> pNames;
|
||||
|
||||
DOM::DOMImplementation<stringT, string_adaptorT> di = Arabica::SimpleDOM::DOMImplementation<stringT, string_adaptorT>::getDOMImplementation();
|
||||
document_ = di.createDocument(string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), 0);
|
||||
currentNode_ = document_;
|
||||
inCDATA_ = false;
|
||||
inDTD_ = false;
|
||||
inEntity_ = 0;
|
||||
|
||||
SAX_parser_type base_parser;
|
||||
TextCoalescerT parser(base_parser);
|
||||
parser.setContentHandler(*this);
|
||||
parser.setErrorHandler(*this);
|
||||
if(entityResolver_)
|
||||
parser.setEntityResolver(*entityResolver_);
|
||||
|
||||
parser.setLexicalHandler(*this);
|
||||
parser.setDeclHandler(*this);
|
||||
|
||||
setParserFeatures(parser);
|
||||
|
||||
try
|
||||
{
|
||||
parser.parse(source);
|
||||
}
|
||||
catch(const DOM::DOMException& de)
|
||||
{
|
||||
document_ = 0;
|
||||
|
||||
if(errorHandler_)
|
||||
{
|
||||
SAXParseExceptionT pe(de.what());
|
||||
errorHandler_->fatalError(pe);
|
||||
} // if ...
|
||||
} // catch
|
||||
|
||||
return (document_ != 0);
|
||||
} // loadDOM
|
||||
|
||||
DOM::Document<stringT, string_adaptorT> getDocument() const
|
||||
{
|
||||
return document_;
|
||||
} // getDocument
|
||||
|
||||
void reset()
|
||||
{
|
||||
currentNode_ = 0;
|
||||
document_ = 0;
|
||||
} // reset
|
||||
|
||||
protected:
|
||||
DOM::Node<stringT, string_adaptorT>& currentNode() { return currentNode_; }
|
||||
|
||||
private:
|
||||
// no implementations
|
||||
Parser(const Parser&);
|
||||
bool operator==(const Parser&) const;
|
||||
Parser& operator=(const Parser&);
|
||||
|
||||
// instance variables
|
||||
DOM::Document<stringT, string_adaptorT> document_;
|
||||
DocumentType<stringT, string_adaptorT >* documentType_;
|
||||
DOM::Node<stringT, string_adaptorT> currentNode_;
|
||||
DOM::Node<stringT, string_adaptorT> cachedCurrent_;
|
||||
|
||||
typedef std::map<stringT, bool> Features;
|
||||
Features features_;
|
||||
|
||||
bool inCDATA_;
|
||||
bool inDTD_;
|
||||
int inEntity_;
|
||||
|
||||
std::map<stringT, EntityT*> declaredEntities_;
|
||||
|
||||
EntityResolverT* entityResolver_;
|
||||
ErrorHandlerT* errorHandler_;
|
||||
Arabica::SAX::AttributeTypes<stringT, string_adaptorT> attributeTypes_;
|
||||
|
||||
protected:
|
||||
void setParserFeatures(XMLReaderInterfaceT& parser) const
|
||||
{
|
||||
for(typename Features::const_iterator f = features_.begin(), e = features_.end(); f != e; ++f)
|
||||
try {
|
||||
parser.setFeature(f->first, f->second);
|
||||
}
|
||||
catch(const Arabica::SAX::SAXException&) { }
|
||||
} // setParserFeatures
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// ContentHandler
|
||||
virtual void endDocument()
|
||||
{
|
||||
currentNode_ = 0;
|
||||
} // endDocument
|
||||
|
||||
virtual void startElement(const stringT& namespaceURI,
|
||||
const stringT& /*localName*/,
|
||||
const stringT& qName,
|
||||
const AttributesT& atts)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
DOM::Element<stringT, string_adaptorT> elem = document_.createElementNS(namespaceURI, qName);
|
||||
currentNode_.appendChild(elem);
|
||||
|
||||
// attributes here
|
||||
for(int i = 0; i < atts.getLength(); ++i)
|
||||
{
|
||||
stringT qName = atts.getQName(i);
|
||||
if(string_adaptorT::empty(qName))
|
||||
qName = atts.getLocalName(i);
|
||||
elem.setAttributeNS(atts.getURI(i), qName, atts.getValue(i));
|
||||
}
|
||||
|
||||
currentNode_ = elem;
|
||||
}
|
||||
catch(const DOM::DOMException& de)
|
||||
{
|
||||
reset();
|
||||
|
||||
if(errorHandler_)
|
||||
{
|
||||
SAXParseExceptionT pe(de.what());
|
||||
errorHandler_->fatalError(pe);
|
||||
} // if ...
|
||||
} // catch
|
||||
} // startElement
|
||||
|
||||
virtual void endElement(const stringT& /*namespaceURI*/,
|
||||
const stringT& /*localName*/,
|
||||
const stringT& /*qName*/)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_ = currentNode_.getParentNode();
|
||||
} // endElement
|
||||
|
||||
virtual void characters(const stringT& ch)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
if(!inCDATA_)
|
||||
currentNode_.appendChild(document_.createTextNode(ch));
|
||||
else
|
||||
currentNode_.appendChild(document_.createCDATASection(ch));
|
||||
} // characters
|
||||
|
||||
virtual void processingInstruction(const stringT& target, const stringT& data)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createProcessingInstruction(target, data));
|
||||
} // processingInstruction
|
||||
|
||||
virtual void skippedEntity(const stringT& name)
|
||||
{
|
||||
if(currentNode_ == 0 || inDTD_ == true)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createEntityReference(name));
|
||||
} // skippedEntity
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// ErrorHandler
|
||||
virtual void warning(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->warning(e);
|
||||
} // warning
|
||||
|
||||
virtual void error(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->error(e);
|
||||
reset();
|
||||
} // error
|
||||
|
||||
virtual void fatalError(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->fatalError(e);
|
||||
reset();
|
||||
} // fatalError
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// LexicalHandler
|
||||
virtual void startDTD(const stringT& name,
|
||||
const stringT& publicId,
|
||||
const stringT& systemId)
|
||||
{
|
||||
documentType_ = new DocumentType<stringT, string_adaptorT >(name, publicId, systemId);
|
||||
document_.insertBefore(documentType_, 0);
|
||||
inDTD_ = true;
|
||||
} // startDTD
|
||||
|
||||
virtual void endDTD()
|
||||
{
|
||||
documentType_->setReadOnly(true);
|
||||
inDTD_ = false;
|
||||
} // endDTD
|
||||
|
||||
virtual void startEntity(const stringT& name)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
if(++inEntity_ == 1)
|
||||
{
|
||||
cachedCurrent_ = currentNode_;
|
||||
currentNode_ = declaredEntities_[name];
|
||||
if(currentNode_ != 0 && currentNode_.hasChildNodes() == true) // already populated
|
||||
currentNode_ = 0;
|
||||
}
|
||||
} // startEntity
|
||||
|
||||
virtual void endEntity(const stringT& name)
|
||||
{
|
||||
if(--inEntity_ == 0)
|
||||
currentNode_ = cachedCurrent_;
|
||||
|
||||
currentNode_.appendChild(document_.createEntityReference(name));
|
||||
} // endEntity
|
||||
|
||||
virtual void startCDATA()
|
||||
{
|
||||
inCDATA_ = true;
|
||||
} // startCDATA
|
||||
|
||||
virtual void endCDATA()
|
||||
{
|
||||
inCDATA_ = false;
|
||||
} // endCDATA
|
||||
|
||||
virtual void comment(const stringT& text)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createComment(text));
|
||||
} // comment
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DeclHandler
|
||||
virtual void elementDecl(const stringT& name, const stringT& /*model*/)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addElement(name);
|
||||
} // elementDecl
|
||||
|
||||
virtual void attributeDecl(const stringT& elementName,
|
||||
const stringT& attributeName,
|
||||
const stringT& type,
|
||||
const stringT& /*valueDefault*/,
|
||||
const stringT& value)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
if(!string_adaptorT::empty(value))
|
||||
documentType_->addDefaultAttr(elementName, attributeName, value);
|
||||
if(type == attributeTypes_.id)
|
||||
documentType_->addElementId(attributeName);
|
||||
} // attributeDecl
|
||||
|
||||
virtual void internalEntityDecl(const stringT& name, const stringT& value)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
|
||||
static const stringT LEFT_ANGLE_BRACKET = string_adaptorT::construct_from_utf8("<");
|
||||
|
||||
EntityT* entity = new EntityT(0, name, string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""));
|
||||
declaredEntities_.insert(std::make_pair(name, entity));
|
||||
documentType_->addEntity(entity);
|
||||
DOM::Node<stringT, string_adaptorT> n = entity;
|
||||
|
||||
if(string_adaptorT::find(value, LEFT_ANGLE_BRACKET) == string_adaptorT::npos())
|
||||
{
|
||||
n.appendChild(document_.createTextNode(value));
|
||||
return;
|
||||
} // if ...
|
||||
|
||||
// parse the value into a Document
|
||||
// this may not quite do the right thing for some custom string types,
|
||||
// but at the time I'm writing this, the code has been missing this
|
||||
// stuff for something like 8 years and nobody's noticed so it's not
|
||||
// massively used.
|
||||
// I only noticed myself when I started running the DOM conformance tests
|
||||
std::stringstream ss;
|
||||
ss << "<wrapper>" << string_adaptorT::asStdString(value) << "</wrapper>";
|
||||
|
||||
Arabica::SAX::InputSource<stringT, string_adaptorT> is(ss);
|
||||
Arabica::SAX2DOM::Parser<stringT, string_adaptorT> parser;
|
||||
parser.parse(is);
|
||||
|
||||
DOM::Document<stringT, string_adaptorT> entityDoc = parser.getDocument();
|
||||
DOM::Element<stringT, string_adaptorT> entityElem = entityDoc.getDocumentElement();
|
||||
DOM::Node<stringT, string_adaptorT> child = entityElem.getFirstChild();
|
||||
while(child != 0)
|
||||
{
|
||||
// import the contents thereof
|
||||
DOM::Node<stringT, string_adaptorT> imported = document_.importNode(child, true);
|
||||
// append to entity
|
||||
n.appendChild(imported);
|
||||
|
||||
child = child.getNextSibling();
|
||||
} // while
|
||||
} // internalEntityDecl
|
||||
|
||||
virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
EntityT* entity = new EntityT(0, name, publicId, systemId, string_adaptorT::construct_from_utf8(""));
|
||||
declaredEntities_.insert(std::make_pair(name, entity)); // we'll populate it later
|
||||
documentType_->addEntity(entity);
|
||||
} // externalEntityDecl
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// DTDHandler
|
||||
virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addNotation(new NotationT(0, name, publicId, systemId));
|
||||
} // notationDecl
|
||||
|
||||
virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addEntity(new EntityT(0, name, publicId, systemId, notationName));
|
||||
} // unparsedEntityDecl
|
||||
}; // class Parser
|
||||
|
||||
} // namespace SAX2DOM
|
||||
} // namespace Arabica
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef JEZUK_SAX2DOM_PARSER_H
|
||||
#define JEZUK_SAX2DOM_PARSER_H
|
||||
|
||||
#include <SAX/XMLReader.hpp>
#include <SAX/helpers/DefaultHandler.hpp>
#include <SAX/helpers/AttributeTypes.hpp>
#include <SAX/filter/TextCoalescer.hpp>
#include <DOM/Simple/DOMImplementation.hpp>
#include <DOM/Simple/NotationImpl.hpp>
#include <DOM/Simple/EntityImpl.hpp>
#include <DOM/Document.hpp>
#include <DOM/DOMException.hpp>
#include <DOM/SAX2DOM/DocumentTypeImpl.hpp>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <SAX/helpers/FeatureNames.hpp>
#include <SAX/helpers/PropertyNames.hpp>
#include <SAX/SAXParseException.hpp>
|
||||
|
||||
namespace Arabica
|
||||
{
|
||||
namespace SAX2DOM
|
||||
{
|
||||
|
||||
template<class string_type, class T0, class T1>
|
||||
struct ParserTypes
|
||||
{
|
||||
typedef typename Arabica::get_param<Arabica::string_adaptor_tag,
|
||||
Arabica::default_string_adaptor<string_type>,
|
||||
T0,
|
||||
T1>::type string_adaptor;
|
||||
typedef typename Arabica::get_param<Arabica::SAX::XMLReaderInterface_tag,
|
||||
Arabica::SAX::XMLReader<string_type, string_adaptor>,
|
||||
T1,
|
||||
T0>::type SAX_parser_type;
|
||||
};
|
||||
|
||||
template<class stringT,
|
||||
class T0 = Arabica::nil_t,
|
||||
class T1 = Arabica::nil_t>
|
||||
class Parser : protected Arabica::SAX::DefaultHandler<stringT, typename ParserTypes<stringT, T0, T1>::string_adaptor>
|
||||
{
|
||||
typedef typename ParserTypes<stringT, T0, T1>::string_adaptor string_adaptorT;
|
||||
typedef typename ParserTypes<stringT, T0, T1>::SAX_parser_type SAX_parser_type;
|
||||
typedef Arabica::SAX::XMLReaderInterface<stringT, string_adaptorT> XMLReaderInterfaceT;
|
||||
typedef Arabica::SAX::TextCoalescer<stringT, string_adaptorT> TextCoalescerT;
|
||||
typedef Arabica::SAX::Attributes<stringT, string_adaptorT> AttributesT;
|
||||
typedef Arabica::SAX::EntityResolver<stringT, string_adaptorT> EntityResolverT;
|
||||
typedef Arabica::SAX::ErrorHandler<stringT, string_adaptorT> ErrorHandlerT;
|
||||
typedef Arabica::SAX::LexicalHandler<stringT, string_adaptorT> LexicalHandlerT;
|
||||
typedef Arabica::SAX::DeclHandler<stringT, string_adaptorT> DeclHandlerT;
|
||||
typedef Arabica::SAX::InputSource<stringT, string_adaptorT> InputSourceT;
|
||||
typedef Arabica::SimpleDOM::EntityImpl<stringT, string_adaptorT> EntityT;
|
||||
typedef Arabica::SimpleDOM::NotationImpl<stringT, string_adaptorT> NotationT;
|
||||
typedef Arabica::SimpleDOM::ElementImpl<stringT, string_adaptorT> ElementT;
|
||||
typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT;
|
||||
|
||||
public:
|
||||
Parser() :
|
||||
documentType_(0),
|
||||
entityResolver_(0),
|
||||
errorHandler_(0)
|
||||
{
|
||||
Arabica::SAX::FeatureNames<stringT, string_adaptorT> fNames;
|
||||
features_.insert(std::make_pair(fNames.namespaces, true));
|
||||
features_.insert(std::make_pair(fNames.namespace_prefixes, true));
|
||||
features_.insert(std::make_pair(fNames.validation, false));
|
||||
} // Parser
|
||||
|
||||
void setEntityResolver(EntityResolverT& resolver) { entityResolver_ = &resolver; }
|
||||
EntityResolverT* getEntityResolver() const { return entityResolver_; }
|
||||
|
||||
void setErrorHandler(ErrorHandlerT& handler) { errorHandler_ = &handler; }
|
||||
ErrorHandlerT* getErrorHandler() const { return errorHandler_; }
|
||||
|
||||
void setFeature(const stringT& name, bool value)
|
||||
{
|
||||
typename Features::iterator f = features_.find(name);
|
||||
if(f == features_.end())
|
||||
features_.insert(std::make_pair(name, value));
|
||||
else
|
||||
f->second = value;
|
||||
} // setFeature
|
||||
|
||||
bool getFeature(const stringT& name) const
|
||||
{
|
||||
typename Features::const_iterator f = features_.find(name);
|
||||
if(f == features_.end())
|
||||
throw Arabica::SAX::SAXNotRecognizedException(std::string("Feature not recognized ") + string_adaptorT::asStdString(name));
|
||||
return f->second;
|
||||
} // getFeature
|
||||
|
||||
bool parse(const stringT& systemId)
|
||||
{
|
||||
InputSourceT is(systemId);
|
||||
return parse(is);
|
||||
} // loadDOM
|
||||
|
||||
// Parses `source` into a fresh DOM document.
//
// A new empty document is created, this object is registered as the
// content, error, lexical and declaration handler of a TextCoalescer-wrapped
// SAX reader, the stored features are applied, and the reader is run.
//
// Returns true when a document is available from getDocument() afterwards,
// false when the tree was discarded (by reset() from an error callback, or
// because a DOM::DOMException escaped the reader).  In the DOMException
// case the registered error handler, if any, receives a fatalError.
bool parse(InputSourceT& source)
{
  // Forget everything that pointed into the document of an earlier parse.
  // Without this a reused Parser keeps the old DocumentType pointer, and
  // declaredEntities_.insert() in the entity-declaration handlers would
  // silently keep a stale Entity pointer for any name declared again.
  documentType_ = 0;
  declaredEntities_.clear();
  cachedCurrent_ = 0;

  DOM::DOMImplementation<stringT, string_adaptorT> di = Arabica::SimpleDOM::DOMImplementation<stringT, string_adaptorT>::getDOMImplementation();
  document_ = di.createDocument(string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), 0);
  currentNode_ = document_;
  inCDATA_ = false;
  inDTD_ = false;
  inEntity_ = 0;

  SAX_parser_type base_parser;
  TextCoalescerT parser(base_parser);
  parser.setContentHandler(*this);
  parser.setErrorHandler(*this);
  if(entityResolver_)
    parser.setEntityResolver(*entityResolver_);

  parser.setLexicalHandler(*this);
  parser.setDeclHandler(*this);
  // NOTE(review): no DTDHandler is registered here, so notationDecl and
  // unparsedEntityDecl are only reached if the reader routes those events
  // some other way - confirm against the reader implementation.

  setParserFeatures(parser);

  try
  {
    parser.parse(source);
  }
  catch(const DOM::DOMException& de)
  {
    // discard the partial tree, including the insertion point into it
    currentNode_ = 0;
    document_ = 0;

    if(errorHandler_)
    {
      SAXParseExceptionT pe(de.what());
      errorHandler_->fatalError(pe);
    } // if ...
  } // catch

  return (document_ != 0);
} // loadDOM
|
||||
|
||||
DOM::Document<stringT, string_adaptorT> getDocument() const
|
||||
{
|
||||
return document_;
|
||||
} // getDocument
|
||||
|
||||
void reset()
|
||||
{
|
||||
currentNode_ = 0;
|
||||
document_ = 0;
|
||||
} // reset
|
||||
|
||||
protected:
|
||||
DOM::Node<stringT, string_adaptorT>& currentNode() { return currentNode_; }
|
||||
|
||||
private:
|
||||
// no implementations
|
||||
Parser(const Parser&);
|
||||
bool operator==(const Parser&) const;
|
||||
Parser& operator=(const Parser&);
|
||||
|
||||
// instance variables
|
||||
DOM::Document<stringT, string_adaptorT> document_;
|
||||
DocumentType<stringT, string_adaptorT >* documentType_;
|
||||
DOM::Node<stringT, string_adaptorT> currentNode_;
|
||||
DOM::Node<stringT, string_adaptorT> cachedCurrent_;
|
||||
|
||||
typedef std::map<stringT, bool> Features;
|
||||
Features features_;
|
||||
|
||||
bool inCDATA_;
|
||||
bool inDTD_;
|
||||
int inEntity_;
|
||||
|
||||
std::map<stringT, EntityT*> declaredEntities_;
|
||||
|
||||
EntityResolverT* entityResolver_;
|
||||
ErrorHandlerT* errorHandler_;
|
||||
Arabica::SAX::AttributeTypes<stringT, string_adaptorT> attributeTypes_;
|
||||
|
||||
protected:
|
||||
// Applies every entry of the feature table to `parser`.  This is best
// effort: a SAXException raised for one feature is swallowed and the
// remaining features are still applied.
void setParserFeatures(XMLReaderInterfaceT& parser) const
{
  typename Features::const_iterator feature = features_.begin();
  const typename Features::const_iterator last = features_.end();
  while(feature != last)
  {
    try
    {
      parser.setFeature(feature->first, feature->second);
    }
    catch(const Arabica::SAX::SAXException&)
    {
      // the reader rejected this feature; carry on with the next one
    }
    ++feature;
  } // while
} // setParserFeatures
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// ContentHandler
|
||||
virtual void endDocument()
|
||||
{
|
||||
currentNode_ = 0;
|
||||
} // endDocument
|
||||
|
||||
virtual void startElement(const stringT& namespaceURI,
|
||||
const stringT& /*localName*/,
|
||||
const stringT& qName,
|
||||
const AttributesT& atts)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
DOM::Element<stringT, string_adaptorT> elem = document_.createElementNS(namespaceURI, qName);
|
||||
currentNode_.appendChild(elem);
|
||||
|
||||
// attributes here
|
||||
for(int i = 0; i < atts.getLength(); ++i)
|
||||
{
|
||||
stringT qName = atts.getQName(i);
|
||||
if(string_adaptorT::empty(qName))
|
||||
qName = atts.getLocalName(i);
|
||||
elem.setAttributeNS(atts.getURI(i), qName, atts.getValue(i));
|
||||
}
|
||||
|
||||
currentNode_ = elem;
|
||||
}
|
||||
catch(const DOM::DOMException& de)
|
||||
{
|
||||
reset();
|
||||
|
||||
if(errorHandler_)
|
||||
{
|
||||
SAXParseExceptionT pe(de.what());
|
||||
errorHandler_->fatalError(pe);
|
||||
} // if ...
|
||||
} // catch
|
||||
} // startElement
|
||||
|
||||
virtual void endElement(const stringT& /*namespaceURI*/,
|
||||
const stringT& /*localName*/,
|
||||
const stringT& /*qName*/)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_ = currentNode_.getParentNode();
|
||||
} // endElement
|
||||
|
||||
virtual void characters(const stringT& ch)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
if(!inCDATA_)
|
||||
currentNode_.appendChild(document_.createTextNode(ch));
|
||||
else
|
||||
currentNode_.appendChild(document_.createCDATASection(ch));
|
||||
} // characters
|
||||
|
||||
virtual void processingInstruction(const stringT& target, const stringT& data)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createProcessingInstruction(target, data));
|
||||
} // processingInstruction
|
||||
|
||||
virtual void skippedEntity(const stringT& name)
|
||||
{
|
||||
if(currentNode_ == 0 || inDTD_ == true)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createEntityReference(name));
|
||||
} // skippedEntity
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// ErrorHandler
|
||||
virtual void warning(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->warning(e);
|
||||
} // warning
|
||||
|
||||
virtual void error(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->error(e);
|
||||
reset();
|
||||
} // error
|
||||
|
||||
virtual void fatalError(const SAXParseExceptionT& e)
|
||||
{
|
||||
if(errorHandler_)
|
||||
errorHandler_->fatalError(e);
|
||||
reset();
|
||||
} // fatalError
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// LexicalHandler
|
||||
virtual void startDTD(const stringT& name,
|
||||
const stringT& publicId,
|
||||
const stringT& systemId)
|
||||
{
|
||||
documentType_ = new DocumentType<stringT, string_adaptorT >(name, publicId, systemId);
|
||||
document_.insertBefore(documentType_, 0);
|
||||
inDTD_ = true;
|
||||
} // startDTD
|
||||
|
||||
virtual void endDTD()
|
||||
{
|
||||
documentType_->setReadOnly(true);
|
||||
inDTD_ = false;
|
||||
} // endDTD
|
||||
|
||||
virtual void startEntity(const stringT& name)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
if(++inEntity_ == 1)
|
||||
{
|
||||
cachedCurrent_ = currentNode_;
|
||||
currentNode_ = declaredEntities_[name];
|
||||
if(currentNode_ != 0 && currentNode_.hasChildNodes() == true) // already populated
|
||||
currentNode_ = 0;
|
||||
}
|
||||
} // startEntity
|
||||
|
||||
virtual void endEntity(const stringT& name)
|
||||
{
|
||||
if(--inEntity_ == 0)
|
||||
currentNode_ = cachedCurrent_;
|
||||
|
||||
if(dtd_pseudo_entity == name)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createEntityReference(name));
|
||||
} // endEntity
|
||||
|
||||
virtual void startCDATA()
|
||||
{
|
||||
inCDATA_ = true;
|
||||
} // startCDATA
|
||||
|
||||
virtual void endCDATA()
|
||||
{
|
||||
inCDATA_ = false;
|
||||
} // endCDATA
|
||||
|
||||
virtual void comment(const stringT& text)
|
||||
{
|
||||
if(currentNode_ == 0)
|
||||
return;
|
||||
|
||||
currentNode_.appendChild(document_.createComment(text));
|
||||
} // comment
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DeclHandler
|
||||
virtual void elementDecl(const stringT& name, const stringT& /*model*/)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addElement(name);
|
||||
} // elementDecl
|
||||
|
||||
virtual void attributeDecl(const stringT& elementName,
|
||||
const stringT& attributeName,
|
||||
const stringT& type,
|
||||
const stringT& /*valueDefault*/,
|
||||
const stringT& value)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
if(!string_adaptorT::empty(value))
|
||||
documentType_->addDefaultAttr(elementName, attributeName, value);
|
||||
if(type == attributeTypes_.id)
|
||||
documentType_->addElementId(attributeName);
|
||||
} // attributeDecl
|
||||
|
||||
virtual void internalEntityDecl(const stringT& name, const stringT& value)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
|
||||
static const stringT LEFT_ANGLE_BRACKET = string_adaptorT::construct_from_utf8("<");
|
||||
|
||||
EntityT* entity = new EntityT(0, name, string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""), string_adaptorT::construct_from_utf8(""));
|
||||
declaredEntities_.insert(std::make_pair(name, entity));
|
||||
documentType_->addEntity(entity);
|
||||
DOM::Node<stringT, string_adaptorT> n = entity;
|
||||
|
||||
if(string_adaptorT::find(value, LEFT_ANGLE_BRACKET) == string_adaptorT::npos())
|
||||
{
|
||||
n.appendChild(document_.createTextNode(value));
|
||||
return;
|
||||
} // if ...
|
||||
|
||||
// parse the value into a Document
|
||||
// this may not quite do the right thing for some custom strug types,
|
||||
// but at the time I've writing this, the code has been missing this
|
||||
// stuff for something like 8 years and nobody's noticed so it's not
|
||||
// massively used.
|
||||
// I only noticed myself when I started running the DOM conformance tests
|
||||
std::stringstream ss;
|
||||
ss << "<wrapper>" << string_adaptorT::asStdString(value) << "</wrapper>";
|
||||
|
||||
Arabica::SAX::InputSource<stringT, string_adaptorT> is(ss);
|
||||
Arabica::SAX2DOM::Parser<stringT, string_adaptorT> parser;
|
||||
parser.parse(is);
|
||||
|
||||
DOM::Document<stringT, string_adaptorT> entityDoc = parser.getDocument();
|
||||
DOM::Element<stringT, string_adaptorT> entityElem = entityDoc.getDocumentElement();
|
||||
DOM::Node<stringT, string_adaptorT> child = entityElem.getFirstChild();
|
||||
while(child != 0)
|
||||
{
|
||||
// import the contents thereof
|
||||
DOM::Node<stringT, string_adaptorT> imported = document_.importNode(child, true);
|
||||
// append to entity
|
||||
n.appendChild(imported);
|
||||
|
||||
child = child.getNextSibling();
|
||||
} // while
|
||||
} // internalEntityDecl
|
||||
|
||||
virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
EntityT* entity = new EntityT(0, name, publicId, systemId, string_adaptorT::construct_from_utf8(""));
|
||||
declaredEntities_.insert(std::make_pair(name, entity)); // we'll populate it later
|
||||
documentType_->addEntity(entity);
|
||||
} // externalEntityDecl
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// DTDHandler
|
||||
virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addNotation(new NotationT(0, name, publicId, systemId));
|
||||
} // notationDecl
|
||||
|
||||
virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName)
|
||||
{
|
||||
if(!documentType_)
|
||||
return;
|
||||
documentType_->addEntity(new EntityT(0, name, publicId, systemId, notationName));
|
||||
} // unparsedEntityDecl
|
||||
}; // class Parser
|
||||
|
||||
} // namespace SAX2DOM
|
||||
} // namespace Arabica
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,194 +1,202 @@
|
|||
#ifndef ARABICA_LEXICAL_HANDLER_H
|
||||
#define ARABICA_LEXICAL_HANDLER_H
|
||||
|
||||
// LexicalHandler.h
|
||||
// $Id$
|
||||
|
||||
#include <string>
|
||||
#include <SAX/ArabicaConfig.hpp>
|
||||
#include <Arabica/StringAdaptor.hpp>
|
||||
|
||||
namespace Arabica
|
||||
{
|
||||
namespace SAX
|
||||
{
|
||||
|
||||
/**
|
||||
* SAX2 extension handler for lexical events.
|
||||
*
|
||||
* <p>This is an optional extension handler for SAX2 to provide
|
||||
* lexical information about an XML document, such as comments
|
||||
* and CDATA section boundaries; XML readers are not required to
|
||||
* support this handler, and it is not part of the core SAX2
|
||||
* distribution.</p>
|
||||
*
|
||||
* <p>The events in the lexical handler apply to the entire document,
|
||||
* not just to the document element, and all lexical handler events
|
||||
* must appear between the content handler's startDocument and
|
||||
* endDocument events.</p>
|
||||
*
|
||||
* <p>To set the LexicalHandler for an XML reader, use the
|
||||
* {@link XMLReader#setProperty setProperty} method
|
||||
* with the propertyId "http://xml.org/sax/properties/lexical-handler".
|
||||
* If the reader does not support lexical events, it will throw a
|
||||
* {@link SAXNotRecognizedException SAXNotRecognizedException}
|
||||
* or a
|
||||
* {@link SAXNotSupportedException SAXNotSupportedException}
|
||||
* when you attempt to register the handler.</p>
|
||||
*
|
||||
* @since 2.0
|
||||
* @author Jez Higgins,
|
||||
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
||||
* @version 1.0
|
||||
* @see XMLReader#setProperty
|
||||
* @see SAXNotRecognizedException
|
||||
* @see SAXNotSupportedException
|
||||
*/
|
||||
template<class string_type, class string_adaptor = Arabica::default_string_adaptor<string_type> >
class LexicalHandler
{
public:
  virtual ~LexicalHandler() { }

  /**
   * Signals the beginning of the DOCTYPE declaration.
   *
   * <p>A document without a DOCTYPE declaration never triggers this
   * callback.</p>
   *
   * <p>Every declaration delivered through
   * {@link DTDHandler DTDHandler} or
   * {@link DeclHandler DeclHandler} must arrive between this event and
   * the matching {@link #endDTD endDTD}.  A declaration is taken to
   * belong to the internal DTD subset unless it arrives between
   * {@link #startEntity startEntity} and
   * {@link #endEntity endEntity} events.</p>
   *
   * <p>Comments and processing instructions found in the DTD should
   * likewise be reported between startDTD and endDTD, in their original
   * (logical) order.  They need not be positioned correctly relative to
   * DTDHandler or DeclHandler events.</p>
   *
   * <p>The start/endDTD pair falls inside the ContentHandler's
   * start/endDocument events and ahead of the first
   * {@link ContentHandler#startElement startElement} event.</p>
   *
   * @param name The document type name.
   * @param publicId The public identifier declared for the external
   *        DTD subset, or an empty string when none was declared.
   * @param systemId The system identifier declared for the external
   *        DTD subset, or an empty string when none was declared.
   * @see #endDTD
   * @see #startEntity
   */
  virtual void startDTD(const string_type& name,
                        const string_type& publicId,
                        const string_type& systemId) = 0;

  /**
   * Signals the end of the DOCTYPE declaration.
   *
   * <p>A document without a DOCTYPE declaration never triggers this
   * callback.</p>
   *
   * @see #startDTD
   */
  virtual void endDTD() = 0;

  /**
   * Signals the start of certain internal and external XML entities.
   *
   * <p>Reporting parameter entities (the external DTD subset included)
   * is optional, and a SAX2 driver that supports LexicalHandler need not
   * support it.  The
   * <code>http://xml.org/sax/features/lexical-handler/parameter-entities</code>
   * feature queries or controls whether parameter entities are
   * reported.</p>
   *
   * <p>Naming: general entities use their ordinary names, parameter
   * entities are prefixed with '%', and the external DTD subset is
   * reported under the pseudo-entity name "[dtd]".</p>
   *
   * <p>When a driver supplies these events, all other events must nest
   * properly inside the start/end entity pairs.  No further ordering
   * requirement is placed on events from
   * {@link DeclHandler DeclHandler} or
   * {@link DTDHandler DTDHandler}.</p>
   *
   * <p>Skipped entities are not reported here; they arrive through
   * {@link ContentHandler#skippedEntity skippedEntity}, which belongs
   * to the ContentHandler interface.</p>
   *
   * <p>SAX's streaming event model means that some entity boundaries
   * can never be reported:</p>
   *
   * <ul>
   *  <li>general entities within attribute values</li>
   *  <li>parameter entities within declarations</li>
   * </ul>
   *
   * <p>Such entities are expanded silently, leaving no trace of where
   * their boundaries were.</p>
   *
   * <p>The boundaries of character references (which are not really
   * entities anyway) are not reported either.</p>
   *
   * <p>All start/endEntity events must be properly nested.</p>
   *
   * @param name The entity name.  A parameter entity's name begins
   *        with '%'; the external DTD subset is named "[dtd]".
   * @see #endEntity
   * @see DeclHandler#internalEntityDecl
   * @see DeclHandler#externalEntityDecl
   */
  virtual void startEntity(const string_type& name) = 0;

  /**
   * Signals the end of an entity.
   *
   * @param name The name of the entity that is ending.
   * @see #startEntity
   */
  virtual void endEntity(const string_type& name) = 0;

  /**
   * Signals the start of a CDATA section.
   *
   * <p>Only the boundary is reported here.  The section's content is
   * delivered through the regular
   * {@link ContentHandler#characters characters} event.</p>
   *
   * @see #endCDATA
   */
  virtual void startCDATA() = 0;

  /**
   * Signals the end of a CDATA section.
   *
   * @see #startCDATA
   */
  virtual void endCDATA() = 0;

  /**
   * Reports an XML comment found anywhere in the document.
   *
   * <p>The callback is used for comments both inside and outside the
   * document element, including those in the external DTD subset (if
   * it is read).  Comments in the DTD must nest properly inside
   * start/endDTD and start/endEntity events (if used).</p>
   *
   * @param text A string holding the comment.
   */
  virtual void comment(const string_type& text) = 0;
}; // class LexicalHandler
|
||||
|
||||
} // namespace SAX
|
||||
} // namespace Arabica
|
||||
|
||||
#endif
|
||||
// end of file
|
||||
#ifndef ARABICA_LEXICAL_HANDLER_H
|
||||
#define ARABICA_LEXICAL_HANDLER_H
|
||||
|
||||
// LexicalHandler.h
|
||||
// $Id$
|
||||
|
||||
#include <string>
|
||||
#include <SAX/ArabicaConfig.hpp>
|
||||
#include <Arabica/StringAdaptor.hpp>
|
||||
|
||||
namespace Arabica
|
||||
{
|
||||
namespace SAX
|
||||
{
|
||||
|
||||
/**
|
||||
* SAX2 extension handler for lexical events.
|
||||
*
|
||||
* <p>This is an optional extension handler for SAX2 to provide
|
||||
* lexical information about an XML document, such as comments
|
||||
* and CDATA section boundaries; XML readers are not required to
|
||||
* support this handler, and it is not part of the core SAX2
|
||||
* distribution.</p>
|
||||
*
|
||||
* <p>The events in the lexical handler apply to the entire document,
|
||||
* not just to the document element, and all lexical handler events
|
||||
* must appear between the content handler's startDocument and
|
||||
* endDocument events.</p>
|
||||
*
|
||||
* <p>To set the LexicalHandler for an XML reader, use the
|
||||
* {@link XMLReader#setProperty setProperty} method
|
||||
* with the propertyId "http://xml.org/sax/properties/lexical-handler".
|
||||
* If the reader does not support lexical events, it will throw a
|
||||
* {@link SAXNotRecognizedException SAXNotRecognizedException}
|
||||
* or a
|
||||
* {@link SAXNotSupportedException SAXNotSupportedException}
|
||||
* when you attempt to register the handler.</p>
|
||||
*
|
||||
* @since 2.0
|
||||
* @author Jez Higgins,
|
||||
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
||||
* @version 1.0
|
||||
* @see XMLReader#setProperty
|
||||
* @see SAXNotRecognizedException
|
||||
* @see SAXNotSupportedException
|
||||
*/
|
||||
template<class string_type, class string_adaptor = Arabica::default_string_adaptor<string_type> >
class LexicalHandler
{
public:
  virtual ~LexicalHandler() { }

  /**
   * Copy assignment.
   *
   * <p>The const dtd_pseudo_entity member would otherwise suppress the
   * implicit copy-assignment operator, here and in every handler derived
   * from this interface.  The member is initialised to "[dtd]" in every
   * instance and is the only state held here, so there is nothing to
   * copy.</p>
   *
   * @return A reference to this handler.
   */
  LexicalHandler& operator=(const LexicalHandler&) { return *this; }

  /**
   * Signals the beginning of the DOCTYPE declaration.
   *
   * <p>A document without a DOCTYPE declaration never triggers this
   * callback.</p>
   *
   * <p>Every declaration delivered through
   * {@link DTDHandler DTDHandler} or
   * {@link DeclHandler DeclHandler} must arrive between this event and
   * the matching {@link #endDTD endDTD}.  A declaration is taken to
   * belong to the internal DTD subset unless it arrives between
   * {@link #startEntity startEntity} and
   * {@link #endEntity endEntity} events.</p>
   *
   * <p>Comments and processing instructions found in the DTD should
   * likewise be reported between startDTD and endDTD, in their original
   * (logical) order.  They need not be positioned correctly relative to
   * DTDHandler or DeclHandler events.</p>
   *
   * <p>The start/endDTD pair falls inside the ContentHandler's
   * start/endDocument events and ahead of the first
   * {@link ContentHandler#startElement startElement} event.</p>
   *
   * @param name The document type name.
   * @param publicId The public identifier declared for the external
   *        DTD subset, or an empty string when none was declared.
   * @param systemId The system identifier declared for the external
   *        DTD subset, or an empty string when none was declared.
   * @see #endDTD
   * @see #startEntity
   */
  virtual void startDTD(const string_type& name,
                        const string_type& publicId,
                        const string_type& systemId) = 0;

  /**
   * Signals the end of the DOCTYPE declaration.
   *
   * <p>A document without a DOCTYPE declaration never triggers this
   * callback.</p>
   *
   * @see #startDTD
   */
  virtual void endDTD() = 0;

  /**
   * Signals the start of certain internal and external XML entities.
   *
   * <p>Reporting parameter entities (the external DTD subset included)
   * is optional, and a SAX2 driver that supports LexicalHandler need not
   * support it.  The
   * <code>http://xml.org/sax/features/lexical-handler/parameter-entities</code>
   * feature queries or controls whether parameter entities are
   * reported.</p>
   *
   * <p>Naming: general entities use their ordinary names, parameter
   * entities are prefixed with '%', and the external DTD subset is
   * reported under the pseudo-entity name "[dtd]" (available as
   * dtd_pseudo_entity).</p>
   *
   * <p>When a driver supplies these events, all other events must nest
   * properly inside the start/end entity pairs.  No further ordering
   * requirement is placed on events from
   * {@link DeclHandler DeclHandler} or
   * {@link DTDHandler DTDHandler}.</p>
   *
   * <p>Skipped entities are not reported here; they arrive through
   * {@link ContentHandler#skippedEntity skippedEntity}, which belongs
   * to the ContentHandler interface.</p>
   *
   * <p>SAX's streaming event model means that some entity boundaries
   * can never be reported:</p>
   *
   * <ul>
   *  <li>general entities within attribute values</li>
   *  <li>parameter entities within declarations</li>
   * </ul>
   *
   * <p>Such entities are expanded silently, leaving no trace of where
   * their boundaries were.</p>
   *
   * <p>The boundaries of character references (which are not really
   * entities anyway) are not reported either.</p>
   *
   * <p>All start/endEntity events must be properly nested.</p>
   *
   * @param name The entity name.  A parameter entity's name begins
   *        with '%'; the external DTD subset is named "[dtd]".
   * @see #endEntity
   * @see DeclHandler#internalEntityDecl
   * @see DeclHandler#externalEntityDecl
   */
  virtual void startEntity(const string_type& name) = 0;

  /**
   * Signals the end of an entity.
   *
   * @param name The name of the entity that is ending.
   * @see #startEntity
   */
  virtual void endEntity(const string_type& name) = 0;

  /**
   * Signals the start of a CDATA section.
   *
   * <p>Only the boundary is reported here.  The section's content is
   * delivered through the regular
   * {@link ContentHandler#characters characters} event.</p>
   *
   * @see #endCDATA
   */
  virtual void startCDATA() = 0;

  /**
   * Signals the end of a CDATA section.
   *
   * @see #startCDATA
   */
  virtual void endCDATA() = 0;

  /**
   * Reports an XML comment found anywhere in the document.
   *
   * <p>The callback is used for comments both inside and outside the
   * document element, including those in the external DTD subset (if
   * it is read).  Comments in the DTD must nest properly inside
   * start/endDTD and start/endEntity events (if used).</p>
   *
   * @param text A string holding the comment.
   */
  virtual void comment(const string_type& text) = 0;

  /**
   * The pseudo-entity name "[dtd]", which startEntity and endEntity use
   * for the external DTD subset.  It is held as a string_type so that
   * implementations can compare it directly with the reported names.
   */
  const string_type dtd_pseudo_entity;

protected:
  /** Initialises dtd_pseudo_entity to "[dtd]". */
  LexicalHandler() :
    dtd_pseudo_entity(string_adaptor::construct_from_utf8("[dtd]"))
  {
  }

  /**
   * Copy construction, spelled out because a copy-assignment operator
   * is user-declared; it copies dtd_pseudo_entity from the source.
   */
  LexicalHandler(const LexicalHandler& rhs) :
    dtd_pseudo_entity(rhs.dtd_pseudo_entity)
  {
  }
}; // class LexicalHandler
|
||||
|
||||
} // namespace SAX
|
||||
} // namespace Arabica
|
||||
|
||||
#endif
|
||||
// end of file
|
||||
|
|
Loading…
Reference in a new issue