mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-03 23:02:11 +01:00
fd5c310e0b
which pulls in the headers for the various parsers if the appropriate macros are defined - USE_EXPAT, USE_XERCES, USE_LIBXML2, USE_MSXML. You can define any or all of them, or none should you so wish. It also sets SAX::XMLReader as a default class - ie if you define one of the above macros, SAX::XMLReader<stringT> will become a synonym for SAX::expat_wrapper<stringT> or whatever.
455 lines
14 KiB
C++
455 lines
14 KiB
C++
#ifndef XMLReaderH
|
|
#define XMLReaderH
|
|
|
|
// XMLReader.h
|
|
// $Id$
|
|
|
|
#include <string>
|
|
#include <memory>
|
|
|
|
#include <SAX/ContentHandler.h>
|
|
#include <SAX/EntityResolver.h>
|
|
#include <SAX/DTDHandler.h>
|
|
#include <SAX/ErrorHandler.h>
|
|
#include <SAX/InputSource.h>
|
|
|
|
namespace SAX
|
|
{
|
|
|
|
/**
|
|
* Interface for reading an XML document using callbacks.
|
|
*
|
|
* <p>XMLReader is the interface that an XML parser's SAX2 driver must
|
|
* implement. This interface allows an application to set and
|
|
* query features and properties in the parser, to register
|
|
* event handlers for document processing, and to initiate
|
|
* a document parse.</p>
|
|
*
|
|
* <p>All SAX interfaces are assumed to be synchronous: the
|
|
* {@link #parse parse} methods must not return until parsing
|
|
* is complete, and readers must wait for an event-handler callback
|
|
* to return before reporting the next event.</p>
|
|
*
|
|
* <p>This interface replaces the (now deprecated) SAX 1.0 {@link
|
|
* basic_Parser Parser} interface. The XMLReader interface
|
|
* contains two important enhancements over the old Parser
|
|
* interface:</p>
|
|
*
|
|
* <ol>
|
|
* <li>it adds a standard way to query and set features and
|
|
* properties; and</li>
|
|
* <li>it adds Namespace support, which is required for many
|
|
* higher-level XML standards.</li>
|
|
* </ol>
|
|
*
|
|
* <p>There are adapters available to convert a SAX1 Parser to
|
|
* a SAX2 XMLReader and vice-versa.</p>
|
|
*
|
|
* @since SAX 2.0
|
|
* @author Jez Higgins,
|
|
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
|
* @version 2.0
|
|
* @see basic_XMLFilter
|
|
* @see basic_helpers.ParserAdapter
|
|
* @see basic_helpers.XMLReaderAdapter
|
|
*/
|
|
template<class string_type>
|
|
class basic_XMLReader
|
|
{
|
|
public:
|
|
// The string type this reader is parameterised on; every other alias below is built from it.
typedef string_type stringT;
|
|
// Entity-resolver interface instantiated for stringT (see setEntityResolver/getEntityResolver).
typedef basic_EntityResolver<stringT> EntityResolverT;
|
|
// DTD-handler interface instantiated for stringT (see setDTDHandler/getDTDHandler).
typedef basic_DTDHandler<stringT> DTDHandlerT;
|
|
// Content-handler interface instantiated for stringT (see setContentHandler/getContentHandler).
typedef basic_ContentHandler<stringT> ContentHandlerT;
|
|
// Input-source type instantiated for stringT; this is what the parse() overloads work with.
typedef basic_InputSource<stringT> InputSourceT;
|
|
|
|
// Virtual, empty destructor: lets a derived reader be destroyed through a basic_XMLReader pointer.
virtual ~basic_XMLReader() { }
|
|
|
|
/////////////////////////////////////////////////
|
|
// Configuration
|
|
/**
|
|
* Look up the value of a feature.
|
|
*
|
|
* <p>The feature name is any fully-qualified URI. It is
|
|
* possible for an XMLReader to recognize a feature name but
|
|
* to be unable to return its value; this is especially true
|
|
* in the case of an adapter for a SAX1 Parser, which has
|
|
* no way of knowing whether the underlying parser is
|
|
* performing validation or expanding external entities.</p>
|
|
*
|
|
* <p>All XMLReaders are required to recognize the
|
|
* http://xml.org/sax/features/namespaces and the
|
|
* http://xml.org/sax/features/namespace-prefixes feature names.</p>
|
|
*
|
|
* <p>Some feature values may be available only in specific
|
|
* contexts, such as before, during, or after a parse.</p>
|
|
*
|
|
* <p>Typical usage is something like this:</p>
|
|
*
|
|
* <pre>
|
|
* XMLReader r = new MySAXDriver();
|
|
*
|
|
* // try to activate validation
|
|
* try {
|
|
* r.setFeature("http://xml.org/sax/features/validation", true);
|
|
* } catch (SAXException e) {
|
|
* System.err.println("Cannot activate validation.");
|
|
* }
|
|
*
|
|
* // register event handlers
|
|
* r.setContentHandler(new MyContentHandler());
|
|
* r.setErrorHandler(new MyErrorHandler());
|
|
*
|
|
* // parse the first document
|
|
* try {
|
|
* r.parse("http://www.foo.com/mydoc.xml");
|
|
* } catch (IOException e) {
|
|
* System.err.println("I/O exception reading XML document");
|
|
* } catch (SAXException e) {
|
|
* System.err.println("XML exception reading document.");
|
|
* }
|
|
* </pre>
|
|
*
|
|
* <p>Implementors are free (and encouraged) to invent their own features,
|
|
* using names built on their own URIs.</p>
|
|
*
|
|
* @param name The feature name, which is a fully-qualified URI.
|
|
* @return The current state of the feature (true or false).
|
|
* @see #setFeature
|
|
*/
|
|
// Pure virtual and const-qualified: every concrete reader must supply the lookup.
virtual bool getFeature(const stringT& name) const = 0;
|
|
/**
|
|
* Set the state of a feature.
|
|
*
|
|
* <p>The feature name is any fully-qualified URI. It is
|
|
* possible for an XMLReader to recognize a feature name but
|
|
* to be unable to set its value; this is especially true
|
|
* in the case of an adapter for a SAX1 {@link basic_Parser Parser},
|
|
* which has no way of affecting whether the underlying parser is
|
|
* validating, for example.</p>
|
|
*
|
|
* <p>All XMLReaders are required to support setting
|
|
* http://xml.org/sax/features/namespaces to true and
|
|
* http://xml.org/sax/features/namespace-prefixes to false.</p>
|
|
*
|
|
* <p>Some feature values may be immutable or mutable only
|
|
* in specific contexts, such as before, during, or after
|
|
* a parse.</p>
|
|
*
|
|
* @param name The feature name, which is a fully-qualified URI.
|
|
* @param value The requested state of the feature (true or false).
|
|
* @exception SAXNotRecognizedException When the
|
|
* XMLReader does not recognize the feature name.
|
|
* @exception SAXNotSupportedException When the
|
|
* XMLReader recognizes the feature name but
|
|
* cannot set the requested value.
|
|
* @see #getFeature
|
|
*/
|
|
// Pure virtual; 'value' is the requested on/off state for the feature identified by 'name'.
virtual void setFeature(const stringT& name, bool value) = 0;
|
|
|
|
/////////////////////////////////////////////////
|
|
// Event Handlers
|
|
/**
|
|
* Allow an application to register an entity resolver.
|
|
*
|
|
* <p>If the application does not register an entity resolver,
|
|
* the XMLReader will perform its own default resolution.</p>
|
|
*
|
|
* <p>Applications may register a new or different resolver in the
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
* resolver immediately.</p>
|
|
*
|
|
* @param resolver The entity resolver.
|
|
* @see #getEntityResolver
|
|
*/
|
|
// Pure virtual. The resolver is taken by non-const reference, so "no resolver" cannot be
// expressed through this call.
// NOTE(review): ownership/lifetime is not visible here - presumably the caller keeps the
// resolver alive for as long as the reader uses it; confirm against the implementations.
virtual void setEntityResolver(EntityResolverT& resolver) = 0;
|
|
/**
|
|
* Return the current entity resolver.
|
|
*
|
|
* @return The current entity resolver, or null if none
|
|
* has been registered.
|
|
* @see #setEntityResolver
|
|
*/
|
|
// Pure virtual, const. Returns a pointer rather than a reference so that "none registered"
// can be reported as null, as documented above.
virtual EntityResolverT* getEntityResolver() const = 0;
|
|
|
|
/**
|
|
* Allow an application to register a DTD event handler.
|
|
*
|
|
* <p>If the application does not register a DTD handler, all DTD
|
|
* events reported by the SAX parser will be silently ignored.</p>
|
|
*
|
|
* <p>Applications may register a new or different handler in the
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
* handler immediately.</p>
|
|
*
|
|
* @param handler The DTD handler.
|
|
* @see #getDTDHandler
|
|
*/
|
|
// Pure virtual. The handler is taken by non-const reference, so a null handler cannot be passed.
virtual void setDTDHandler(DTDHandlerT& handler) = 0;
|
|
/**
|
|
* Return the current DTD handler.
|
|
*
|
|
* @return The current DTD handler, or null if none
|
|
* has been registered.
|
|
* @see #setDTDHandler
|
|
*/
|
|
// Pure virtual, const. Pointer return allows null when no DTD handler has been registered.
virtual DTDHandlerT* getDTDHandler() const = 0;
|
|
|
|
/**
|
|
* Allow an application to register a content event handler.
|
|
*
|
|
* <p>If the application does not register a content handler, all
|
|
* content events reported by the SAX parser will be silently
|
|
* ignored.</p>
|
|
*
|
|
* <p>Applications may register a new or different handler in the
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
* handler immediately.</p>
|
|
*
|
|
* @param handler The content handler.
|
|
* @see #getContentHandler
|
|
*/
|
|
// Pure virtual. The handler is taken by non-const reference, so a null handler cannot be passed.
virtual void setContentHandler(ContentHandlerT& handler) = 0;
|
|
/**
|
|
* Return the current content handler.
|
|
*
|
|
* @return The current content handler, or null if none
|
|
* has been registered.
|
|
* @see #setContentHandler
|
|
*/
|
|
// Pure virtual, const. Pointer return allows null when no content handler has been registered.
virtual ContentHandlerT* getContentHandler() const = 0;
|
|
|
|
/**
|
|
* Allow an application to register an error event handler.
|
|
*
|
|
* <p>If the application does not register an error handler, all
|
|
* error events reported by the SAX parser will be silently
|
|
* ignored; however, normal processing may not continue. It is
|
|
* highly recommended that all SAX applications implement an
|
|
* error handler to avoid unexpected bugs.</p>
|
|
*
|
|
* <p>Applications may register a new or different handler in the
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
* handler immediately.</p>
|
|
*
|
|
* @param handler The error handler.
|
|
* @see #getErrorHandler
|
|
*/
|
|
// Pure virtual. Unlike the other handlers, the error handler is named directly as
// SAX::ErrorHandler with no stringT template argument and no class-level typedef.
virtual void setErrorHandler(SAX::ErrorHandler& handler) = 0;
|
|
/**
|
|
* Return the current error handler.
|
|
*
|
|
* @return The current error handler, or null if none
|
|
* has been registered.
|
|
* @see #setErrorHandler
|
|
*/
|
|
// Pure virtual, const. Pointer return allows null when no error handler has been registered.
virtual SAX::ErrorHandler* getErrorHandler() const = 0;
|
|
|
|
//////////////////////////////////////////////////
|
|
// Parsing
|
|
/**
|
|
* Parse an XML document from a system identifier (URI).
|
|
*
|
|
* <p>This method is a shortcut for the common case of reading a
|
|
* document from a system identifier. It is the exact
|
|
* equivalent of the following:</p>
|
|
*
|
|
* <pre>
|
|
* InputSource is(systemId);
|
|
* parse(is);
|
|
* </pre>
|
|
*
|
|
* <p>If the system identifier is a URL, it must be fully resolved
|
|
* by the application before it is passed to the parser.</p>
|
|
*
|
|
* @param systemId The system identifier (URI).
|
|
* @see #parse(basic_InputSource&)
|
|
*/
|
|
void parse(const stringT& systemId)
|
|
{
|
|
InputSourceT is(systemId);
|
|
parse(is);
|
|
} // parser
|
|
/**
|
|
* Parse an XML document.
|
|
*
|
|
* <p>The application can use this method to instruct the XML
|
|
* reader to begin parsing an XML document from any valid input
|
|
* source (a character stream, a byte stream, or a URI).</p>
|
|
*
|
|
* <p>Applications may not invoke this method while a parse is in
|
|
* progress (they should create a new XMLReader instead for each
|
|
* nested XML document). Once a parse is complete, an
|
|
* application may reuse the same XMLReader object, possibly with a
|
|
* different input source.</p>
|
|
*
|
|
* <p>During the parse, the XMLReader will provide information
|
|
* about the XML document through the registered event
|
|
* handlers.</p>
|
|
*
|
|
* <p>This method is synchronous: it will not return until parsing
|
|
* has ended. If a client application wants to terminate
|
|
* parsing early, it should throw an exception.</p>
|
|
*
|
|
* @param input The input source for the top-level of the
|
|
* XML document.
|
|
* @see basic_InputSource
|
|
* @see #parse(const stringT&)
|
|
* @see #setEntityResolver
|
|
* @see #setDTDHandler
|
|
* @see #setContentHandler
|
|
* @see #setErrorHandler
|
|
*/
|
|
// Pure virtual: the concrete reader supplies the actual parsing. The non-virtual
// parse(const stringT&) overload above builds an InputSourceT and forwards here.
virtual void parse(InputSourceT& input) = 0;
|
|
|
|
////////////////////////////////////////////////////
|
|
// property implementation
|
|
protected:
|
|
/**
 * Type-erased base for boxed property values.
 *
 * It carries no data of its own; the virtual destructor makes the type
 * polymorphic so a PropertyBase pointer can be deleted safely and
 * dynamic_cast back to the concrete Property<...> it points at.
 */
class PropertyBase
{
public:
  virtual ~PropertyBase() { }
}; // PropertyBase

/**
 * Box holding a single property value of type propertyTypeT.
 *
 * propertyTypeT may be a reference type (getProperty/setProperty use
 * Property<T&>), in which case the box refers to the caller's object
 * rather than owning a copy.
 */
template<typename propertyTypeT>
class Property : public PropertyBase
{
public:
  /** Store wrappee (a copy, or a reference when propertyTypeT is a reference type). */
  Property(propertyTypeT wrappee) : wrappee_(wrappee) { }

  /**
   * Return the stored value.
   *
   * const-qualified so the value can also be read through a const Property;
   * for reference instantiations the reference itself is returned unchanged.
   */
  propertyTypeT get() const { return wrappee_; }

private:
  propertyTypeT wrappee_;
}; // class Property
|
|
|
|
public:
|
|
// Implementation hook called by getProperty(): returns the named property boxed as a
// PropertyBase, with ownership of the box handed to the caller through std::auto_ptr.
// Note that this hook is not const-qualified, although getProperty() is.
virtual std::auto_ptr<PropertyBase> doGetProperty(const stringT& name) = 0;
|
|
// Implementation hook called by setProperty(): receives the named property boxed as a
// PropertyBase; the by-value std::auto_ptr parameter transfers ownership of the box.
virtual void doSetProperty(const stringT& name, std::auto_ptr<PropertyBase> value) = 0;
|
|
|
|
/**
|
|
* Look up the value of a property.
|
|
*
|
|
* <p>The property name is any fully-qualified URI. It is
|
|
* possible for an XMLReader to recognize a property name but
|
|
* to be unable to return its state; this is especially true
|
|
* in the case of an adapter for a SAX1 {@link basic_Parser
|
|
* Parser}.</p>
|
|
*
|
|
* <p>XMLReaders are not required to recognize any specific
|
|
* property names, though an initial core set is documented for
|
|
* SAX2.</p>
|
|
*
|
|
* <p>Some property values may be available only in specific
|
|
* contexts, such as before, during, or after a parse.</p>
|
|
*
|
|
* <p>Implementors are free (and encouraged) to invent their own properties,
|
|
* using names built on their own URIs.</p>
|
|
*
|
|
* @param name The property name, which is a fully-qualified URI.
|
|
* @return The current value of the property.
|
|
* @exception SAXNotRecognizedException When the
|
|
* XMLReader does not recognize the property name.
|
|
* @exception SAXNotSupportedException When the
|
|
* XMLReader recognizes the property name but
|
|
* cannot determine its value at this time.
|
|
* @see #setProperty
|
|
*/
|
|
template<typename propertyTypeT>
|
|
propertyTypeT& getProperty(const stringT& name) const
|
|
{
|
|
std::auto_ptr<PropertyBase> pb = doGetProperty(name);
|
|
Property<propertyTypeT&>* prop = dynamic_cast<Property<propertyTypeT&>* >(pb.get());
|
|
|
|
if(!prop)
|
|
throw std::bad_cast("Property " + name + " is wrong type.");
|
|
|
|
return pro->get();
|
|
} // getProperty
|
|
|
|
/**
|
|
* Set the value of a property.
|
|
*
|
|
* <p>The property name is any fully-qualified URI. It is
|
|
* possible for an XMLReader to recognize a property name but
|
|
* to be unable to set its value; this is especially true
|
|
* in the case of an adapter for a SAX1 {@link basic_Parser
|
|
* Parser}.</p>
|
|
*
|
|
* <p>XMLReaders are not required to recognize setting
|
|
* any specific property names, though a core set is provided with
|
|
* SAX2.</p>
|
|
*
|
|
* <p>Some property values may be immutable or mutable only
|
|
* in specific contexts, such as before, during, or after
|
|
* a parse.</p>
|
|
*
|
|
* <p>This method is also the standard mechanism for setting
|
|
* extended handlers.</p>
|
|
*
|
|
* @param name The property name, which is a fully-qualified URI.
|
|
* @param value The requested value for the property.
|
|
* @exception SAXNotRecognizedException When the
|
|
* XMLReader does not recognize the property name.
|
|
* @exception SAXNotSupportedException When the
|
|
* XMLReader recognizes the property name but
|
|
* cannot set the requested value.
|
|
*/
|
|
template<typename propertyTypeT>
|
|
void setProperty(const stringT& name, propertyTypeT& value)
|
|
{
|
|
Property<propertyTypeT&>* prop = new Property<propertyTypeT&>(value);
|
|
doSetProperty(name, std::auto_ptr<PropertyBase>(prop));
|
|
} // setProperty
|
|
}; // class basic_XMLReader
|
|
|
|
}; // namespace SAX
|
|
|
|
// ifdef ladder to set up the default parser
|
|
#ifdef USE_LIBXML2
|
|
#pragma message("Including libxml2")
|
|
#include <SAX/wrappers/saxlibxml2.h>
|
|
#undef DEF_SAX_P
|
|
#define DEF_SAX_P libxml2_wrapper
|
|
#endif
|
|
#ifdef USE_MSXML
|
|
#pragma message("Including MSXML")
|
|
#include <SAX/wrappers/saxmsxml2.h>
|
|
#undef DEF_SAX_P
|
|
#define DEF_SAX_P msxml2_wrapper
|
|
#endif
|
|
#ifdef USE_XERCES
|
|
#pragma message("Including Xerces")
|
|
#include <SAX/wrappers/saxxerces.h>
|
|
#undef DEF_SAX_P
|
|
#define DEF_SAX_P xerces_wrapper
|
|
#endif
|
|
#ifdef USE_GARDEN
|
|
#pragma message("Including Garden")
|
|
#include <SAX/parsers/saxgarden.h>
|
|
#undef DEF_SAX_P
|
|
#define DEF_SAX_P Garden
|
|
#endif
|
|
#ifdef USE_EXPAT
|
|
#pragma message("Including Expat")
|
|
#include <SAX/wrappers/saxexpat.h>
|
|
#undef DEF_SAX_P
|
|
#define DEF_SAX_P expat_wrapper
|
|
#endif
|
|
|
|
// Unless the client opts out by defining NO_DEFAULT_PARSER, expose the parser
// selected by the #ifdef ladder above as SAX::XMLReader<string_type>.
// If no USE_* macro picked a parser, that is a configuration error.
#ifndef NO_DEFAULT_PARSER
#ifdef DEF_SAX_P
namespace SAX
{
  template<class string_type>
  class XMLReader : public DEF_SAX_P<string_type> { };
} // namespace SAX
#else
#error No default parser defined.
#endif
#endif

// DEF_SAX_P is only a helper for the selection above. Undefine it so it does
// not leak into every translation unit that includes this header (this
// previously undefined DEF_P, a macro that is never defined).
#undef DEF_SAX_P
|
|
|
|
#endif
|
|
// end of file
|