2003-09-11 12:26:53 +02:00
|
|
|
#ifndef ARABICA_XML_READER_H
|
|
|
|
#define ARABICA_XML_READER_H
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
// XMLReader.hpp
|
|
|
|
// $Id$
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <memory>
|
|
|
|
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/ArabicaConfig.hpp>
|
2003-09-09 13:14:48 +02:00
|
|
|
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/ContentHandler.hpp>
|
|
|
|
#include <SAX/EntityResolver.hpp>
|
|
|
|
#include <SAX/DTDHandler.hpp>
|
|
|
|
#include <SAX/ErrorHandler.hpp>
|
|
|
|
#include <SAX/InputSource.hpp>
|
|
|
|
#include <SAX/ext/LexicalHandler.hpp>
|
|
|
|
#include <SAX/ext/DeclHandler.hpp>
|
|
|
|
#include <SAX/SAXNotSupportedException.hpp>
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-10 19:52:04 +02:00
|
|
|
#include <Arabica/StringAdaptor.hpp>
|
|
|
|
#include <Arabica/getparam.hpp>
|
2007-09-06 00:08:51 +02:00
|
|
|
|
2007-09-05 11:49:18 +02:00
|
|
|
namespace Arabica
|
|
|
|
{
|
2002-06-21 13:16:28 +02:00
|
|
|
namespace SAX
|
|
|
|
{
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Interface for reading an XML document using callbacks.
|
|
|
|
*
|
|
|
|
* <p>XMLReader is the interface that an XML parser's SAX2 driver must
|
|
|
|
* implement. This interface allows an application to set and
|
|
|
|
* query features and properties in the parser, to register
|
|
|
|
* event handlers for document processing, and to initiate
|
|
|
|
* a document parse.</p>
|
|
|
|
*
|
|
|
|
* <p>All SAX interfaces are assumed to be synchronous: the
|
|
|
|
* {@link #parse parse} methods must not return until parsing
|
|
|
|
* is complete, and readers must wait for an event-handler callback
|
|
|
|
* to return before reporting the next event.</p>
|
|
|
|
*
|
|
|
|
* <p>This interface replaces the (now deprecated) SAX 1.0 {@link
|
2007-09-05 14:57:07 +02:00
|
|
|
* Parser Parser} interface. The XMLReader interface
|
2002-06-21 13:16:28 +02:00
|
|
|
* contains two important enhancements over the old Parser
|
|
|
|
* interface:</p>
|
|
|
|
*
|
|
|
|
* <ol>
|
|
|
|
* <li>it adds a standard way to query and set features and
|
|
|
|
* properties; and</li>
|
|
|
|
* <li>it adds Namespace support, which is required for many
|
|
|
|
* higher-level XML standards.</li>
|
|
|
|
* </ol>
|
|
|
|
*
|
|
|
|
* <p>There are adapters available to convert a SAX1 Parser to
|
|
|
|
* a SAX2 XMLReader and vice-versa.</p>
|
|
|
|
*
|
|
|
|
* @since SAX 2.0
|
|
|
|
* @author Jez Higgins,
|
|
|
|
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
|
|
|
* @version 2.0
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see XMLFilter
|
|
|
|
* @see helpers.ParserAdapter
|
|
|
|
* @see helpers.XMLReaderAdapter
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2008-04-07 17:37:47 +02:00
|
|
|
class XMLReaderInterface_tag { };
|
|
|
|
|
2008-07-16 11:01:08 +02:00
|
|
|
template<class string_type, class string_adaptor_type>
|
2008-04-07 17:37:47 +02:00
|
|
|
class XMLReaderInterface : public XMLReaderInterface_tag
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
public:
|
2008-07-16 11:01:08 +02:00
|
|
|
typedef string_adaptor_type string_adaptor;
|
2007-09-06 00:08:51 +02:00
|
|
|
typedef EntityResolver<string_type, string_adaptor> EntityResolverT;
|
|
|
|
typedef DTDHandler<string_type, string_adaptor> DTDHandlerT;
|
|
|
|
typedef ContentHandler<string_type, string_adaptor> ContentHandlerT;
|
|
|
|
typedef InputSource<string_type, string_adaptor> InputSourceT;
|
|
|
|
typedef ErrorHandler<string_type, string_adaptor> ErrorHandlerT;
|
|
|
|
typedef DeclHandler<string_type, string_adaptor> DeclHandlerT;
|
|
|
|
typedef LexicalHandler<string_type, string_adaptor> LexicalHandlerT;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-06 00:08:51 +02:00
|
|
|
virtual ~XMLReaderInterface() { }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
/////////////////////////////////////////////////
|
|
|
|
// Configuration
|
|
|
|
/**
|
|
|
|
* Look up the value of a feature.
|
|
|
|
*
|
|
|
|
* <p>The feature name is any fully-qualified URI. It is
|
|
|
|
* possible for an XMLReader to recognize a feature name but
|
|
|
|
* to be unable to return its value; this is especially true
|
|
|
|
* in the case of an adapter for a SAX1 Parser, which has
|
|
|
|
* no way of knowing whether the underlying parser is
|
|
|
|
* performing validation or expanding external entities.</p>
|
|
|
|
*
|
|
|
|
* <p>All XMLReaders are required to recognize the
|
|
|
|
* http://xml.org/sax/features/namespaces and the
|
|
|
|
* http://xml.org/sax/features/namespace-prefixes feature names.</p>
|
|
|
|
*
|
|
|
|
* <p>Some feature values may be available only in specific
|
|
|
|
* contexts, such as before, during, or after a parse.</p>
|
|
|
|
*
|
|
|
|
* <p>Typical usage is something like this:</p>
|
|
|
|
*
|
|
|
|
* <pre>
|
|
|
|
* XMLReader r = new MySAXDriver();
|
|
|
|
*
|
|
|
|
* // try to activate validation
|
|
|
|
* try {
|
|
|
|
* r.setFeature("http://xml.org/sax/features/validation", true);
|
|
|
|
* } catch (SAXException e) {
|
|
|
|
* System.err.println("Cannot activate validation.");
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* // register event handlers
|
|
|
|
* r.setContentHandler(new MyContentHandler());
|
|
|
|
* r.setErrorHandler(new MyErrorHandler());
|
|
|
|
*
|
|
|
|
* // parse the first document
|
|
|
|
* try {
|
|
|
|
* r.parse("http://www.foo.com/mydoc.xml");
|
|
|
|
* } catch (IOException e) {
|
|
|
|
* System.err.println("I/O exception reading XML document");
|
|
|
|
* } catch (SAXException e) {
|
|
|
|
* System.err.println("XML exception reading document.");
|
|
|
|
* }
|
|
|
|
* </pre>
|
|
|
|
*
|
|
|
|
* <p>Implementors are free (and encouraged) to invent their own features,
|
|
|
|
* using names built on their own URIs.</p>
|
|
|
|
*
|
|
|
|
* @param name The feature name, which is a fully-qualified URI.
|
|
|
|
* @return The current state of the feature (true or false).
|
|
|
|
* @see #setFeature
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
virtual bool getFeature(const string_type& name) const = 0;
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Set the state of a feature.
|
|
|
|
*
|
|
|
|
* <p>The feature name is any fully-qualified URI. It is
|
|
|
|
* possible for an XMLReader to recognize a feature name but
|
|
|
|
* to be unable to set its value; this is especially true
|
2007-09-05 14:57:07 +02:00
|
|
|
* in the case of an adapter for a SAX1 {@link Parser Parser},
|
2002-06-21 13:16:28 +02:00
|
|
|
* which has no way of affecting whether the underlying parser is
|
|
|
|
* validating, for example.</p>
|
|
|
|
*
|
|
|
|
* <p>All XMLReaders are required to support setting
|
|
|
|
* http://xml.org/sax/features/namespaces to true and
|
|
|
|
* http://xml.org/sax/features/namespace-prefixes to false.</p>
|
|
|
|
*
|
|
|
|
* <p>Some feature values may be immutable or mutable only
|
|
|
|
* in specific contexts, such as before, during, or after
|
|
|
|
* a parse.</p>
|
|
|
|
*
|
|
|
|
* @param name The feature name, which is a fully-qualified URI.
|
2002-10-08 03:00:22 +02:00
|
|
|
* @param value The requested value of the feature (true or false).
|
2002-06-21 13:16:28 +02:00
|
|
|
* @exception SAXNotRecognizedException When the
|
|
|
|
* XMLReader does not recognize the feature name.
|
|
|
|
* @exception SAXNotSupportedException When the
|
|
|
|
* XMLReader recognizes the feature name but
|
|
|
|
* cannot set the requested value.
|
|
|
|
* @see #getFeature
|
2002-10-08 03:00:22 +02:00
|
|
|
* @see FeatureNames
|
|
|
|
* @see http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html#package_description for a list of SAX2 features.
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
virtual void setFeature(const string_type& name, bool value) = 0;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
/////////////////////////////////////////////////
|
|
|
|
// Event Handlers
|
|
|
|
/**
|
|
|
|
* Allow an application to register an entity resolver.
|
|
|
|
*
|
|
|
|
* <p>If the application does not register an entity resolver,
|
|
|
|
* the XMLReader will perform its own default resolution.</p>
|
|
|
|
*
|
|
|
|
* <p>Applications may register a new or different resolver in the
|
|
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
|
|
* resolver immediately.</p>
|
|
|
|
*
|
|
|
|
* @param resolver The entity resolver.
|
|
|
|
* @see #getEntityResolver
|
|
|
|
*/
|
|
|
|
virtual void setEntityResolver(EntityResolverT& resolver) = 0;
|
|
|
|
/**
|
|
|
|
* Return the current entity resolver.
|
|
|
|
*
|
|
|
|
* @return The current entity resolver, or null if none
|
|
|
|
* has been registered.
|
|
|
|
* @see #setEntityResolver
|
|
|
|
*/
|
|
|
|
virtual EntityResolverT* getEntityResolver() const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Allow an application to register a DTD event handler.
|
|
|
|
*
|
|
|
|
* <p>If the application does not register a DTD handler, all DTD
|
|
|
|
* events reported by the SAX parser will be silently ignored.</p>
|
|
|
|
*
|
|
|
|
* <p>Applications may register a new or different handler in the
|
|
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
|
|
* handler immediately.</p>
|
|
|
|
*
|
|
|
|
* @param handler The DTD handler.
|
|
|
|
* @see #getDTDHandler
|
|
|
|
*/
|
|
|
|
virtual void setDTDHandler(DTDHandlerT& handler) = 0;
|
|
|
|
/**
|
|
|
|
* Return the current DTD handler.
|
|
|
|
*
|
|
|
|
* @return The current DTD handler, or null if none
|
|
|
|
* has been registered.
|
|
|
|
* @see #setDTDHandler
|
|
|
|
*/
|
|
|
|
virtual DTDHandlerT* getDTDHandler() const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Allow an application to register a content event handler.
|
|
|
|
*
|
|
|
|
* <p>If the application does not register a content handler, all
|
|
|
|
* content events reported by the SAX parser will be silently
|
|
|
|
* ignored.</p>
|
|
|
|
*
|
|
|
|
* <p>Applications may register a new or different handler in the
|
|
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
|
|
* handler immediately.</p>
|
|
|
|
*
|
|
|
|
* @param handler The content handler.
|
|
|
|
* @see #getContentHandler
|
|
|
|
*/
|
|
|
|
virtual void setContentHandler(ContentHandlerT& handler) = 0;
|
|
|
|
/**
|
|
|
|
* Return the current content handler.
|
|
|
|
*
|
|
|
|
* @return The current content handler, or null if none
|
|
|
|
* has been registered.
|
|
|
|
* @see #setContentHandler
|
|
|
|
*/
|
|
|
|
virtual ContentHandlerT* getContentHandler() const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Allow an application to register an error event handler.
|
|
|
|
*
|
|
|
|
* <p>If the application does not register an error handler, all
|
|
|
|
* error events reported by the SAX parser will be silently
|
|
|
|
* ignored; however, normal processing may not continue. It is
|
|
|
|
* highly recommended that all SAX applications implement an
|
|
|
|
* error handler to avoid unexpected bugs.</p>
|
|
|
|
*
|
|
|
|
* <p>Applications may register a new or different handler in the
|
|
|
|
* middle of a parse, and the SAX parser must begin using the new
|
|
|
|
* handler immediately.</p>
|
|
|
|
*
|
|
|
|
* @param handler The error handler.
|
|
|
|
* @see #getErrorHandler
|
|
|
|
*/
|
2003-08-27 16:26:32 +02:00
|
|
|
virtual void setErrorHandler(ErrorHandlerT& handler) = 0;
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Return the current error handler.
|
|
|
|
*
|
|
|
|
* @return The current error handler, or null if none
|
|
|
|
* has been registered.
|
|
|
|
* @see #setErrorHandler
|
|
|
|
*/
|
2003-08-27 16:26:32 +02:00
|
|
|
virtual ErrorHandlerT* getErrorHandler() const = 0;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-01-15 15:21:33 +01:00
|
|
|
virtual void setDeclHandler(DeclHandlerT& handler) = 0;
|
|
|
|
/**
|
|
|
|
* Return the current decl handler.
|
|
|
|
*
|
|
|
|
* @return The current decl handler, or 0 if none has been registered
|
|
|
|
* @see #setDeclHandler
|
|
|
|
*/
|
|
|
|
virtual DeclHandlerT* getDeclHandler() const = 0;
|
|
|
|
|
|
|
|
virtual void setLexicalHandler(LexicalHandlerT& handler) = 0;
|
|
|
|
/**
|
|
|
|
* Return the current lexical handler.
|
|
|
|
*
|
|
|
|
* @return The current lexical handler, or 0 if none has been registered
|
|
|
|
* @see #setLexicalHandler
|
|
|
|
*/
|
|
|
|
virtual LexicalHandlerT* getLexicalHandler() const = 0;
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
//////////////////////////////////////////////////
|
|
|
|
// Parsing
|
|
|
|
/**
|
|
|
|
* Parse an XML document from a system identifier (URI).
|
|
|
|
*
|
|
|
|
* <p>This method is a shortcut for the common case of reading a
|
|
|
|
* document from a system identifier. It is the exact
|
|
|
|
* equivalent of the following:</p>
|
|
|
|
*
|
|
|
|
* <pre>
|
|
|
|
* InputSource is(systemId);
|
|
|
|
* parse(is);
|
|
|
|
* </pre>
|
|
|
|
*
|
|
|
|
* <p>If the system identifier is a URL, it must be fully resolved
|
|
|
|
* by the application before it is passed to the parser.</p>
|
|
|
|
*
|
|
|
|
* @param systemId The system identifier (URI).
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see #parse(InputSource&)
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
void parse(const string_type& systemId)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
InputSourceT is(systemId);
|
|
|
|
parse(is);
|
|
|
|
} // parser
|
|
|
|
/**
|
|
|
|
* Parse an XML document.
|
|
|
|
*
|
|
|
|
* <p>The application can use this method to instruct the XML
|
|
|
|
* reader to begin parsing an XML document from any valid input
|
|
|
|
* source (a character stream, a byte stream, or a URI).</p>
|
|
|
|
*
|
|
|
|
* <p>Applications may not invoke this method while a parse is in
|
|
|
|
* progress (they should create a new XMLReader instead for each
|
|
|
|
* nested XML document). Once a parse is complete, an
|
|
|
|
* application may reuse the same XMLReader object, possibly with a
|
|
|
|
* different input source.</p>
|
|
|
|
*
|
|
|
|
* <p>During the parse, the XMLReader will provide information
|
|
|
|
* about the XML document through the registered event
|
|
|
|
* handlers.</p>
|
|
|
|
*
|
|
|
|
* <p>This method is synchronous: it will not return until parsing
|
|
|
|
* has ended. If a client application wants to terminate
|
|
|
|
* parsing early, it should throw an exception.</p>
|
|
|
|
*
|
2002-10-08 03:00:22 +02:00
|
|
|
* @param input The input source for the top-level of the
|
2002-06-21 13:16:28 +02:00
|
|
|
* XML document.
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see InputSource
|
2007-09-06 00:08:51 +02:00
|
|
|
* @see #parse(const string_type&)
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #setEntityResolver
|
|
|
|
* @see #setDTDHandler
|
|
|
|
* @see #setContentHandler
|
|
|
|
* @see #setErrorHandler
|
|
|
|
*/
|
|
|
|
virtual void parse(InputSourceT& input) = 0;
|
2007-09-05 14:57:07 +02:00
|
|
|
}; // class XMLReaderInterface
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 11:49:18 +02:00
|
|
|
} // namespace SAX
|
|
|
|
} // namespace Arabica
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 00:55:47 +02:00
|
|
|
/* Included to ensure that #include<SAX/XMLReader.hpp> defines a class called
 * XMLReader.
 */
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/ParserConfig.hpp>
|
2002-07-13 00:10:52 +02:00
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
#endif
|
|
|
|
// end of file
|