mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-01 06:20:38 +01:00
332 lines
9.9 KiB
C++
332 lines
9.9 KiB
C++
#ifndef ARABICA_INPUTSOURCE_H
|
|
#define ARABICA_INPUTSOURCE_H
|
|
|
|
// InputSource.h
|
|
// $Id$
|
|
|
|
#include <iosfwd>
|
|
#include <string>
|
|
|
|
#include <SAX/ArabicaConfig.h>
|
|
#include <SAX/IStreamHandle.h>
|
|
|
|
namespace SAX
|
|
{
|
|
|
|
/**
|
|
* A single input source for an XML entity.
|
|
*
|
|
* <p>This class allows a SAX application to encapsulate information
|
|
* about an input source in a single object, which may include
|
|
* a public identifier, a system identifier, a byte stream (possibly
|
|
* with a specified encoding).</p>
|
|
*
|
|
* <p>There are two places that the application will deliver this
|
|
* input source to the parser: as the argument to the Parser.parse
|
|
* method, or as the return value of the EntityResolver.resolveEntity
|
|
* method.</p>
|
|
*
|
|
* <p>The SAX parser will use the InputSource object to determine
|
|
* how to read XML input. If there is a byte stream available,
|
|
* the parser will read that stream directly; if a byte stream is
|
|
* not available, the parser will attempt to open a URI connection
|
|
* to the resource identified by the system identifier.</p>
|
|
*
|
|
* <p>An InputSource object belongs to the application: the SAX parser
|
|
* shall never modify it in any way (it may modify a copy if
|
|
* necessary).</p>
|
|
*
|
|
* <strong>InputSource is still under active consideration, and its
|
|
* interface may change.</strong>
|
|
*
|
|
* @since SAX 1.0
|
|
* @author Jez Higgins,
|
|
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
|
* @version 2.0
|
|
* @see basic_Parser#parse
|
|
* @see basic_EntityResolver#resolveEntity
|
|
*/
|
|
template<class string_type>
|
|
class basic_InputSource
|
|
{
|
|
public:
|
|
typedef string_type stringT;
|
|
|
|
/**
|
|
* Zero-argument default constructor.
|
|
*
|
|
* @see #setPublicId
|
|
* @see #setSystemId
|
|
* @see #setByteStream
|
|
* @see #setEncoding
|
|
*/
|
|
basic_InputSource() :
|
|
byteStream_(),
|
|
publicId_(),
|
|
systemId_(),
|
|
encoding_()
|
|
{ }
|
|
/**
|
|
* Create a new input source with a system identifier.
|
|
*
|
|
* <p>Applications may use setPublicId to include a
|
|
* public identifier as well, or setEncoding to specify
|
|
* the character encoding, if known.</p>
|
|
*
|
|
* <p>If the system identifier is a URL, it must be full resolved.</p>
|
|
*
|
|
* @param systemId The system identifier (URI).
|
|
* @see #setPublicId
|
|
* @see #setSystemId
|
|
* @see #setByteStream
|
|
* @see #setEncoding
|
|
*/
|
|
basic_InputSource(const stringT& systemId) :
|
|
byteStream_(),
|
|
publicId_(),
|
|
systemId_(systemId),
|
|
encoding_()
|
|
{ }
|
|
basic_InputSource(const basic_InputSource& rhs) :
|
|
byteStream_(rhs.byteStream_),
|
|
publicId_(rhs.publicId_),
|
|
systemId_(rhs.systemId_),
|
|
encoding_(rhs.encoding_)
|
|
{ }
|
|
/**
|
|
* Create a new input source with a byte stream.
|
|
*
|
|
* <p>Application writers may use setSystemId to provide a base
|
|
* for resolving relative URIs, setPublicId to include a
|
|
* public identifier, and/or setEncoding to specify the object's
|
|
* character encoding.</p>
|
|
*
|
|
* @param byteStream The raw byte stream containing the document. The
|
|
* basic_InputSource does not assume ownership of
|
|
* this byteStream.
|
|
* @see #basic_InputSource(std::auto_ptr<std::istream>)
|
|
* @see #setPublicId
|
|
* @see #setSystemId
|
|
* @see #setEncoding
|
|
* @see #setByteStream
|
|
*/
|
|
basic_InputSource(std::istream& byteStream) :
|
|
byteStream_(byteStream),
|
|
publicId_(),
|
|
systemId_(),
|
|
encoding_()
|
|
{ }
|
|
|
|
|
|
/**
|
|
* Create a new input source with a byte stream.
|
|
*
|
|
* <p>Application writers may use setSystemId to provide a base
|
|
* for resolving relative URIs, setPublicId to include a
|
|
* public identifier, and/or setEncoding to specify the object's
|
|
* character encoding.</p>
|
|
*
|
|
* @param byteStream The raw byte stream containing the document. The
|
|
* basic_InputSource assumes ownership of the byteStream
|
|
* and will delete it when no-longer required.
|
|
* @see basic_InputSource(std::istream&)
|
|
* @see #setPublicId
|
|
* @see #setSystemId
|
|
* @see #setEncoding
|
|
* @see #setByteStream
|
|
*/
|
|
basic_InputSource(std::auto_ptr<std::istream> byteStream) :
|
|
byteStream_(byteStream),
|
|
publicId_(),
|
|
systemId_(),
|
|
encoding_()
|
|
{ }
|
|
|
|
virtual ~basic_InputSource() { }
|
|
|
|
basic_InputSource& operator=(const basic_InputSource& rhs)
|
|
{
|
|
byteStream_ = rhs.byteStream_;
|
|
publicId_ = rhs.publicId_;
|
|
systemId_ = rhs.systemId_;
|
|
encoding_ = rhs.encoding_;
|
|
|
|
return *this;
|
|
} // operator=
|
|
|
|
/**
|
|
* Set the public identifier for this input source.
|
|
*
|
|
* <p>The public identifier is always optional: if the application
|
|
* writer includes one, it will be provided as part of the
|
|
* location information.</p>
|
|
*
|
|
* @param publicId The public identifier as a string.
|
|
* @see #getPublicId
|
|
* @see basic_Locator#getPublicId
|
|
* @see basic_SAXParseException#getPublicId
|
|
*/
|
|
void setPublicId(const stringT& publicId) { publicId_ = publicId; }
|
|
/**
|
|
* Get the public identifier for this input source.
|
|
*
|
|
* @return The public identifier, or an empty string if none was supplied.
|
|
* @see #setPublicId
|
|
*/
|
|
const stringT& getPublicId() const { return publicId_; }
|
|
|
|
/**
|
|
* Set the system identifier for this input source.
|
|
*
|
|
* <p>The system identifier is optional if there is a byte
|
|
* stream but it is still useful to provide one, since the
|
|
* application can use it to resolve relative URIs and can
|
|
* include it in error messages and warnings (the parser will
|
|
* attempt to open a connection to the URI only if there is no
|
|
* byte stream specified).</p>
|
|
*
|
|
* <p>If the application knows the character encoding of the
|
|
* object pointed to by the system identifier, it can register
|
|
* the encoding using the setEncoding method.</p>
|
|
*
|
|
* <p>If the system ID is a URL, it must be fully resolved.</p>
|
|
*
|
|
* @param systemId The system identifier as a string.
|
|
* @see #setEncoding
|
|
* @see #getSystemId
|
|
* @see basic_Locator#getSystemId
|
|
* @see basic_SAXParseException#getSystemId
|
|
*/
|
|
void setSystemId(const stringT& systemId) { systemId_ = systemId; }
|
|
/**
|
|
* Get the system identifier for this input source.
|
|
*
|
|
* <p>The getEncoding method will return the character encoding
|
|
* of the object pointed to, or an empty string if unknown.</p>
|
|
*
|
|
* <p>If the system ID is a URL, it will be fully resolved.</p>
|
|
*
|
|
* @return The system identifier.
|
|
* @see #setSystemId
|
|
* @see #getEncoding
|
|
*/
|
|
const stringT& getSystemId() const { return systemId_; }
|
|
|
|
/**
|
|
* Set the byte stream for this input source.
|
|
*
|
|
* <p>The SAX parser will use a byte stream in preference
|
|
* to opening a URI connection itself.</p>
|
|
*
|
|
* <p>If the application knows the character encoding of the
|
|
* byte stream, it should set it with the setEncoding method.</p>
|
|
*
|
|
* @param byteStream A byte stream containing an XML document or
|
|
* other entity. The basic_InputSource does not assume
|
|
* ownership of byteStream.
|
|
* @see #setByteStream(std::auto_ptr<std::istream>) To transfer ownership of
|
|
* an std::istream to an InputSource
|
|
* @see #setEncoding
|
|
* @see #getByteStream
|
|
* @see #getEncoding
|
|
*/
|
|
void setByteStream(std::istream& byteStream)
|
|
{
|
|
byteStream_ = byteStream;
|
|
} // setByteStream
|
|
|
|
/**
|
|
* Set the byte stream for this input source.
|
|
*
|
|
* <p>The SAX parser will use a byte stream in preference to
|
|
* opening a URI connection itself.</p>
|
|
*
|
|
* <p>If the application knows the character encoding of the
|
|
* byte stream, it should set it with the setEncoding method.</p>
|
|
*
|
|
* @param byteStream A byte stream containing an XML document or
|
|
* other entity. The basic_InputSource assumes
|
|
* ownership of byteStream.
|
|
* @see #setByteStream(std::istream&)
|
|
* @see #setEncoding
|
|
* @see #getByteStream
|
|
* @see #getEncoding
|
|
*/
|
|
void setByteStream(std::auto_ptr<std::istream> byteStream)
|
|
{
|
|
byteStream_ = byteStream;
|
|
} // setByteStream
|
|
|
|
/**
|
|
* Get the byte stream for this input source as a <code>std::istream*</code>.
|
|
*
|
|
* <p>The getEncoding method will return the character
|
|
* encoding for this byte stream, or an empty string if unknown.</p>
|
|
*
|
|
* @return The byte stream, or null if none was supplied. No ownership is
|
|
* transfered.
|
|
* @see #getEncoding
|
|
* @see #setByteStream
|
|
*/
|
|
std::istream* getByteStream() const
|
|
{
|
|
return byteStream_.get();
|
|
} // getByteStream
|
|
|
|
/**
|
|
* Get the byte stream for this input source as an IStreamHandle.
|
|
*
|
|
* <p>The getEncoding method will return the character
|
|
* encoding for this byte stream, or null if unknown.</p>
|
|
*
|
|
* @return The byte stream, or null if none was supplied. Ownership is
|
|
* shared between this and the client code.
|
|
* @see #getEncoding
|
|
* @see #setByteStream
|
|
*/
|
|
IStreamHandle getByteStreamHandle() const
|
|
{
|
|
return byteStream_;
|
|
}
|
|
/**
|
|
* Set the character encoding, if known.
|
|
*
|
|
* <p>The encoding must be a string acceptable for an
|
|
* XML encoding declaration (see section 4.3.3 of the XML 1.0
|
|
* recommendation).</p>
|
|
*
|
|
* @param encoding A string describing the character encoding.
|
|
* @see #setSystemId
|
|
* @see #setByteStream
|
|
* @see #getEncoding
|
|
*/
|
|
void setEncoding(const stringT& encoding) { encoding_ = encoding; }
|
|
/**
|
|
* Get the character encoding for a byte stream or URI.
|
|
*
|
|
* @return The encoding, or an empty string if none was supplied.
|
|
* @see #setByteStream
|
|
* @see #getSystemId
|
|
* @see #getByteStream
|
|
*/
|
|
const stringT& getEncoding() const { return encoding_; }
|
|
|
|
///////////////////////////////////////////////////////////
|
|
private:
|
|
IStreamHandle byteStream_;
|
|
stringT publicId_;
|
|
stringT systemId_;
|
|
stringT encoding_;
|
|
|
|
bool operator==(const basic_InputSource&); // no implementation
|
|
}; // class basic_InputSource
|
|
|
|
// Instantiation of basic_InputSource over std::string.
typedef basic_InputSource<std::string> InputSource;

#if !defined(ARABICA_NO_WCHAR_T)
// Instantiation over std::wstring; omitted when ARABICA_NO_WCHAR_T is defined.
typedef basic_InputSource<std::wstring> wInputSource;
#endif // !defined(ARABICA_NO_WCHAR_T)
|
|
|
|
}; // namespace SAX
|
|
|
|
#endif // ARABICA_INPUTSOURCE_H
|
|
// end of file
|