2003-09-11 12:26:53 +02:00
|
|
|
#ifndef ARABICA_INPUTSOURCE_H
|
|
|
|
#define ARABICA_INPUTSOURCE_H
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
// InputSource.h
|
|
|
|
// $Id$
|
|
|
|
|
|
|
|
#include <iosfwd>
|
|
|
|
#include <string>
|
|
|
|
|
2007-09-05 00:55:47 +02:00
|
|
|
#include <SAX/ArabicaConfig.hpp>
|
|
|
|
#include <SAX/IStreamHandle.hpp>
|
2007-09-10 19:52:04 +02:00
|
|
|
#include <Arabica/StringAdaptor.hpp>
|
2003-09-09 13:14:48 +02:00
|
|
|
|
2007-09-05 11:49:18 +02:00
|
|
|
namespace Arabica
|
|
|
|
{
|
2002-06-21 13:16:28 +02:00
|
|
|
namespace SAX
|
|
|
|
{
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A single input source for an XML entity.
|
|
|
|
*
|
|
|
|
* <p>This class allows a SAX application to encapsulate information
|
|
|
|
* about an input source in a single object, which may include
|
|
|
|
* a public identifier, a system identifier, a byte stream (possibly
|
2003-09-12 16:09:13 +02:00
|
|
|
* with a specified encoding).</p>
|
2002-06-21 13:16:28 +02:00
|
|
|
*
|
|
|
|
* <p>There are two places that the application will deliver this
|
|
|
|
* input source to the parser: as the argument to the Parser.parse
|
|
|
|
* method, or as the return value of the EntityResolver.resolveEntity
|
|
|
|
* method.</p>
|
|
|
|
*
|
2003-09-12 16:09:13 +02:00
|
|
|
* <p>The SAX parser will use the InputSource object to determine
|
|
|
|
* how to read XML input. If there is a byte stream available,
|
|
|
|
* the parser will read that stream directly; if a byte stream is
|
|
|
|
* not available, the parser will attempt to open a URI connection
|
|
|
|
* to the resource identified by the system identifier.</p>
|
2002-06-21 13:16:28 +02:00
|
|
|
*
|
|
|
|
* <p>An InputSource object belongs to the application: the SAX parser
|
|
|
|
* shall never modify it in any way (it may modify a copy if
|
|
|
|
* necessary).</p>
|
|
|
|
*
|
|
|
|
* <strong>InputSource is still under active consideration, and its
|
|
|
|
* interface may change.</strong>
|
|
|
|
*
|
|
|
|
* @since SAX 1.0
|
|
|
|
* @author Jez Higgins,
|
|
|
|
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
|
|
|
|
* @version 2.0
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see Parser#parse
|
|
|
|
* @see EntityResolver#resolveEntity
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
template<class string_type, class string_adaptor = Arabica::default_string_adaptor<string_type> >
|
2007-09-05 14:57:07 +02:00
|
|
|
class InputSource
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* Zero-argument default constructor.
|
|
|
|
*
|
|
|
|
* @see #setPublicId
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #setByteStream
|
|
|
|
* @see #setEncoding
|
|
|
|
*/
|
2007-09-05 14:57:07 +02:00
|
|
|
InputSource() :
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_(),
|
|
|
|
publicId_(),
|
|
|
|
systemId_(),
|
|
|
|
encoding_()
|
|
|
|
{ }
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Create a new input source with a system identifier.
|
|
|
|
*
|
|
|
|
* <p>Applications may use setPublicId to include a
|
|
|
|
* public identifier as well, or setEncoding to specify
|
|
|
|
* the character encoding, if known.</p>
|
|
|
|
*
|
|
|
|
* <p>If the system identifier is a URL, it must be full resolved.</p>
|
|
|
|
*
|
|
|
|
* @param systemId The system identifier (URI).
|
|
|
|
* @see #setPublicId
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #setByteStream
|
|
|
|
* @see #setEncoding
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
InputSource(const string_type& systemId) :
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_(),
|
|
|
|
publicId_(),
|
|
|
|
systemId_(systemId),
|
|
|
|
encoding_()
|
|
|
|
{ }
|
2007-09-05 14:57:07 +02:00
|
|
|
InputSource(const InputSource& rhs) :
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_(rhs.byteStream_),
|
2002-06-21 13:16:28 +02:00
|
|
|
publicId_(rhs.publicId_),
|
|
|
|
systemId_(rhs.systemId_),
|
2003-09-12 16:09:13 +02:00
|
|
|
encoding_(rhs.encoding_)
|
|
|
|
{ }
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Create a new input source with a byte stream.
|
|
|
|
*
|
|
|
|
* <p>Application writers may use setSystemId to provide a base
|
|
|
|
* for resolving relative URIs, setPublicId to include a
|
|
|
|
* public identifier, and/or setEncoding to specify the object's
|
|
|
|
* character encoding.</p>
|
|
|
|
*
|
2003-09-12 16:09:13 +02:00
|
|
|
* @param byteStream The raw byte stream containing the document. The
|
2007-09-05 14:57:07 +02:00
|
|
|
* InputSource does not assume ownership of
|
2003-09-12 16:09:13 +02:00
|
|
|
* this byteStream.
|
2020-04-15 22:01:14 +02:00
|
|
|
* @see #InputSource(std::unique_ptr<std::istream>)
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #setPublicId
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #setEncoding
|
|
|
|
* @see #setByteStream
|
|
|
|
*/
|
2007-09-05 14:57:07 +02:00
|
|
|
InputSource(std::istream& byteStream) :
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_(byteStream),
|
|
|
|
publicId_(),
|
|
|
|
systemId_(),
|
|
|
|
encoding_()
|
|
|
|
{ }
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a new input source with a byte stream.
|
|
|
|
*
|
|
|
|
* <p>Application writers may use setSystemId to provide a base
|
|
|
|
* for resolving relative URIs, setPublicId to include a
|
|
|
|
* public identifier, and/or setEncoding to specify the object's
|
|
|
|
* character encoding.</p>
|
|
|
|
*
|
|
|
|
* @param byteStream The raw byte stream containing the document. The
|
2007-09-05 14:57:07 +02:00
|
|
|
* InputSource assumes ownership of the byteStream
|
2003-09-12 16:09:13 +02:00
|
|
|
* and will delete it when no-longer required.
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see InputSource(std::istream&)
|
2003-09-12 16:09:13 +02:00
|
|
|
* @see #setPublicId
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #setEncoding
|
|
|
|
* @see #setByteStream
|
|
|
|
*/
|
2020-04-15 22:01:14 +02:00
|
|
|
InputSource(std::unique_ptr<std::istream>&& byteStream) :
|
|
|
|
byteStream_(std::move(byteStream)),
|
2003-09-12 16:09:13 +02:00
|
|
|
publicId_(),
|
|
|
|
systemId_(),
|
|
|
|
encoding_()
|
|
|
|
{ }
|
2007-08-07 23:27:55 +02:00
|
|
|
|
2020-04-15 22:01:14 +02:00
|
|
|
InputSource(std::unique_ptr<std::iostream>&& byteStream) :
|
|
|
|
byteStream_(std::move(byteStream)),
|
2007-08-07 23:27:55 +02:00
|
|
|
publicId_(),
|
|
|
|
systemId_(),
|
|
|
|
encoding_()
|
|
|
|
{ }
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 14:57:07 +02:00
|
|
|
virtual ~InputSource() { }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 14:57:07 +02:00
|
|
|
InputSource& operator=(const InputSource& rhs)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_ = rhs.byteStream_;
|
2002-06-21 13:16:28 +02:00
|
|
|
publicId_ = rhs.publicId_;
|
|
|
|
systemId_ = rhs.systemId_;
|
2003-09-12 16:09:13 +02:00
|
|
|
encoding_ = rhs.encoding_;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
return *this;
|
|
|
|
} // operator=
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the public identifier for this input source.
|
|
|
|
*
|
|
|
|
* <p>The public identifier is always optional: if the application
|
|
|
|
* writer includes one, it will be provided as part of the
|
|
|
|
* location information.</p>
|
|
|
|
*
|
|
|
|
* @param publicId The public identifier as a string.
|
|
|
|
* @see #getPublicId
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see Locator#getPublicId
|
|
|
|
* @see SAXParseException#getPublicId
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
void setPublicId(const string_type& publicId) { publicId_ = publicId; }
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Get the public identifier for this input source.
|
|
|
|
*
|
2002-10-08 03:00:22 +02:00
|
|
|
* @return The public identifier, or an empty string if none was supplied.
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #setPublicId
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
const string_type& getPublicId() const { return publicId_; }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the system identifier for this input source.
|
|
|
|
*
|
2003-09-12 16:09:13 +02:00
|
|
|
* <p>The system identifier is optional if there is a byte
|
|
|
|
* stream but it is still useful to provide one, since the
|
|
|
|
* application can use it to resolve relative URIs and can
|
|
|
|
* include it in error messages and warnings (the parser will
|
|
|
|
* attempt to open a connection to the URI only if there is no
|
|
|
|
* byte stream specified).</p>
|
2002-06-21 13:16:28 +02:00
|
|
|
*
|
|
|
|
* <p>If the application knows the character encoding of the
|
|
|
|
* object pointed to by the system identifier, it can register
|
|
|
|
* the encoding using the setEncoding method.</p>
|
|
|
|
*
|
|
|
|
* <p>If the system ID is a URL, it must be fully resolved.</p>
|
|
|
|
*
|
|
|
|
* @param systemId The system identifier as a string.
|
|
|
|
* @see #setEncoding
|
|
|
|
* @see #getSystemId
|
2007-09-05 14:57:07 +02:00
|
|
|
* @see Locator#getSystemId
|
|
|
|
* @see SAXParseException#getSystemId
|
2002-06-21 13:16:28 +02:00
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
void setSystemId(const string_type& systemId) { systemId_ = systemId; }
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Get the system identifier for this input source.
|
|
|
|
*
|
|
|
|
* <p>The getEncoding method will return the character encoding
|
2002-10-08 03:00:22 +02:00
|
|
|
* of the object pointed to, or an empty string if unknown.</p>
|
2002-06-21 13:16:28 +02:00
|
|
|
*
|
|
|
|
* <p>If the system ID is a URL, it will be fully resolved.</p>
|
|
|
|
*
|
|
|
|
* @return The system identifier.
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #getEncoding
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
const string_type& getSystemId() const { return systemId_; }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the byte stream for this input source.
|
|
|
|
*
|
2003-09-12 16:09:13 +02:00
|
|
|
* <p>The SAX parser will use a byte stream in preference
|
2002-06-21 13:16:28 +02:00
|
|
|
* to opening a URI connection itself.</p>
|
|
|
|
*
|
|
|
|
* <p>If the application knows the character encoding of the
|
|
|
|
* byte stream, it should set it with the setEncoding method.</p>
|
|
|
|
*
|
|
|
|
* @param byteStream A byte stream containing an XML document or
|
2007-09-05 14:57:07 +02:00
|
|
|
* other entity. The InputSource does not assume
|
2003-09-12 16:09:13 +02:00
|
|
|
* ownership of byteStream.
|
2020-04-15 22:01:14 +02:00
|
|
|
* @see #setByteStream(std::unique_ptr<std::istream>) To transfer ownership of
|
2003-09-12 16:09:13 +02:00
|
|
|
* an std::istream to an InputSource
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #setEncoding
|
|
|
|
* @see #getByteStream
|
|
|
|
* @see #getEncoding
|
|
|
|
*/
|
|
|
|
void setByteStream(std::istream& byteStream)
|
|
|
|
{
|
2003-09-12 16:09:13 +02:00
|
|
|
byteStream_ = byteStream;
|
|
|
|
} // setByteStream
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the byte stream for this input source.
|
|
|
|
*
|
|
|
|
* <p>The SAX parser will use a byte stream in preference to
|
|
|
|
* opening a URI connection itself.</p>
|
|
|
|
*
|
|
|
|
* <p>If the application knows the character encoding of the
|
|
|
|
* byte stream, it should set it with the setEncoding method.</p>
|
|
|
|
*
|
|
|
|
* @param byteStream A byte stream containing an XML document or
|
2007-09-05 14:57:07 +02:00
|
|
|
* other entity. The InputSource assumes
|
2003-09-12 16:09:13 +02:00
|
|
|
* ownership of byteStream.
|
|
|
|
* @see #setByteStream(std::istream&)
|
|
|
|
* @see #setEncoding
|
|
|
|
* @see #getByteStream
|
|
|
|
* @see #getEncoding
|
|
|
|
*/
|
2020-04-15 22:01:14 +02:00
|
|
|
void setByteStream(std::unique_ptr<std::istream> byteStream)
|
2003-09-12 16:09:13 +02:00
|
|
|
{
|
|
|
|
byteStream_ = byteStream;
|
2002-06-21 13:16:28 +02:00
|
|
|
} // setByteStream
|
2003-09-12 16:09:13 +02:00
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
2003-09-12 16:09:13 +02:00
|
|
|
* Get the byte stream for this input source as a <code>std::istream*</code>.
|
|
|
|
*
|
|
|
|
* <p>The getEncoding method will return the character
|
|
|
|
* encoding for this byte stream, or an empty string if unknown.</p>
|
|
|
|
*
|
|
|
|
* @return The byte stream, or null if none was supplied. No ownership is
|
|
|
|
* transfered.
|
|
|
|
* @see #getEncoding
|
|
|
|
* @see #setByteStream
|
|
|
|
*/
|
|
|
|
std::istream* getByteStream() const
|
|
|
|
{
|
|
|
|
return byteStream_.get();
|
|
|
|
} // getByteStream
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the byte stream for this input source as an IStreamHandle.
|
2002-06-21 13:16:28 +02:00
|
|
|
*
|
|
|
|
* <p>The getEncoding method will return the character
|
|
|
|
* encoding for this byte stream, or null if unknown.</p>
|
|
|
|
*
|
2003-09-12 16:09:13 +02:00
|
|
|
* @return The byte stream, or null if none was supplied. Ownership is
|
|
|
|
* shared between this and the client code.
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #getEncoding
|
|
|
|
* @see #setByteStream
|
|
|
|
*/
|
2003-09-12 16:09:13 +02:00
|
|
|
IStreamHandle getByteStreamHandle() const
|
|
|
|
{
|
|
|
|
return byteStream_;
|
|
|
|
}
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Set the character encoding, if known.
|
|
|
|
*
|
|
|
|
* <p>The encoding must be a string acceptable for an
|
|
|
|
* XML encoding declaration (see section 4.3.3 of the XML 1.0
|
|
|
|
* recommendation).</p>
|
|
|
|
*
|
|
|
|
* @param encoding A string describing the character encoding.
|
|
|
|
* @see #setSystemId
|
|
|
|
* @see #setByteStream
|
|
|
|
* @see #getEncoding
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
void setEncoding(const string_type& encoding) { encoding_ = encoding; }
|
2002-06-21 13:16:28 +02:00
|
|
|
/**
|
|
|
|
* Get the character encoding for a byte stream or URI.
|
|
|
|
*
|
2002-10-08 03:00:22 +02:00
|
|
|
* @return The encoding, or an empty string if none was supplied.
|
2002-06-21 13:16:28 +02:00
|
|
|
* @see #setByteStream
|
|
|
|
* @see #getSystemId
|
|
|
|
* @see #getByteStream
|
|
|
|
*/
|
2007-09-06 00:08:51 +02:00
|
|
|
const string_type& getEncoding() const { return encoding_; }
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////
|
|
|
|
private:
|
2003-09-12 16:09:13 +02:00
|
|
|
IStreamHandle byteStream_;
|
2007-09-06 00:08:51 +02:00
|
|
|
string_type publicId_;
|
|
|
|
string_type systemId_;
|
|
|
|
string_type encoding_;
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 14:57:07 +02:00
|
|
|
bool operator==(const InputSource&); // no implementation
|
|
|
|
}; // class InputSource
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2007-09-05 11:49:18 +02:00
|
|
|
} // namespace SAX
|
|
|
|
} // namespace Arabica
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
#endif // ARABICA_INPUTSOURCE_H
|
|
|
|
// end of file
|