arabica/include/SAX/ext/Locator2.hpp

81 lines
2.6 KiB
C++
Raw Normal View History

2003-09-11 12:26:53 +02:00
#ifndef ARABICA_LOCATOR2_H
#define ARABICA_LOCATOR2_H
2002-06-21 13:16:28 +02:00
// Locator2.h
// $Id$
2007-09-05 00:55:47 +02:00
#include <SAX/ArabicaConfig.hpp>
#include <SAX/Locator.hpp>
2002-06-21 13:16:28 +02:00
2007-09-05 11:49:18 +02:00
namespace Arabica
{
2002-06-21 13:16:28 +02:00
namespace SAX
{
/**
* SAX2 extension to augment the entity information provided through a Locator.
* <p>
* If an implementation supports this extension, the Locator provided in
* ContentHandler.setDocumentLocator() will implement this interface,
* and the http://xml.org/sax/features/use-locator2 feature flag will have
* the value true.
* <p>
* @since SAX 2.0
* @author Jez Higgins,
* <a href="mailto:jez@jezuk.co.uk">jez@jezuk.co.uk</a>
* @version 2.0
*/
template<typename string_type>
class basic_Locator2 : public basic_Locator<string_type>
{
public:
  typedef string_type stringT;

  virtual ~basic_Locator2() {}

  /**
   * Reports which version of XML applies to the entity. Usually this is
   * the identifier taken from the current entity's
   * <?xml version='...' ...?> declaration; otherwise it is a default
   * chosen by the parser.
   * <p>
   * At the time of writing "1.0" is the only version defined, although a
   * new version with slightly different rules about which characters are
   * legal in XML names seems likely.
   * <p>
   * @return Identifier for the XML version being used to interpret the
   *         entity's text.
   */
  virtual stringT getXMLVersion() const = 0;

  /**
   * Reports the name of the character encoding for the entity. The name
   * is selected as follows:
   * <ul>
   * <li>if the encoding was declared externally (for example, in a MIME
   *     Content-Type header), that name is returned;</li>
   * <li>otherwise, if the document starts with an
   *     <?xml ...encoding='...'?> declaration, the encoding named there
   *     is returned;</li>
   * <li>otherwise the encoding will have been inferred (normally to be
   *     UTF-8, or some UTF-16 variant), and the inferred name is
   *     returned.</li>
   * </ul>
   * <p>
   * Some recent W3C specifications require text in some encodings to be
   * normalized, using Unicode Normalization Form C, before processing.
   * Applications must perform that normalization themselves, and would
   * normally trigger it based on the value returned by this method.
   * <p>
   * Encoding names may be those used by the underlying implementation,
   * so compare them case-insensitively.
   *
   * @return Name of the character encoding being used to interpret the
   *         entity's text.
   */
  virtual stringT getEncoding() const = 0;
}; // class basic_Locator2
// Instantiation of basic_Locator2 for std::string.
typedef basic_Locator2<std::string> Locator2;
#ifndef ARABICA_NO_WCHAR_T
2002-06-21 13:16:28 +02:00
// Instantiation of basic_Locator2 for std::wstring; omitted when ARABICA_NO_WCHAR_T is defined.
typedef basic_Locator2<std::wstring> wLocator2;
#endif
2002-06-21 13:16:28 +02:00
2007-09-05 11:49:18 +02:00
} // namespace SAX
} // namespace Arabica
2002-06-21 13:16:28 +02:00
#endif
// end of file