diff --git a/configure.ac b/configure.ac index 38f7b25b..3b862589 100644 --- a/configure.ac +++ b/configure.ac @@ -27,6 +27,7 @@ AC_CONFIG_FILES([arabica.pc]) AC_CONFIG_FILES([src/Makefile]) AC_CONFIG_FILES([examples/Makefile]) AC_CONFIG_FILES([examples/Utils/Makefile]) +AC_CONFIG_FILES([examples/Taggle/Makefile]) AC_CONFIG_FILES([examples/SAX/Makefile]) AC_CONFIG_FILES([examples/DOM/Makefile]) AC_CONFIG_FILES([examples/XPath/Makefile]) diff --git a/examples/Makefile.am b/examples/Makefile.am index c102c9f8..23c402cf 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = SAX Utils +SUBDIRS = SAX Taggle Utils if WANT_DOM SUBDIRS += DOM endif diff --git a/examples/Taggle/Makefile.am b/examples/Taggle/Makefile.am new file mode 100755 index 00000000..d5d1d007 --- /dev/null +++ b/examples/Taggle/Makefile.am @@ -0,0 +1,7 @@ +noinst_PROGRAMS = taggle + +AM_CPPFLAGS = -I$(top_srcdir)/include @PARSER_HEADERS@ $(BOOST_CPPFLAGS) +LIBARABICA = $(top_builddir)/src/libarabica.la + +taggle_SOURCES = taggle.cpp +taggle_LDADD = $(LIBARABICA) diff --git a/examples/Taggle/sample.pyx b/examples/Taggle/sample.pyx new file mode 100755 index 00000000..6f42d751 --- /dev/null +++ b/examples/Taggle/sample.pyx @@ -0,0 +1,60 @@ +(po +Aid P01456 +(date +Ayear 2002 +Amonth 6 +Aday 14 +)date +(address +Atype shipping +(name +-Frits Mendels +)name +(street +-152 Cherry St +)street +(city +-San Francisco +)city +(state +-CA +)state +(zip +-94045 +)zip +)address +(address +Atype billing +(name +-Frits Mendels +)name +(street +-PO Box 6789 +)street +(city +-San Francisco +)city +(state +-CA +)state +(zip +-94123-6798 +)zip +)address +(items +(item +Aquantity 1 +AproductCode R-273 +Adescription 14.4 Volt Cordless Drill +AunitCost 198.95 +)item +(item +Aquantity 1 +AproductCode 16325 +Adescription 12 Piece Drill Bit Set +AunitCost 14.95 +)item +)items +)po + + diff --git a/examples/Taggle/taggle.cpp b/examples/Taggle/taggle.cpp new file mode 100755 index 
00000000..bba199da --- /dev/null +++ b/examples/Taggle/taggle.cpp @@ -0,0 +1,54 @@ +#pragma warning(disable: 4250) + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, const char* argv[]) +{ + if(argc == 1) + { + std::cout << "taggle [file1] [file2] ... [filen]\n" + << " taggle reads arbitrary HTML, outputting it as well-formed XML\n"; + return 0; + } // if(argc == 1) + + Arabica::SAX::Taggle parser; + std::ostringstream sink; + Arabica::SAX::Writer writer(sink, 4); + Arabica::SAX::CatchErrorHandler eh; + + writer.setParent(parser); + writer.setErrorHandler(eh); + + for(int i = 1; i < argc; ++i) + { + std::string file(argv[i]); + Arabica::SAX::InputSource is; + is.setSystemId(file); + + if(file != "-") + writer.parse(is); + else + { + is.setSystemId("stdin"); + is.setByteStream(std::cin); + + writer.parse(is); + } // if(file != "-") + + if(eh.errorsReported()) + { + std::cerr << eh.errors() << std::endl; + eh.reset(); + } // if ... + + std::cout << sink.str(); + } // for ... 
+ + return 0; +} // main diff --git a/include/DOM/SAX2DOM/SAX2DOM.hpp b/include/DOM/SAX2DOM/SAX2DOM.hpp index c636cfb5..5b0b3d77 100644 --- a/include/DOM/SAX2DOM/SAX2DOM.hpp +++ b/include/DOM/SAX2DOM/SAX2DOM.hpp @@ -20,11 +20,26 @@ namespace Arabica namespace SAX2DOM { -template, - class SAX_parser = Arabica::SAX::XMLReader > -class Parser : protected Arabica::SAX::DefaultHandler +template +struct ParserTypes { + typedef typename Arabica::get_param, + T0, + T1>::type string_adaptor; + typedef typename Arabica::get_param, + T1, + T0>::type SAX_parser_type; +}; + +template +class Parser : protected Arabica::SAX::DefaultHandler::string_adaptor> +{ + typedef typename ParserTypes::string_adaptor string_adaptorT; + typedef typename ParserTypes::SAX_parser_type SAX_parser_type; typedef Arabica::SAX::Attributes AttributesT; typedef Arabica::SAX::EntityResolver EntityResolverT; typedef Arabica::SAX::ErrorHandler ErrorHandlerT; @@ -88,7 +103,7 @@ class Parser : protected Arabica::SAX::DefaultHandler inDTD_ = false; inEntity_ = 0; - SAX_parser parser; + SAX_parser_type parser; parser.setContentHandler(*this); parser.setErrorHandler(*this); if(entityResolver_) @@ -157,7 +172,7 @@ class Parser : protected Arabica::SAX::DefaultHandler Arabica::SAX::AttributeTypes attributeTypes_; protected: - void setParserFeatures(SAX_parser& parser) const + void setParserFeatures(SAX_parser_type& parser) const { for(typename Features::const_iterator f = features_.begin(), e = features_.end(); f != e; ++f) try { diff --git a/include/DOM/io/Stream.hpp b/include/DOM/io/Stream.hpp index 052c43ad..06b201d4 100644 --- a/include/DOM/io/Stream.hpp +++ b/include/DOM/io/Stream.hpp @@ -37,7 +37,7 @@ namespace DOM namespace StreamImpl { template -void streamChildren(std::basic_ostream& stream, DOM::Node& node) +void streamChildren(std::basic_ostream& stream, const DOM::Node& node) { DOM::Node child = node.getFirstChild(); while(child != 0) @@ -72,7 +72,7 @@ std::pair is_uri_declared(std::vector template 
void check_and_output_node_name(std::basic_ostream& stream, - DOM::Node& node, + const DOM::Node& node, std::vector >* prefix_stack) { std::map& current = *(prefix_stack->rbegin()); @@ -112,7 +112,7 @@ bool isXmlns(const stringT& str) template int prefix_mapper(std::basic_ostream& stream, - DOM::Node& node) + const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; @@ -189,7 +189,7 @@ int prefix_mapper(std::basic_ostream& stream, template void prefix_mapper_pop(std::basic_ostream& stream, - DOM::Node node, + const DOM::Node& node, int index, bool output) { @@ -212,7 +212,7 @@ void prefix_mapper_pop(std::basic_ostream& stream, template std::basic_ostream& operator<<(std::basic_ostream& stream, - DOM::Node& node) + const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; diff --git a/include/SAX/Locator.hpp b/include/SAX/Locator.hpp index 9c64b703..c2bfda75 100644 --- a/include/SAX/Locator.hpp +++ b/include/SAX/Locator.hpp @@ -7,6 +7,7 @@ #include #include +#include namespace Arabica { @@ -43,7 +44,7 @@ namespace SAX * @version 2.0 * @see ContentHandler#setDocumentLocator */ -template +template > class Locator { public: diff --git a/include/SAX/XMLReader.hpp b/include/SAX/XMLReader.hpp index 60878eb3..90a30438 100644 --- a/include/SAX/XMLReader.hpp +++ b/include/SAX/XMLReader.hpp @@ -63,8 +63,10 @@ namespace SAX * @see helpers.ParserAdapter * @see helpers.XMLReaderAdapter */ +class XMLReaderInterface_tag { }; + template -class XMLReaderInterface +class XMLReaderInterface : public XMLReaderInterface_tag { public: typedef typename Arabica::get_param #include +#include #include namespace Arabica @@ -38,7 +39,7 @@ namespace SAX * jez@jezuk.co.uk * @version 2.0 */ -template +template > class AttributesImpl : public Attributes { public: @@ -64,6 +65,14 @@ public: return *this; } // operator= + bool operator==(const Attr& rhs) const + { + return (uri_ == rhs.uri_) && + (localName_ == rhs.localName_) && + (qName_ == rhs.qName_) && + (type_ == rhs.type_) && 
+ (value_ == rhs.value_); + } // operator== string_type uri_; string_type localName_; @@ -75,11 +84,21 @@ public: //////////////////////////////////////////////////////////////////// // Constructors. AttributesImpl() { } - AttributesImpl(const AttributesT& atts) + AttributesImpl(const AttributesT& rhs) { - setAttributes(atts); + setAttributes(rhs); } // AttributesImpl + AttributesImpl& operator=(const AttributesT& rhs) + { + setAttributes(rhs); + } // operator= + + bool operator==(const AttributesImpl& rhs) const + { + return attributes_ == rhs.attributes_; + } // operator== + //////////////////////////////////////////////////////////////////// // Implementation of SAX::Attributes. /** diff --git a/include/Taggle/Taggle.hpp b/include/Taggle/Taggle.hpp new file mode 100644 index 00000000..070e3a6e --- /dev/null +++ b/include/Taggle/Taggle.hpp @@ -0,0 +1,13 @@ +#ifndef ARABICA_TAGGLE_TAGGLE_HPP +#define ARABICA_TAGGLE_TAGGLE_HPP + +#include "impl/ScanHandler.hpp" +#include "impl/ElementType.hpp" +#include "impl/Element.hpp" +#include "impl/Schema.hpp" +#include "impl/html/HTMLModels.hpp" +#include "impl/html/HTMLScanner.hpp" +#include "impl/html/HTMLSchema.hpp" +#include "impl/Parser.hpp" + +#endif diff --git a/include/Taggle/impl/Element.hpp b/include/Taggle/impl/Element.hpp new file mode 100755 index 00000000..bac6003c --- /dev/null +++ b/include/Taggle/impl/Element.hpp @@ -0,0 +1,304 @@ +#ifndef ARABICA_SAX_TAGSOUP_ELEMENT_HPP +#define ARABICA_SAX_TAGSOUP_ELEMENT_HPP + +#include +#include +#include "ElementType.hpp" + +namespace Arabica +{ + +namespace SAX +{ + +/** +The internal representation of an actual element (not an element type). +An Element has an element type, attributes, and a successor Element +for use in constructing stacks and queues of Elements. 
+@see ElementType +@see AttributesImpl + +Based on code from John Cowan's super TagSoup package +*/ +class Element +{ +private: + ElementType* type_; // type of element + AttributesImpl atts_; // attributes of element + const Element* next_; // successor of element + bool preclosed_; // this element has been preclosed + +public: + static const Element Null; + + Element() : + type_(&ElementType::Null), + atts_(), + next_(0), + preclosed_(false) + { + } // Element + + Element(const Element& rhs): + type_(rhs.type_), + atts_(rhs.atts_), + next_(0), + preclosed_(rhs.preclosed_) + { + if(rhs.next_) + next_ = new Element(*rhs.next_); + } // Element + + /** + Return an Element from a specified ElementType. + @param type The element type of the newly constructed element + @param defaultAttributes True if default attributes are wanted + */ + Element(ElementType& type, bool defaultAttributes) : + type_(&type), + atts_(), + next_(0), + preclosed_(false) + { + if (defaultAttributes) + atts_ = type.atts(); + } // Element + + ~Element() + { + if(next_ && (*next_ != Null)) + delete next_; + } // ~Element + + Element& operator=(const Element& rhs) + { + type_ = rhs.type_; + atts_ = rhs.atts_; + preclosed_ = rhs.preclosed_; + if(next_ && (*next_ != Null)) + delete next_; + if(rhs.next_) + next_ = new Element(*rhs.next_); + else + next_ = 0; + return *this; + } // operator= + + bool operator==(const Element& rhs) const + { + bool ok = (type_ == rhs.type_) && + (atts_ == rhs.atts_) && + (preclosed_ == rhs.preclosed_); + if(!ok) + return false; + + if(!next_ && !rhs.next_) + return true; + + if((!next_ && rhs.next_) || + (next_ && !rhs.next_)) + return false; + + return (*next_ == *rhs.next_); + } // operator== + + bool operator!=(const Element& rhs) const + { + return !(*this == rhs); + } // operator!= + + /** + Return the element type. + @return The element type. 
+ */ + const ElementType& type() const + { + return *type_; + } // type + + /** + Return the attributes as an AttributesImpl object. + Returning an AttributesImpl makes the attributes mutable. + @return The attributes + @see AttributesImpl + */ + const AttributesImpl& atts() const + { + return atts_; + } // atts + + /** + Return the next element in an element stack or queue. + @return The next element + */ + Element next() const + { + if(!next_) + return Null; + return *next_; + } // next + + /** + Change the next element in an element stack or queue. + @param next The new next element + */ + void setNext(const Element& next) + { + if(next_ && (*next_ != Null)) + delete next_; + next_ = new Element(next); + } // setNext + + /** + Return the name of the element's type. + Convenience method. + @return The element type name + */ + std::string name() const + { + return type_->name(); + } // name + + /** + Return the namespace name of the element's type. + Convenience method. + @return The element type namespace name + */ + std::string namespaceName() const + { + return type_->namespaceName(); + } // namespaceName + + /** + Return the local name of the element's type. + Convenience method. + @return The element type local name + */ + std::string localName() const + { + return type_->localName(); + } // localName + + /** + Return the content model vector of the element's type. + Convenience method. + @return The content model vector + */ + int model() const + { + return type_->model(); + } // model + + /** + Return the member-of vector of the element's type. + Convenience method. + @return The member-of vector + */ + int memberOf() const + { + return type_->memberOf(); + } // memberOf + + /** + Return the flags vector of the element's type. + Convenience method. + @return The flags vector + */ + int flags() const + { + return type_->flags(); + } // flags + + /** + Return the parent element type of the element's type. + Convenience method. 
+ @return The parent element type + */ + ElementType& parent() const + { + return type_->parent(); + } // parent + + /** + Return true if the type of this element can contain the type of + another element. + Convenience method. + @param other The other element + */ + bool canContain(const Element& other) const + { + return type_->canContain(*(other.type_)); + } // canContain + + /** + Set an attribute and its value into this element. + @param name The attribute name (Qname) + @param type The attribute type + @param value The attribute value + */ + void setAttribute(const std::string& name, const std::string& type, const std::string& value) + { + type_->setAttribute(atts_, name, type, value); + } // setAttribute + + /** + Make this element anonymous. + Remove any id or name attribute present + in the element's attributes. + */ + void anonymize() + { + for (int i = atts_.getLength() - 1; i >= 0; i--) + { + if((atts_.getType(i) == "ID") || + (atts_.getQName(i) == "name")) + { + atts_.removeAttribute(i); + } + } // for ... + } // anonymize + + /** + Clean the attributes of this element. + Attributes with null name (the name was ill-formed) + or null value (the attribute was present in the element type but + not in this actual element) are removed. + */ + void clean() + { + for (int i = atts_.getLength() - 1; i >= 0; i--) + { + const std::string& name = atts_.getLocalName(i); + if (atts_.getValue(i) == "" || name == "" || name.length() == 0) + { + atts_.removeAttribute(i); + continue; + } // if ... + } // for ... + } // clean + + /** + Force this element to preclosed status, meaning that an end-tag has + been seen but the element cannot yet be closed for structural reasons. + */ + void preclose() + { + preclosed_ = true; + } // preclose + + /** + Return true if this element has been preclosed. 
+ */ + bool isPreclosed() const + { + return preclosed_; + } // isPreclosed +}; // class Element + +const Element Element::Null; + +} // namespace SAX + +} // namespace Arabica + +#endif diff --git a/include/Taggle/impl/ElementType.hpp b/include/Taggle/impl/ElementType.hpp new file mode 100755 index 00000000..2335be14 --- /dev/null +++ b/include/Taggle/impl/ElementType.hpp @@ -0,0 +1,333 @@ +#ifndef ARABICA_SAX_ELEMENT_TYPE_HPP +#define ARABICA_SAX_ELEMENT_TYPE_HPP + +#include +#include +#include "Schema.hpp" + +namespace Arabica +{ + +namespace SAX +{ + + +/** +This class represents an element type in the schema. +An element type has a name, a content model vector, a member-of vector, +a flags vector, default attributes, and a schema to which it belongs. + +Based on code from John Cowan's super TagSoup package +@see Schema +*/ +class ElementType +{ +private: + std::string name_; // element type name (Qname) + std::string namespace_; // element type namespace name + std::string localName_; // element type local name + int model_; // bitmap: what the element contains + int memberOf_; // bitmap: what element is contained in + int flags_; // bitmap: element flags + AttributesImpl atts_; // default attributes + ElementType* parent_; // parent of this element type + Schema* schema_; // schema to which this belongs + +public: + static ElementType Null; + +private: + ElementType() : + name_(""), + namespace_(""), + localName_(""), + model_(0), + memberOf_(0), + flags_(0), + atts_(), + parent_(0), + schema_(0) + { + } // ElementType + + /** + Construct an ElementType: + but it's better to use Schema.element() instead. + The content model, member-of, and flags vectors are specified as ints. 
+ @param name The element type name + @param model ORed-together bits representing the content models + allowed in the content of this element type + @param memberOf ORed-together bits representing the content models + to which this element type belongs + @param flags ORed-together bits representing the flags associated + with this element type + @param schema The schema with which this element type will be + associated + */ + ElementType(const std::string& name, int model, int memberOf, int flags, Schema& schema) : + name_(name), + model_(model), + memberOf_(memberOf), + flags_(flags), + schema_(&schema), + namespace_(), + localName_(), + parent_(0) + { + namespace_ = namespaceName(name, false); + localName_ = localName(name); + } // ElementType + + ElementType(const ElementType& rhs) : + name_(rhs.name_), + model_(rhs.model_), + memberOf_(rhs.memberOf_), + flags_(rhs.flags_), + schema_(rhs.schema_), + namespace_(rhs.namespace_), + localName_(rhs.localName_), + parent_(rhs.parent_) + { + } // ElementType + + friend class SchemaImpl; + +public: + /** + Return a namespace name from a Qname. + The attribute flag tells us whether to return an empty namespace + name if there is no prefix, or use the schema default instead. + @param name The Qname + @param attribute True if name is an attribute name + @return The namespace name + **/ + std::string namespaceName(const std::string& name, bool attribute) const + { + size_t colon = name.find(':'); + if (colon == std::string::npos) + return attribute ? "" : schema_->getURI(); + + std::string prefix = name.substr(0, colon); + if (prefix == "xml") + return "http://www.w3.org/XML/1998/namespace"; + else + return "urn:x-prefix:" + prefix; + } // namespaceName + + /** + Return a local name from a Qname. 
+ @param name The Qname + @return The local name + **/ + std::string localName(const std::string& name) const + { + size_t colon = name.find(':'); + if (colon == std::string::npos) + return name; + else + return name.substr(colon+1); + } // localName + + /** + Returns the name of this element type. + @return The name of the element type + */ + std::string name() const { return name_; } + + /** + Returns the namespace name of this element type. + @return The namespace name of the element type + */ + std::string namespaceName() const { return namespace_; } + + /** + Returns the local name of this element type. + @return The local name of the element type + */ + std::string localName() const { return localName_; } + + /** + Returns the content models of this element type. + @return The content models of this element type as a vector of bits + */ + int model() const { return model_; } + + /** + Returns the content models to which this element type belongs. + @return The content models to which this element type belongs as a + vector of bits + */ + int memberOf() const { return memberOf_; } + + /** + Returns the flags associated with this element type. + @return The flags associated with this element type as a vector of bits + */ + int flags() const { return flags_; } + + /** + Returns the default attributes associated with this element type. + Attributes of type CDATA that don't have default values are + typically not included. Other attributes without default values + have an internal value of null. + The return value is an AttributesImpl to allow the caller to mutate + the attributes. + */ + const AttributesImpl& atts() const { return atts_; } + + /** + Returns the parent element type of this element type. + @return The parent element type + */ + ElementType& parent() const + { + return *parent_; + } // parent + + /** + Returns the schema which this element type is associated with. 
+ @return The schema + */ + Schema& schema() const + { + return *schema_; + } // schema + + + /** + Returns true if this element type can contain another element type. + That is, if any of the models in this element's model vector + match any of the models in the other element type's member-of + vector. + @param other The other element type + */ + bool canContain(const ElementType& other) const + { + return (model_ & other.memberOf_) != 0; + } // canContain + + + /** + Sets an attribute and its value into an AttributesImpl object. + Attempts to set a namespace declaration are ignored. + @param atts The AttributesImpl object + @param name The name (Qname) of the attribute + @param type The type of the attribute + @param value The value of the attribute + */ + void setAttribute(AttributesImpl& atts, + const std::string& name, + const std::string& type, + const std::string& value) + { + if (name == "xmlns" || name.find("xmlns:") == 0) + { + return; + } + + std::string namespaceN = namespaceName(name, true); + std::string localN = localName(name); + std::string actualType = type; + std::string actualValue = value; + + int i = atts.getIndex(name); + if (i == -1) + { + if (actualType == "") + actualType = "CDATA"; + if (actualType != "CDATA") + actualValue = Arabica::text::normalize_whitespace >(value); + atts.addAttribute(namespaceN, localN, name, actualType, actualValue); + } + else + { + if (actualType == "") + actualType = atts.getType(i); + if (actualType != ("CDATA")) + actualValue = Arabica::text::normalize_whitespace >(value); + atts.setAttribute(i, namespaceN, localN, name, actualType, actualValue); + } + } // setAttribute + + /** + Sets an attribute and its value into this element type. 
+ @param name The name of the attribute + @param type The type of the attribute + @param value The value of the attribute + */ + void setAttribute(const std::string& name, const std::string& type, const std::string& value) + { + setAttribute(atts_, name, type, value); + } // setAttribute + + /** + Sets the models of this element type. + @param model The content models of this element type as a vector of bits + */ + void setModel(int model) + { + model_ = model; + } // setModel + + /** + Sets the content models to which this element type belongs. + @param memberOf The content models to which this element type belongs as a vector of bits + */ + void setMemberOf(int memberOf) + { + memberOf_ = memberOf; + } // setMemberOf + + /** + Sets the flags of this element type. + @param flags associated with this element type The flags as a vector of bits + */ + void setFlags(int flags) + { + flags_ = flags; + } // setFlags + + /** + Sets the parent element type of this element type. + @param parent The parent element type + */ + void setParent(ElementType& parent) + { + parent_ = &parent; + } // setParent + + bool operator==(const ElementType& rhs) const + { + return (name_ == rhs.name_) && + (namespace_ == rhs.namespace_) && + (localName_ == rhs.localName_) && + (model_ == rhs.model_) && + (memberOf_ == rhs.memberOf_) && + (flags_ == rhs.flags_) && + (parent_ == rhs.parent_) && + (schema_ == rhs.schema_); + } // operator == + + ElementType& operator=(const ElementType& rhs) + { + name_ = rhs.name_; + namespace_ = rhs.namespace_; + localName_ = rhs.localName_; + model_ = rhs.model_; + memberOf_ = rhs.memberOf_; + flags_ = rhs.flags_; + atts_ = rhs.atts_; + parent_ = rhs.parent_; + schema_ = rhs.schema_; + + return *this; + } // operator= +}; // class ElementType + +ElementType ElementType::Null; + +} // namespace SAX + +} // namespace Arabica + +#endif diff --git a/include/Taggle/impl/Parser.hpp b/include/Taggle/impl/Parser.hpp new file mode 100644 index 00000000..677c2221 --- 
/dev/null +++ b/include/Taggle/impl/Parser.hpp @@ -0,0 +1,1391 @@ +#ifndef ARABICA_SAX_TAGGLE_PARSER_HPP +#define ARABICA_SAX_TAGGLE_PARSER_HPP + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ScanHandler.hpp" + +namespace Arabica +{ + +namespace SAX +{ + +/** +The Taggle SAX parser class. + +Based on code from John Cowan's super TagSoup package +**/ +template +class Taggle : + public XMLReaderInterface, + private DefaultHandler::string_adaptor>, + private ScanHandler +{ +public: + typedef XMLReaderInterface XMLReaderT; + typedef typename XMLReaderT::string_adaptor string_adaptor; + typedef ContentHandler ContentHandlerT; + typedef LexicalHandler LexicalHandlerT; + typedef DeclHandler DeclHandlerT; + typedef DTDHandler DTDHandlerT; + typedef ErrorHandler ErrorHandlerT; + typedef EntityResolver EntityResolverT; + typedef InputSource InputSourceT; + typedef Locator LocatorT; + + + /** + A value of "true" indicates namespace URIs and unprefixed local + names for element and attribute names will be available. + **/ + static const string_type namespacesFeature; + + /** + A value of "true" indicates that XML qualified names (with prefixes) + and attributes (including xmlns* attributes) will be available. + We don't support this value. + **/ + static const string_type namespacePrefixesFeature; + + /** + Reports whether this parser processes external general entities + (it doesn't). + **/ + static const string_type externalGeneralEntitiesFeature; + + /** + Reports whether this parser processes external parameter entities + (it doesn't). + **/ + static const string_type externalParameterEntitiesFeature; + + /** + May be examined only during a parse, after the startDocument() + callback has been completed; read-only. The value is true if + the document specified standalone="yes" in its XML declaration, + and otherwise is false. (It's always false.) 
+ **/ + static const string_type isStandaloneFeature; + + /** + A value of "true" indicates that the LexicalHandler will report + the beginning and end of parameter entities (it won't). + **/ + static const string_type lexicalHandlerParameterEntitiesFeature; + + /** + A value of "true" indicates that system IDs in declarations will + be absolutized (relative to their base URIs) before reporting. + (This returns true but doesn't actually do anything.) + **/ + static const string_type resolveDTDURIsFeature; + + /** + Has a value of "true" if all XML names (for elements, + prefixes, attributes, entities, notations, and local + names), as well as Namespace URIs, will have been interned + using java.lang.String.intern. This supports fast testing of + equality/inequality against string constants, rather than forcing + slower calls to String.equals(). (We always intern.) + **/ + static const string_type stringInterningFeature; + + /** + Returns "true" if the Attributes objects passed by this + parser in ContentHandler.startElement() implement the + org.xml.sax.ext.Attributes2 interface. (They don't.) + **/ + static const string_type useAttributes2Feature; + + /** + Returns "true" if the Locator objects passed by this parser + in ContentHandler.setDocumentLocator() implement the + org.xml.sax.ext.Locator2 interface. (They don't.) + **/ + static const string_type useLocator2Feature; + + /** + Returns "true" if, when setEntityResolver is given an object + implementing the org.xml.sax.ext.EntityResolver2 interface, + those new methods will be used. (They won't be.) + **/ + static const string_type useEntityResolver2Feature; + + /** + Controls whether the parser is reporting all validity errors + (We don't report any validity errors.) + **/ + static const string_type validationFeature; + + /** + Controls whether the parser reports Unicode normalization + errors as described in section 2.13 and Appendix B of the XML + 1.1 Recommendation. (We don't normalize.) 
+ **/ + static const string_type unicodeNormalizationCheckingFeature; + + /** + Controls whether, when the namespace-prefixes feature is set, + the parser treats namespace declaration attributes as being in + the http://www.w3.org/2000/xmlns/ namespace. (It doesn't.) + **/ + static const string_type xmlnsURIsFeature; + + /** + Returns "true" if the parser supports both XML 1.1 and XML 1.0. + (Always false.) + **/ + static const string_type XML11Feature; + + /** + A value of "true" indicates that the parser will ignore + unknown elements. + **/ + static const string_type ignoreBogonsFeature; + + /** + A value of "true" indicates that the parser will give unknown + elements a content model of EMPTY; a value of "false", a + content model of ANY. + **/ + static const string_type bogonsEmptyFeature; + + /** + A value of "true" indicates that the parser will allow unknown + elements to be the root element. + **/ + static const string_type rootBogonsFeature; + + /** + A value of "true" indicates that the parser will return default + attribute values for missing attributes that have default values. + **/ + static const string_type defaultAttributesFeature; + + /** + A value of "true" indicates that the parser will + translate colons into underscores in names. + **/ + static const string_type translateColonsFeature; + + /** + A value of "true" indicates that the parser will + attempt to restart the restartable elements. + **/ + static const string_type restartElementsFeature; + + /** + A value of "true" indicates that the parser will + transmit whitespace in element-only content via the SAX + ignorableWhitespace callback. Normally this is not done, + because HTML is an SGML application and SGML suppresses + such whitespace. + **/ + static const string_type ignorableWhitespaceFeature; + + /** + A value of "true" indicates that the parser will treat CDATA + elements specially. Normally true, since the input is by + default HTML. 
+ **/ + static const string_type CDATAElementsFeature; + + /** + Used to see some syntax events that are essential in some + applications: comments, CDATA delimiters, selected general + entity inclusions, and the start and end of the DTD (and + declaration of document element name). The Object must implement + org.xml.sax.ext.LexicalHandler. + **/ + static const string_type lexicalHandlerProperty; + + /** + Specifies the Scanner object this Parser uses. + **/ + static const string_type scannerProperty; + + /** + Specifies the Schema object this Parser uses. + **/ + static const string_type schemaProperty; + + /** + Specifies the AutoDetector (for encoding detection) this Parser uses. + **/ + static const string_type autoDetectorProperty; + +private: + // Default values for feature flags + static bool DEFAULT_NAMESPACES; + static bool DEFAULT_IGNORE_BOGONS; + static bool DEFAULT_BOGONS_EMPTY; + static bool DEFAULT_ROOT_BOGONS; + static bool DEFAULT_DEFAULT_ATTRIBUTES; + static bool DEFAULT_TRANSLATE_COLONS; + static bool DEFAULT_RESTART_ELEMENTS; + static bool DEFAULT_IGNORABLE_WHITESPACE; + static bool DEFAULT_CDATA_ELEMENTS; + + static const string_type legal; + + typedef std::map FeatureMapT; + + // XMLReader implementation + ContentHandlerT* contentHandler_; + LexicalHandlerT* lexicalHandler_; + DTDHandlerT* dtdHandler_; + ErrorHandlerT* errorHandler_; + EntityResolverT* entityResolver_; + Schema* schema_; + bool ownSchema_; + Scanner* scanner_; + bool ownScanner_; + FeatureMapT features_; + Element newElement_; + std::string attributeName_; + bool doctypeIsPresent_; + std::string doctypePublicId_; + std::string doctypeSystemId_; + std::string doctypeName_; + std::string piTarget_; + Element stack_; + Element saved_; + Element pcdata_; + int entity_; + + // Feature flags. 
+ bool namespaces; + bool ignoreBogons; + bool bogonsEmpty; + bool rootBogons; + bool defaultAttributes; + bool translateColons; + bool restartElements; + bool ignorableWhitespace; + bool CDATAElements; + bool virginStack; + +public: + Taggle() : + contentHandler_(0), + lexicalHandler_(0), + dtdHandler_(0), + errorHandler_(0), + entityResolver_(0), + features_(initialFeatures()), + schema_(0), + ownSchema_(false), + scanner_(0), + ownScanner_(false), + namespaces(DEFAULT_NAMESPACES), + ignoreBogons(DEFAULT_IGNORE_BOGONS), + bogonsEmpty(DEFAULT_BOGONS_EMPTY), + rootBogons(DEFAULT_ROOT_BOGONS), + defaultAttributes(DEFAULT_DEFAULT_ATTRIBUTES), + translateColons(DEFAULT_TRANSLATE_COLONS), + restartElements(DEFAULT_RESTART_ELEMENTS), + ignorableWhitespace(DEFAULT_IGNORABLE_WHITESPACE), + CDATAElements(DEFAULT_CDATA_ELEMENTS), + newElement_(Element::Null), + attributeName_(), + doctypeIsPresent_(false), + doctypePublicId_(), + doctypeSystemId_(), + doctypeName_(), + piTarget_(), + stack_(Element::Null), + saved_(Element::Null), + pcdata_(Element::Null), + entity_(0), + virginStack(true) + { + contentHandler_ = this; + lexicalHandler_ = this; + dtdHandler_ = this; + errorHandler_ = this; + entityResolver_ = this; + } // Taggle + + ~Taggle() + { + if(ownSchema_) + delete schema_; + if(ownScanner_) + delete scanner_; + } // ~Taggle + +private: + static FeatureMapT initialFeatures() + { + FeatureMapT features; + features[namespacesFeature] = DEFAULT_NAMESPACES; + features[namespacePrefixesFeature] = false; + features[externalGeneralEntitiesFeature] = false; + features[externalParameterEntitiesFeature] = false; + features[isStandaloneFeature] = false; + features[lexicalHandlerParameterEntitiesFeature] = false; + features[resolveDTDURIsFeature] = true; + features[stringInterningFeature] = true; + features[useAttributes2Feature] = false; + features[useLocator2Feature] = false; + features[useEntityResolver2Feature] = false; + features[validationFeature] = false; + 
features[xmlnsURIsFeature] = false; + features[xmlnsURIsFeature] = false; + features[XML11Feature] = false; + features[ignoreBogonsFeature] = DEFAULT_IGNORE_BOGONS; + features[bogonsEmptyFeature] = DEFAULT_BOGONS_EMPTY; + features[rootBogonsFeature] = DEFAULT_ROOT_BOGONS; + features[defaultAttributesFeature] = DEFAULT_DEFAULT_ATTRIBUTES; + features[translateColonsFeature] = DEFAULT_TRANSLATE_COLONS; + features[restartElementsFeature] = DEFAULT_RESTART_ELEMENTS; + features[ignorableWhitespaceFeature] = DEFAULT_IGNORABLE_WHITESPACE; + features[CDATAElementsFeature] = DEFAULT_CDATA_ELEMENTS; + return features; + } // initialFeatures + +public: + /////////////////////////////////////////////////// + // XMLReader + bool getFeature(const string_type& name) const + { + typename FeatureMapT::const_iterator b = features_.find(name); + if(b == features_.end()) + { + throw SAXNotRecognizedException("Unknown feature " + string_adaptor::asStdString(name)); + } + return b->second; + } // getFeature + + void setFeature(const string_type& name, bool value) + { + typename FeatureMapT::iterator b = features_.find(name); + if(b == features_.end()) + { + throw SAXNotRecognizedException("Unknown feature " + string_adaptor::asStdString(name)); + } + + features_[name] = value; + + if(name == namespacesFeature) + namespaces = value; + else if(name == ignoreBogonsFeature) + ignoreBogons = value; + else if(name == bogonsEmptyFeature) + bogonsEmpty = value; + else if(name == rootBogonsFeature) + rootBogons = value; + else if(name == defaultAttributesFeature) + defaultAttributes = value; + else if(name == translateColonsFeature) + translateColons = value; + else if(name == restartElementsFeature) + restartElements = value; + else if(name == ignorableWhitespaceFeature) + ignorableWhitespace = value; + else if(name == CDATAElementsFeature) + CDATAElements = value; + } // setFeature + + typedef typename XMLReaderInterface::PropertyBase PropertyBaseT; + virtual std::auto_ptr doGetProperty(const 
string_type& name) + { + return std::auto_ptr(0); + } // doGetProperty + + virtual void doSetProperty(const string_type& name, std::auto_ptr value) + { + } // doSetProperty + + /* + Object getProperty (std::string name) + { + if(name.equals(lexicalHandlerProperty)) + { + return lexicalHandler_ == this ? null : lexicalHandler_; + } + else if(name.equals(scannerProperty)) + { + return scanner_; + } + else if(name.equals(schemaProperty)) + { + return schema_; + } + else if(name.equals(autoDetectorProperty)) + { + return theAutoDetector; + } + else + { + throw new SAXNotRecognizedException("Unknown property " + name); + } + } // getProperty + + void setProperty (std::string name, Object value) + { + if(name.equals(lexicalHandlerProperty)) + { + if(value == null) + { + lexicalHandler_ = this; + } + else if(value instanceof LexicalHandler) + { + lexicalHandler_ = (LexicalHandler)value; + } + else + { + throw new SAXNotSupportedException("Your lexical handler is not a LexicalHandler"); + } + } + else if(name.equals(scannerProperty)) + { + if(value instanceof Scanner) { + scanner_ = (Scanner)value; + } + else { + throw new SAXNotSupportedException("Your scanner is not a Scanner"); + } + } + else if(name.equals(schemaProperty)) { + if(value instanceof Schema) { + schema_ = (Schema)value; + } + else { + throw new SAXNotSupportedException("Your schema is not a Schema"); + } + } + else if(name.equals(autoDetectorProperty)) { + if(value instanceof AutoDetector) { + theAutoDetector = (AutoDetector)value; + } + else { + throw new SAXNotSupportedException("Your auto-detector is not an AutoDetector"); + } + } + else { + throw new SAXNotRecognizedException("Unknown property " + name); + } + } +*/ + + virtual void setEntityResolver(EntityResolverT& resolver) + { + entityResolver_ = &resolver; + } // setEntityResolver + + virtual EntityResolverT* getEntityResolver() const + { + return (entityResolver_ == this) ? 
0 : entityResolver_; + } // getEntityResolver + + virtual void setDTDHandler(DTDHandlerT& handler) + { + dtdHandler_ = &handler; + } // setDTDHandler + + virtual DTDHandlerT* getDTDHandler() const + { + return (dtdHandler_ == this) ? 0 : dtdHandler_; + } // getDTDHandler + + virtual void setContentHandler(ContentHandlerT& handler) + { + contentHandler_ = &handler; + } // setContentHandler + + virtual ContentHandlerT* getContentHandler() const + { + return (contentHandler_ == this) ? 0 : contentHandler_; + } // getContentHandler + + virtual void setErrorHandler(ErrorHandlerT& handler) + { + errorHandler_ = &handler; + } // setErrorHandler + + virtual ErrorHandlerT* getErrorHandler() const + { + return (errorHandler_ == this) ? 0 : errorHandler_; + } // getErrorHandler + + virtual void setDeclHandler(DeclHandlerT& handler) + { + } // setDeclHandler + + virtual DeclHandlerT* getDeclHandler() const + { + return 0; + } // getDeclHandler + + virtual void setLexicalHandler(LexicalHandlerT& handler) + { + lexicalHandler_ = &handler; + } // setLexicalHandler + + virtual LexicalHandlerT* getLexicalHandler() const + { + return (lexicalHandler_ == this) ? 
0 : lexicalHandler_; + } // getLexicalHandler + + virtual void parse(InputSourceT& input) + { + setup(); + + InputSourceResolver is(input, string_adaptor()); + if(is.resolve() == 0) + { + reportError("Could not resolve XML document", true); + return; + } // if(is.resolver() == 0) + + contentHandler_->startDocument(); + scanner_->resetDocumentLocator(string_adaptor::asStdString(input.getPublicId()), string_adaptor::asStdString(input.getSystemId())); + + if(dynamic_cast(scanner_) != 0) + contentHandler_->setDocumentLocator(*(dynamic_cast(scanner_))); + + if(schema_->getURI() != "") + contentHandler_->startPrefixMapping(S(schema_->getPrefix()), + S(schema_->getURI())); + scanner_->scan(*is.resolve(), *this); + } // parse + +private: + // Sets up instance variables that haven't been set by setFeature + void setup() + { + if(schema_ && ownSchema_) + { + delete schema_; + schema_ = 0; + } // if ... + if(schema_ == 0) + { + schema_ = new HTMLSchema(); + ownSchema_ = true; + } // if ... + + if(scanner_ && ownScanner_) + { + delete scanner_; + scanner_ = 0; + } // if ... + if(scanner_ == 0) + { + scanner_ = new HTMLScanner(); + ownScanner_ = true; + } // if ... 
+ + stack_ = Element(schema_->getElementType(""), defaultAttributes); + pcdata_ = Element(schema_->getElementType(""), defaultAttributes); + + newElement_ = Element::Null; + attributeName_ = ""; + piTarget_ = ""; + saved_ = Element::Null; + entity_ = 0; + virginStack = true; + doctypeName_ = doctypePublicId_ = doctypeSystemId_ = ""; + } // setup + + /////////////////////////////////////////////////////// + // ScanHandler implementation + virtual void adup(const std::string& buff) + { + // std::cerr << "adup(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ == Element::Null || attributeName_ == "") + return; + newElement_.setAttribute(attributeName_, "", attributeName_); + attributeName_ = ""; + } // adup + + virtual void aname(const std::string& buff) + { + // std::cerr << "aname(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ == Element::Null) + return; + // Currently we don't rely on Schema to canonicalize + // attribute names. + attributeName_ = lower_case(makeName(buff)); + } // aname + + virtual void aval(const std::string& buff) + { + // std::cerr << "aval(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ == Element::Null || attributeName_ == "") + return; + std::string value = expandEntities(buff); + newElement_.setAttribute(attributeName_, "", value); + attributeName_ = ""; + } // aval + + // Expand entity references in attribute values selectively. + // Currently we expand a reference iff it is properly terminated + // with a semicolon. 
+ std::string expandEntities(std::string src) + { + size_t refStart = std::string::npos; + std::string dst; + for(std::string::const_iterator i = src.begin(), ie = src.end(); i != ie; ++i) + { + char ch = *i; + dst.push_back(ch); + if(ch == '&' && refStart == -1) + { + // start of a ref excluding & + refStart = dst.length(); + } + else if(refStart == std::string::npos) + { + // not in a ref + } + else if(Arabica::XML::is_letter_or_digit(ch) || ch == '#') + { + // valid entity char + } + else if(ch == ';') + { + // properly terminated ref + int ent = lookupEntity(dst.substr(refStart, dst.size() - refStart - 1)); + if(ent > 0xFFFF) + { + ent -= 0x10000; + dst[refStart - 1] = (char)((ent>>10) + 0xD800); + dst[refStart] = (char)((ent&0x3FF) + 0xDC00); + dst.erase(refStart + 1); + } + else if(ent != 0) + { + dst[refStart - 1] = (char)ent; + dst.erase(refStart); + } + refStart = std::string::npos; + } + else + { + // improperly terminated ref + refStart = std::string::npos; + } // if ... + } // for ... + return std::string(dst, 0, dst.size()); + } // expandEntities + + virtual void entity(const std::string& buff) + { + entity_ = lookupEntity(buff); + } // entity + + // Process numeric character references, + // deferring to the schema for named ones. 
+ int lookupEntity(const std::string& buff) + { + int result = 0; + if(buff.length() < 1) + return result; + + if(buff[0] == '#') + { + const char* b = buff.c_str(); + char* end; + if(buff.length() > 1 && (buff[1] == 'x' || buff[1] == 'X')) + return strtol(b + 2, &end, 16); + return strtol(b + 1, &end, 10); + } + return schema_->getEntity(buff); + } // lookupEntity + + virtual void eof(const std::string& buff) + { + if(virginStack) + rectify(pcdata_); + while (stack_.next() != Element::Null) + { + pop(); + } + if(schema_->getURI() != "") + contentHandler_->endPrefixMapping(S(schema_->getPrefix())); + contentHandler_->endDocument(); + } // eof + + virtual void etag(const std::string& buff) + { + // std::cerr << "etag(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(etag_cdata(buff)) + return; + etag_basic(buff); + } // etag + + bool etag_cdata(const std::string& buff) + { + std::string currentName = stack_.name(); + // If this is a CDATA element and the tag doesn't match, + // or isn't properly formed (junk after the name), + // restart CDATA mode and process the tag as characters. + if(CDATAElements && (stack_.flags() & Schema::F_CDATA) != 0) + { + bool realTag = (buff.length() == currentName.length()); + if(realTag) + { + std::string buffl = lower_case(buff); + std::string currentl = lower_case(currentName); + for (size_t i = 0; i < buffl.length(); ++i) + { + if(buffl[i] != currentl[i]) + { + realTag = false; + break; + } // if ... + } // for ... + } // if ... + if(!realTag) + { + contentHandler_->characters(S("characters(S(buff)); + contentHandler_->characters(S(">")); + scanner_->startCDATA(); + return true; + } // if ... + } // if ... 
+ return false; + } // etag_cdata + + void etag_basic(const std::string& buff) + { + newElement_ = Element::Null; + std::string name; + if(!buff.empty()) + { + // Canonicalize case of name + name = makeName(buff); + ElementType& type = schema_->getElementType(name); + if(type == ElementType::Null) + return; // mysterious end-tag + name = type.name(); + } + else + { + name = stack_.name(); + } + + Element sp; + bool inNoforce = false; + for (sp = stack_; sp != Element::Null; sp = sp.next()) + { + if(sp.name() == name) + break; + if((sp.flags() & Schema::F_NOFORCE) != 0) + inNoforce = true; + } // for ... + + if(sp == Element::Null) + return; // Ignore unknown etags + if(sp.next() == Element::Null || sp.next().next() == Element::Null) + return; + if(inNoforce) + { // inside an F_NOFORCE element? + sp.preclose(); // preclose the matching element + } + else + { // restartably pop everything above us + while (stack_ != sp) + restartablyPop(); + pop(); + } + // pop any preclosed elements now at the top + while (stack_.isPreclosed()) + pop(); + restart(Element::Null); + } // etag_basic + + // Push restartables on the stack if possible + // e is the next element to be started, if we know what it is + void restart(Element e) + { + while (saved_ != Element::Null && stack_.canContain(saved_) && + (e == Element::Null || saved_.canContain(e))) + { + Element next = saved_.next(); + push(saved_); + saved_ = next; + } // while ... 
+ } // restart + + // Pop the stack irrevocably + void pop() + { + if(stack_ == Element::Null) + return; // empty stack + std::string name = stack_.name(); + std::string localName = stack_.localName(); + std::string namespaceName = stack_.namespaceName(); + std::string prefix = prefixOf(name); + + if(!namespaces) + namespaceName = localName = ""; + contentHandler_->endElement(S(namespaceName), + S(localName), + S(name)); + if(foreign(prefix, namespaceName)) + contentHandler_->endPrefixMapping(S(prefix)); + + const Attributes& atts = stack_.atts(); + for (int i = atts.getLength() - 1; i >= 0; i--) + { + std::string attNamespace = atts.getURI(i); + std::string attPrefix = prefixOf(atts.getQName(i)); + if(foreign(attPrefix, attNamespace)) + contentHandler_->endPrefixMapping(S(attPrefix)); + } // for ... + stack_ = stack_.next(); + } // pop + + // Pop the stack restartably + void restartablyPop() + { + Element popped = stack_; + pop(); + if(restartElements && (popped.flags() & Schema::F_RESTART) != 0) + { + popped.anonymize(); + popped.setNext(saved_); + saved_ = popped; + } // if ... 
+ } // restartablyPop + + // Push element onto stack + void push(Element e) + { + std::string name = e.name(); + std::string localName = e.localName(); + std::string namespaceName = e.namespaceName(); + std::string prefix = prefixOf(name); + + e.clean(); + if(!namespaces) + namespaceName = localName = ""; + if(virginStack && (lower_case(localName) == lower_case(doctypeName_))) + entityResolver_->resolveEntity(S(doctypePublicId_), S(doctypeSystemId_)); + if(foreign(prefix, namespaceName)) + contentHandler_->startPrefixMapping(S(prefix), S(namespaceName)); + + AttributesImpl atts; + int len = e.atts().getLength(); + for (int i = 0; i != len; ++i) + { + std::string attNamespace = e.atts().getURI(i); + std::string attPrefix = prefixOf(e.atts().getQName(i)); + if(foreign(attPrefix, attNamespace)) + contentHandler_->startPrefixMapping(S(attPrefix), S(attNamespace)); + + atts.addAttribute(S(e.atts().getURI(i)), + S(e.atts().getLocalName(i)), + S(e.atts().getQName(i)), + S(e.atts().getType(i)), + S(e.atts().getValue(i))); + } // for ... + contentHandler_->startElement(S(namespaceName), S(localName), S(name), atts); + + e.setNext(stack_); + stack_ = e; + virginStack = false; + if(CDATAElements && (stack_.flags() & Schema::F_CDATA) != 0) + scanner_->startCDATA(); + } // push + + // Get the prefix from a QName + std::string prefixOf(std::string name) + { + size_t i = name.find(':'); + std::string prefix = ""; + if(i != std::string::npos) + prefix = name.substr(0, i); + return prefix; + } // prefixOf + + // Return true if we have a foreign name + bool foreign(std::string prefix, std::string namespaceName) + { + bool foreign = !((prefix == "") || (namespaceName == "") || (namespaceName == schema_->getURI())); + return foreign; + } // foreign + + /** + * Parsing the complete XML Document Type Definition is way too complex, + * but for many simple cases we can extract something useful from it. 
+ * + * doctypedecl ::= '' + * DeclSep ::= PEReference | S + * intSubset ::= (markupdecl | DeclSep)* + * markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment + * ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral + */ + + virtual void decl(const std::string& buff) + { + // std::cerr << "decl(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + std::string name; + std::string systemid; + std::string publicid; + std::vector v = split(buff); + if((v.size() > 0) && ("DOCTYPE" == v[0])) + { + if(doctypeIsPresent_) + return; // one doctype only! + doctypeIsPresent_ = true; + if(v.size() > 1) + { + name = v[1]; + if(v.size()>3 && "SYSTEM" == v[2]) + { + systemid = v[3]; + } + else if(v.size() > 3 && "PUBLIC" == v[2]) + { + publicid = v[3]; + if(v.size() > 4) + { + systemid = v[4]; + } + else + { + systemid = ""; + } + } + } + } + publicid = trimquotes(publicid); + systemid = trimquotes(systemid); + if(name != "") + { + publicid = cleanPublicid(publicid); + lexicalHandler_->startDTD(S(name), S(publicid), S(systemid)); + lexicalHandler_->endDTD(); + doctypeName_ = name; + doctypePublicId_ = publicid; + if(dynamic_cast(scanner_)) + { // Must resolve systemid + doctypeSystemId_ = string_adaptor::asStdString(dynamic_cast(scanner_)->getSystemId()); + doctypeSystemId_ = Arabica::io::URI(doctypeSystemId_, systemid).as_string(); + } // if ... + } // if ... + } // decl + + // If the String is quoted, trim the quotes. + static std::string trimquotes(const std::string& in) + { + size_t length = in.length(); + if(length == 0) + return in; + char s = in[0]; + char e = in[length - 1]; + if(s == e && (s == '\'' || s == '"')) + return in.substr(1, length - 1); + return in; + } // trimquotes + + // Split the supplied String into words or phrases seperated by spaces. + // Recognises quotes around a phrase and doesn't split it. 
+ static std::vector split(const std::string& val) + { + std::vector splits; + + std::string v = Arabica::text::normalize_whitespace >(val); + if(v.length() == 0) + { + splits.push_back(v); + return splits; + } + + size_t s = 0; + size_t e = 0; + bool sq = false; // single quote + bool dq = false; // double quote + char lastc = 0; + size_t len = v.length(); + for(e=0; e < len; ++e) + { + char c = v[e]; + if(!dq && c == '\'' && lastc != '\\') + { + sq = !sq; + if(s == std::string::npos) + s = e; + } + else if(!sq && c == '\"' && lastc != '\\') + { + dq = !dq; + if(s == std::string::npos) + s = e; + } + else if(!sq && !dq) + { + if(Arabica::XML::is_space(c)) + { + if(s >= 0) + splits.push_back(v.substr(s, e)); + s = std::string::npos; + } + else if(s == std::string::npos && c != ' ') + { + s = e; + } + } + lastc = c; + } // for ... + splits.push_back(v.substr(s, e)); + + return splits; + } // split + + // Replace junk in publicids with spaces + std::string cleanPublicid(const std::string& src) + { + std::string dst; + bool suppressSpace = true; + for(std::string::const_iterator i = src.begin(), ie = src.end(); i != ie; ++i) + { + if(legal.find(*i) != std::string::npos) + { + // legal but not whitespace + dst.push_back(*i); + suppressSpace = false; + } + else if(suppressSpace) + { // normalizable whitespace or junk + ; + } + else + { + dst.push_back(' '); + suppressSpace = true; + } + } + return dst; + } // cleanPublicId + + virtual void gi(const std::string& buff) + { + // std::cerr << "gi(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ != Element::Null) + return; + std::string name = makeName(buff); + if(name == "") + return; + ElementType* type = &schema_->getElementType(name); + if(*type == ElementType::Null) + { + // Suppress unknown elements if ignore-bogons is on + if(ignoreBogons) + return; + int bogonModel = bogonsEmpty ? Schema::M_EMPTY : Schema::M_ANY; + int bogonMemberOf = rootBogons ? 
Schema::M_ANY : (Schema::M_ANY & ~Schema::M_ROOT); + schema_->elementType(name, bogonModel, bogonMemberOf, 0); + if(!rootBogons) + schema_->parent(name, schema_->rootElementType().name()); + type = &schema_->getElementType(name); + } // if ... + + newElement_ = Element(*type, defaultAttributes); + } // gi + + virtual void cdsect(const std::string& buff) + { + // std::cerr << "cdsect(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + lexicalHandler_->startCDATA(); + pcdata(buff); + lexicalHandler_->endCDATA(); + } // cdsect + + virtual void pcdata(const std::string& buff) + { + // std::cerr << "pcdata(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(buff.empty()) + return; + bool allWhite = true; + for (std::string::const_iterator i = buff.begin(), ie = buff.end(); i != ie; ++i) + { + if(!Arabica::XML::is_space(*i)) + allWhite = false; + } // for ... + if(allWhite && !stack_.canContain(pcdata_)) + { + if(ignorableWhitespace) + contentHandler_->ignorableWhitespace(S(buff)); + } + else + { + rectify(pcdata_); + contentHandler_->characters(S(buff)); + } // if ... 
+ } // pcdata + + virtual void pitarget(const std::string& buff) + { + // std::cerr << "pitarget(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ != Element::Null) + return; + std::string name = makeName(buff); + size_t colon = name.find(':'); + while(colon != std::string::npos) + { + name[colon] = '_'; + colon = name.find(':'); + } // while + piTarget_ = name; + } // pitarget + + virtual void pi(const std::string& buff) + { + // std::cerr << "pi(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ != Element::Null || piTarget_ == "") + return; + if("xml" == lower_case(piTarget_)) + return; + size_t length = buff.length(); + if((length > 0) && (buff[length - 1] == '?')) + length--; // remove trailing ? + contentHandler_->processingInstruction(S(piTarget_), + S(buff.substr(0, length))); + piTarget_ = ""; + } // pi + + virtual void stagc(const std::string& buff) + { + // std::cerr << "stagc(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ == Element::Null) + return; + rectify(newElement_); + if(stack_.model() == Schema::M_EMPTY) + { + // Force an immediate end tag + etag_basic(buff); + } // if ... 
+ } // stagc + + virtual void stage(const std::string& buff) + { + // std::cerr << "stage(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + if(newElement_ == Element::Null) + return; + rectify(newElement_); + // Force an immediate end tag + etag_basic(buff); + } // stage + + // Comment buffer is twice the size of the output buffer + virtual void cmnt(const std::string& buff) + { + // std::cerr << "cmnt(\"" << buff.substr(offset, length) << "\", " << offset << ", " << length << ")" << std::endl; + lexicalHandler_->comment(S(buff)); + } // cmnt + + // Rectify the stack, pushing and popping as needed + // so that the argument can be safely pushed + void rectify(Element e) + { + Element sp; + while (true) + { + for (sp = stack_; sp != Element::Null; sp = sp.next()) + { + if(sp.canContain(e)) + break; + } // for ... + if(sp != Element::Null) + break; + ElementType& parentType = e.parent(); + if(parentType == ElementType::Null) + break; + Element parent = Element(parentType, defaultAttributes); + parent.setNext(e); + e = parent; + } // while ... + if(sp == Element::Null) + return; // don't know what to do + while (stack_ != sp) + { + if(stack_ == Element::Null || stack_.next() == Element::Null || stack_.next().next() == Element::Null) + break; + restartablyPop(); + } // while ... + while (e != Element::Null) + { + Element nexte = e.next(); + if(e.name() != "") + push(e); + e = nexte; + restart(e); + } // while ... + newElement_ = Element::Null; + } // rectify + + virtual int getEntity() + { + return entity_; + } // getEntity + + // Return the argument as a valid XML name + // This no longer lowercases the result: we depend on Schema to + // canonicalize case. 
+ std::string makeName(const std::string& buff) + { + std::string dst; + bool seenColon = false; + bool start = true; +// String src = new String(buff, offset, length); // DEBUG + for(std::string::const_iterator ch = buff.begin(), che = buff.end(); ch != che; ++ch) + { + if(Arabica::XML::is_letter(*ch) || *ch == '_') + { + start = false; + dst.push_back(*ch); + } + else if(Arabica::XML::is_digit(*ch) || *ch == '-' || *ch == '.') + { + if(start) + dst.push_back('_'); + start = false; + dst.push_back(*ch); + } + else if(*ch == ':' && !seenColon) + { + seenColon = true; + if(start) + dst.push_back('_'); + start = true; + dst.push_back(translateColons ? '_' : *ch); + } + } // for ... + size_t dstLength = dst.length(); + if(dstLength == 0 || dst[dstLength - 1] == ':') + dst.push_back('_'); + return dst; + } // makeName + + static std::string lower_case(const std::string& str) + { + std::string lower; + std::transform(str.begin(), str.end(), std::back_inserter(lower), (int(*)(int))std::tolower); + return lower; + } // lower_case + + void reportError(const std::string& message, bool fatal) + { + SAXParseException e(message, + S(""), + S(""), + -1, + -1); + if(fatal) + errorHandler_->fatalError(e); + else + errorHandler_->error(e); + } // reportError + +public: + static string_type S(const std::string& s) + { + return string_adaptor::construct_from_utf8(s.c_str()); + } // S + + static string_type S(const char* s) + { + return string_adaptor::construct_from_utf8(s); + } // S +}; // class Taggle + +template +bool Taggle::DEFAULT_NAMESPACES = true; +template +bool Taggle::DEFAULT_IGNORE_BOGONS = false; +template +bool Taggle::DEFAULT_BOGONS_EMPTY = false; +template +bool Taggle::DEFAULT_ROOT_BOGONS = true; +template +bool Taggle::DEFAULT_DEFAULT_ATTRIBUTES = true; +template +bool Taggle::DEFAULT_TRANSLATE_COLONS = false; +template +bool Taggle::DEFAULT_RESTART_ELEMENTS = true; +template +bool Taggle::DEFAULT_IGNORABLE_WHITESPACE = false; +template +bool 
Taggle::DEFAULT_CDATA_ELEMENTS = true; + +template +const string_type Taggle::namespacesFeature = Taggle::S("http://xml.org/sax/features/namespaces"); +template +const string_type Taggle::namespacePrefixesFeature = Taggle::S("http://xml.org/sax/features/namespace-prefixes"); +template +const string_type Taggle::externalGeneralEntitiesFeature = Taggle::S("http://xml.org/sax/features/external-general-entities"); +template +const string_type Taggle::externalParameterEntitiesFeature = Taggle::S("http://xml.org/sax/features/external-parameter-entities"); +template +const string_type Taggle::isStandaloneFeature = Taggle::S("http://xml.org/sax/features/is-standalone"); +template +const string_type Taggle::lexicalHandlerParameterEntitiesFeature = Taggle::S("http://xml.org/sax/features/lexical-handler/parameter-entities"); +template +const string_type Taggle::resolveDTDURIsFeature = Taggle::S("http://xml.org/sax/features/resolve-dtd-uris"); +template +const string_type Taggle::stringInterningFeature = Taggle::S("http://xml.org/sax/features/string-interning"); +template +const string_type Taggle::useAttributes2Feature = Taggle::S("http://xml.org/sax/features/use-attributes2"); +template +const string_type Taggle::useLocator2Feature = Taggle::S("http://xml.org/sax/features/use-locator2"); +template +const string_type Taggle::useEntityResolver2Feature = Taggle::S("http://xml.org/sax/features/use-entity-resolver2"); +template +const string_type Taggle::validationFeature = Taggle::S("http://xml.org/sax/features/validation"); +template +const string_type Taggle::unicodeNormalizationCheckingFeature = Taggle::S("http://xml.org/sax/features/unicode-normalization-checking"); +template +const string_type Taggle::xmlnsURIsFeature = Taggle::S("http://xml.org/sax/features/xmlns-uris"); +template +const string_type Taggle::XML11Feature = Taggle::S("http://xml.org/sax/features/xml-1.1"); +template +const string_type Taggle::ignoreBogonsFeature = 
Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons"); +template +const string_type Taggle::bogonsEmptyFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/bogons-empty"); +template +const string_type Taggle::rootBogonsFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/root-bogons"); +template +const string_type Taggle::defaultAttributesFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/default-attributes"); +template +const string_type Taggle::translateColonsFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/translate-colons"); +template +const string_type Taggle::restartElementsFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/restart-elements"); +template +const string_type Taggle::ignorableWhitespaceFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace"); +template +const string_type Taggle::CDATAElementsFeature = Taggle::S("http://www.ccil.org/~cowan/tagsoup/features/cdata-elements"); +template +const string_type Taggle::lexicalHandlerProperty = Taggle::S("http://xml.org/sax/properties/lexical-handler"); +template +const string_type Taggle::scannerProperty = Taggle::S("http://www.ccil.org/~cowan/tagsoup/properties/scanner"); +template +const string_type Taggle::schemaProperty = Taggle::S("http://www.ccil.org/~cowan/tagsoup/properties/schema"); +template +const string_type Taggle::autoDetectorProperty = Taggle::S("http://www.ccil.org/~cowan/tagsoup/properties/auto-detector"); + +template +const string_type Taggle::legal = + Taggle::S("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%"); + +} // namespace SAX + +} // namespace Arabica +#endif + diff --git a/include/Taggle/impl/ScanHandler.hpp b/include/Taggle/impl/ScanHandler.hpp new file mode 100755 index 00000000..b11b37d3 --- /dev/null +++ b/include/Taggle/impl/ScanHandler.hpp @@ -0,0 +1,105 @@ +#ifndef ARABICA_SCAN_HANDLER_HPP +#define ARABICA_SCAN_HANDLER_HPP 
+ +#include + +namespace Arabica +{ + +namespace SAX +{ + +/** + An interface that Scanners use to report events in the input stream. + + This code is derived from John Cowan's splendid TagSoup package +*/ +class ScanHandler +{ +public: + /** + Reports an attribute name without a value. + **/ + virtual void adup(const std::string& buff) = 0; + + /** + Reports an attribute name; a value will follow. + **/ + virtual void aname(const std::string& buff) = 0; + + /** + Reports an attribute value. + **/ + virtual void aval(const std::string& buff) = 0; + + /** + * Reports the content of a CDATA section (not a CDATA element) + */ + virtual void cdsect(const std::string& buff) = 0; + + /** + * Reports a declaration - typically a DOCTYPE + */ + virtual void decl(const std::string& buff) = 0; + + /** + Reports an entity reference or character reference. + **/ + virtual void entity(const std::string& buff) = 0; + + /** + Reports EOF. + **/ + virtual void eof(const std::string& buff) = 0; + + /** + Reports an end-tag. + **/ + virtual void etag(const std::string& buff) = 0; + + /** + Reports the general identifier (element type name) of a start-tag. + **/ + virtual void gi(const std::string& buff) = 0; + + /** + Reports character content. + **/ + virtual void pcdata(const std::string& buff) = 0; + + /** + Reports the data part of a processing instruction. + **/ + virtual void pi(const std::string& buff) = 0; + + /** + Reports the target part of a processing instruction. + **/ + virtual void pitarget(const std::string& buff) = 0; + + /** + Reports the close of a start-tag. + **/ + virtual void stagc(const std::string& buff) = 0; + + /** + Reports the close of an empty-tag. + **/ + virtual void stage(const std::string& buff) = 0; + + /** + Reports a comment. + **/ + virtual void cmnt(const std::string& buff) = 0; + + /** + Returns the value of the last entity or character reference reported. 
+ **/ + virtual int getEntity() = 0; +}; // class ScanHandler + +} // namespace SAX + +} // namespace Arabica + +#endif diff --git a/include/Taggle/impl/Scanner.hpp b/include/Taggle/impl/Scanner.hpp new file mode 100755 index 00000000..e38d6174 --- /dev/null +++ b/include/Taggle/impl/Scanner.hpp @@ -0,0 +1,45 @@ +#ifndef ARABICA_SAX_SCANNER_HPP +#define ARABICA_SAX_SCANNER_HPP + +#include +#include + +namespace Arabica +{ +namespace SAX +{ + +class ScanHandler; + +/** + An interface allowing Parser to invoke scanners. + + This code is derived from John Cowan's splendid TagSoup package +*/ +class Scanner +{ +public: + /** + Invoke a scanner. + @param r A source of characters to scan + @param h A ScanHandler to report events to + **/ + virtual void scan(std::istream& r, ScanHandler& h) = 0; + + /** + Reset the embedded locator. + @param publicid The publicid of the source + @param systemid The systemid of the source + **/ + virtual void resetDocumentLocator(const std::string& publicid, const std::string& systemid) = 0; + + /** + Signal to the scanner to start CDATA content mode. + **/ + virtual void startCDATA() = 0; +}; // Scanner + + +} // namespace SAX +} // namespace Arabica +#endif diff --git a/include/Taggle/impl/Schema.hpp b/include/Taggle/impl/Schema.hpp new file mode 100644 index 00000000..ed2971ff --- /dev/null +++ b/include/Taggle/impl/Schema.hpp @@ -0,0 +1,44 @@ +#ifndef ARABICA_SAX_TAGGLE_SCHEMA_HPP +#define ARABICA_SAX_TAGGLE_SCHEMA_HPP + +namespace Arabica +{ +namespace SAX +{ + +class ElementType; + +/** +Abstract class representing a TSSL schema. +Actual TSSL schemas are compiled into concrete subclasses of this class. 
#include <string>

namespace Arabica
{
namespace SAX
{

class ElementType;

/**
Abstract class representing a TSSL schema.
Actual TSSL schemas are compiled into concrete subclasses of this class.

The M_ constants are content-model bit masks and the F_ constants are
element flags; both are passed to elementType().

Based on code from John Cowan's super TagSoup package
**/
class Schema
{
public:
  // The two masks that need the sign bit are built from unsigned values and
  // converted explicitly, rather than shifting a signed 1 into the sign bit.
  static const int M_ANY = static_cast<int>(0xFFFFFFFFU);  // every model bit set
  static const int M_EMPTY = 0;                            // no model bit set
  static const int M_PCDATA = 1 << 30;
  static const int M_ROOT = static_cast<int>(1U << 31);

  static const int F_RESTART = 1;
  static const int F_CDATA = 2;
  static const int F_NOFORCE = 4;

  /** Add or replace the element type called name. */
  virtual void elementType(const std::string& name, int model, int memberOf, int flags) = 0;
  /** The element type registered as the root of the schema. */
  virtual ElementType& rootElementType() = 0;
  /** Declare parentName as the natural parent of name. */
  virtual void parent(std::string name, std::string parentName) = 0;

  /** Look up an element type by name. */
  virtual ElementType& getElementType(const std::string& name) = 0;
  /** Look up the value of a character entity by name. */
  virtual int getEntity(const std::string& name) const = 0;
  /** The URI (namespace name) of this schema. */
  virtual const std::string& getURI() const = 0;
  /** The prefix of this schema. */
  virtual const std::string& getPrefix() const = 0;

  virtual ~Schema() { }
}; // class Schema

} // namespace SAX

} // namespace Arabica
+ @param name Name (Qname) of the element + @param model Models of the element's content as a vector of bits + @param memberOf Models the element is a member of as a vector of bits + @param flags Flags for the element + **/ + void elementType(const std::string& name, int model, int memberOf, int flags) + { + ElementType* e = new ElementType(name, model, memberOf, flags, *this); + std::string lname = lower_case(name); + elementTypes_[lname] = e; + if(memberOf == M_ROOT) + root_ = elementTypes_[lname]; + } // elementType + + /** + Get the root element of this schema + **/ + ElementType& rootElementType() + { + return *root_; + } // rootElementType + + /** + Add or replace a default attribute for an element type in this schema. + @param elemName Name (Qname) of the element type + @param attrName Name (Qname) of the attribute + @param type Type of the attribute + @param value Default value of the attribute; null if no default + **/ + void attribute(const std::string& elemName, const std::string& attrName, const std::string& type, const std::string& value) + { + ElementType& e = getElementType(elemName); + if (e == ElementType::Null) + { + throw std::runtime_error("Attribute " + attrName + + " specified for unknown element type " + + elemName); + } + e.setAttribute(attrName, type, value); + } // attribute + + /** + Specify natural parent of an element in this schema. + @param name Name of the child element + @param parentName Name of the parent element + **/ + void parent(std::string name, std::string parentName) + { + ElementType& child = getElementType(name); + ElementType& parent = getElementType(parentName); + if (child == ElementType::Null) + { + throw std::runtime_error("No child " + name + " for parent " + parentName); + } + if (parent == ElementType::Null) + { + throw std::runtime_error("No parent " + parentName + " for child " + name); + } + child.setParent(parent); + } // parent + + /** + Add to or replace a character entity in this schema. 
+ @param name Name of the entity + @param value Value of the entity + **/ + void entity(const std::string& name, int value) + { + entities_[name] = value; + } // entity + + /** + Get an ElementType by name. + @param name Name (Qname) of the element type + @return The corresponding ElementType + **/ + ElementType& getElementType(const std::string& name) + { + std::map::iterator elemType = elementTypes_.find(lower_case(name)); + if(elemType == elementTypes_.end()) + return ElementType::Null; + return *elemType->second; + } // getElementType + + /** + Get an entity value by name. + @param name Name of the entity + @return The corresponding character, or 0 if none + **/ + int getEntity(const std::string& name) const + { + std::map::const_iterator ent = entities_.find(name); + if(ent == entities_.end()) + return 0; + return ent->second; + } // getEntity + + /** + Return the URI (namespace name) of this schema. + **/ + const std::string& getURI() const + { + return URI_; + } // getURI + + /** + Return the prefix of this schema. + **/ + const std::string& getPrefix() const + { + return prefix_; + } // getPrefix + + /** + Change the URI (namespace name) of this schema. + **/ + void setURI(std::string uri) + { + URI_ = uri; + } // setURI + + /** + Change the prefix of this schema. 
#ifndef ARABICA_SAX_TAGGLE_HTML_MODELS_HPP
#define ARABICA_SAX_TAGGLE_HTML_MODELS_HPP

namespace Arabica
{

namespace SAX
{

/**
Generated constants naming the HTML content models. Logically they belong
to HTMLSchema, but generating them into a class of their own is more
convenient.

Every model is a distinct single-bit mask (bits 1 to 22), so several
models can be OR-ed together into one int.

Based on code from John Cowan's super TagSoup package
*/
class HTMLModels
{
protected:
  // Start of model definitions
  static const int M_AREA        = 0x00000002;
  static const int M_BLOCK       = 0x00000004;
  static const int M_BLOCKINLINE = 0x00000008;
  static const int M_BODY        = 0x00000010;
  static const int M_CELL        = 0x00000020;
  static const int M_COL         = 0x00000040;
  static const int M_DEF         = 0x00000080;
  static const int M_FORM        = 0x00000100;
  static const int M_FRAME       = 0x00000200;
  static const int M_HEAD        = 0x00000400;
  static const int M_HTML        = 0x00000800;
  static const int M_INLINE      = 0x00001000;
  static const int M_LEGEND      = 0x00002000;
  static const int M_LI          = 0x00004000;
  static const int M_NOLINK      = 0x00008000;
  static const int M_OPTION      = 0x00010000;
  static const int M_OPTIONS     = 0x00020000;
  static const int M_P           = 0x00040000;
  static const int M_PARAM       = 0x00080000;
  static const int M_TABLE       = 0x00100000;
  static const int M_TABULAR     = 0x00200000;
  static const int M_TR          = 0x00400000;
  // End of model definitions
}; // class HTMLModels

} // namespace SAX

} // namespace Arabica
#endif
a/include/Taggle/impl/html/HTMLScanner.hpp b/include/Taggle/impl/html/HTMLScanner.hpp new file mode 100644 index 00000000..0dba84d4 --- /dev/null +++ b/include/Taggle/impl/html/HTMLScanner.hpp @@ -0,0 +1,704 @@ +#ifndef ARABICA_SAX_TAGGLE_HTML_SCANNER_HPP +#define ARABICA_SAX_TAGGLE_HTML_SCANNER_HPP + +#include +#include +#include +#include "../Scanner.hpp" + +namespace Arabica +{ + +namespace SAX +{ + +/** +This class implements a table-driven scanner for HTML, allowing for lots of +defects. It implements the Scanner interface, which accepts a Reader +object to fetch characters from and a ScanHandler object to report lexical +events to. + +Based on code from John Cowan's super TagSoup package +*/ +class HTMLScanner : public Scanner, public SAX::Locator +{ +private: + // Start of state table + static const int S_ANAME = 1; + static const int S_APOS = 2; + static const int S_AVAL = 3; + static const int S_BB = 4; + static const int S_BBC = 5; + static const int S_BBCD = 6; + static const int S_BBCDA = 7; + static const int S_BBCDAT = 8; + static const int S_BBCDATA = 9; + static const int S_CDATA = 10; + static const int S_CDATA2 = 11; + static const int S_CDSECT = 12; + static const int S_CDSECT1 = 13; + static const int S_CDSECT2 = 14; + static const int S_COM = 15; + static const int S_COM2 = 16; + static const int S_COM3 = 17; + static const int S_COM4 = 18; + static const int S_DECL = 19; + static const int S_DECL2 = 20; + static const int S_DONE = 21; + static const int S_EMPTYTAG = 22; + static const int S_ENT = 23; + static const int S_EQ = 24; + static const int S_ETAG = 25; + static const int S_GI = 26; + static const int S_NCR = 27; + static const int S_PCDATA = 28; + static const int S_PI = 29; + static const int S_PITARGET = 30; + static const int S_QUOT = 31; + static const int S_STAGC = 32; + static const int S_TAG = 33; + static const int S_TAGWS = 34; + static const int S_XNCR = 35; + static const int A_ADUP = 1; + static const int A_ADUP_SAVE = 2; 
+ static const int A_ADUP_STAGC = 3; + static const int A_ANAME = 4; + static const int A_ANAME_ADUP = 5; + static const int A_ANAME_ADUP_STAGC = 6; + static const int A_AVAL = 7; + static const int A_AVAL_STAGC = 8; + static const int A_CDATA = 9; + static const int A_CMNT = 10; + static const int A_DECL = 11; + static const int A_EMPTYTAG = 12; + static const int A_ENTITY = 13; + static const int A_ENTITY_START = 14; + static const int A_ETAG = 15; + static const int A_GI = 16; + static const int A_GI_STAGC = 17; + static const int A_LT = 18; + static const int A_LT_PCDATA = 19; + static const int A_MINUS = 20; + static const int A_MINUS2 = 21; + static const int A_MINUS3 = 22; + static const int A_PCDATA = 23; + static const int A_PI = 24; + static const int A_PITARGET = 25; + static const int A_PITARGET_PI = 26; + static const int A_SAVE = 27; + static const int A_SKIP = 28; + static const int A_SP = 29; + static const int A_STAGC = 30; + static const int A_UNGET = 31; + static const int A_UNSAVE_PCDATA = 32; + static const int statetable[]; + static const std::string debug_actionnames[]; + static const std::string debug_statenames[]; + // End of state table + static const int WinCharMap[]; // Windows char map + static const std::string hexLetters; + + std::string publicId_; // Locator state + std::string systemId_; + int lastLine_; + int lastColumn_; + int currentLine_; + int currentColumn_; + + int state_; // Current state + int nextState_; // Next state + std::string outputBuffer_; // Output buffer + + // Compensate for bug in PushbackReader that allows + // pushing back EOF. 
+ //void unread(PushbackReader r, int c) throws IOException { + // if (c != -1) r.unread(c); + // } + +public: + HTMLScanner() : + publicId_(), + systemId_(), + lastLine_(0), + lastColumn_(0), + currentLine_(0), + currentColumn_(0), + state_(0), + nextState_(0), + outputBuffer_() + { + outputBuffer_.reserve(200); + } // HTMLScanner + + // Locator implementation + int getLineNumber() const + { + return lastLine_; + } // getLineNumber + + int getColumnNumber() const + { + return lastColumn_; + } // getColumnNumber + + std::string getPublicId() const + { + return publicId_; + } // getPublicId + + std::string getSystemId() const + { + return systemId_; + } // getSystemId + + + // Scanner implementation + /** + Reset document locator, supplying systemid and publicid. + @param systemid System id + @param publicid Public id + */ + virtual void resetDocumentLocator(const std::string& publicid, const std::string& systemid) + { + publicId_ = publicid; + systemId_ = systemid; + lastLine_ = lastColumn_ = currentLine_ = currentColumn_ = 0; + } // resetDocumentLocator + + /** + Scan HTML source, reporting lexical events. + @param r0 Reader that provides characters + @param h ScanHandler that accepts lexical events. 
+ */ + virtual void scan(std::istream& r, ScanHandler& h) + { + state_ = S_PCDATA; +/* PushbackReader r; + if (r0 instanceof PushbackReader) { + r = (PushbackReader)r0; + } + else if (r0 instanceof BufferedReader) { + r = new PushbackReader(r0); + } + else { + r = new PushbackReader(new BufferedReader(r0)); + } +*/ +// int firstChar = r.read(); // Remove any leading BOM +// if (firstChar != '\uFEFF') unread(r, firstChar); + + while (state_ != S_DONE) + { + int ch = r.get(); + + // Process control characters + if (ch >= 0x80 && ch <= 0x9F) + ch = WinCharMap[ch-0x80]; + + if (ch == '\r') + { + ch = r.get(); // expect LF next + if (ch != '\n') + { + r.unget(); + ch = '\n'; + } + } + + if (ch == '\n') + { + ++currentLine_; + currentColumn_ = 0; + } + else + { + ++currentColumn_; + } + + if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1)) + continue; + + // Search state table + int action = 0; + for (int i = 0; statetable[i] != -1; i += 4) + { + if (state_ != statetable[i]) + { + if (action != 0) + break; + continue; + } + if (statetable[i+1] == 0) + { + action = statetable[i+2]; + nextState_ = statetable[i+3]; + } + else if (statetable[i+1] == ch) + { + action = statetable[i+2]; + nextState_ = statetable[i+3]; + break; + } + } // for ... 
+ + switch (action) + { + case 0: + { + std::ostringstream os; + os << "HTMLScanner can't cope with " << ch << " in state " << state_; + throw std::runtime_error(os.str()); + } + case A_ADUP: + h.adup(outputBuffer_); + outputBuffer_.clear(); + break; + case A_ADUP_SAVE: + h.adup(outputBuffer_); + outputBuffer_.clear(); + save(ch, h); + break; + case A_ADUP_STAGC: + h.adup(outputBuffer_); + outputBuffer_.clear(); + h.stagc(outputBuffer_); + break; + case A_ANAME: + h.aname(outputBuffer_); + outputBuffer_.clear(); + break; + case A_ANAME_ADUP: + h.aname(outputBuffer_); + outputBuffer_.clear(); + h.adup(outputBuffer_); + break; + case A_ANAME_ADUP_STAGC: + h.aname(outputBuffer_); + outputBuffer_.clear(); + h.adup(outputBuffer_); + h.stagc(outputBuffer_); + break; + case A_AVAL: + h.aval(outputBuffer_); + outputBuffer_.clear(); + break; + case A_AVAL_STAGC: + h.aval(outputBuffer_); + outputBuffer_.clear(); + h.stagc(outputBuffer_); + break; + case A_CDATA: + mark(); + // suppress the final "]]" in the buffer + if (outputBuffer_.size() > 1) + outputBuffer_.erase(outputBuffer_.size()-2); + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + break; + case A_ENTITY_START: + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + save(ch, h); + break; + case A_ENTITY: + { + mark(); + char ch1 = (char)ch; + // System.out.println("Got " + ch1 + " in state " + ((state_ == S_ENT) ? "S_ENT" : ((state_ == S_NCR) ? 
"S_NCR" : "UNK"))); + if (state_ == S_ENT && ch1 == '#') + { + nextState_ = S_NCR; + save(ch, h); + break; + } + else if (state_ == S_NCR && (ch1 == 'x' || ch1 == 'X')) + { + nextState_ = S_XNCR; + save(ch, h); + break; + } + else if (state_ == S_ENT && XML::is_letter_or_digit(ch1)) + { + save(ch, h); + break; + } + else if (state_ == S_NCR && XML::is_digit(ch1)) + { + save(ch, h); + break; + } + else if (state_ == S_XNCR && (XML::is_digit(ch1) || hexLetters.find(ch1) != std::string::npos)) + { + save(ch, h); + break; + } + + // The whole entity reference has been collected + h.entity(outputBuffer_.substr(1, outputBuffer_.size()-1)); + int ent = h.getEntity(); + if (ent != 0) + { + outputBuffer_.clear(); + if (ent >= 0x80 && ent <= 0x9F) + { + ent = WinCharMap[ent-0x80]; + } + if (ent < 0x20) + { + // Control becomes space + ent = 0x20; + } + else if (ent >= 0xD800 && ent <= 0xDFFF) + { + // Surrogates get dropped + ent = 0; + } + else if (ent <= 0xFFFF) + { + // BMP character + save(ent, h); + } + else + { + // Astral converted to two surrogates + ent -= 0x10000; + save((ent>>10) + 0xD800, h); + save((ent&0x3FF) + 0xDC00, h); + } + if (ch != ';') + { + r.unget(); + currentColumn_--; + } + } + else + { + r.unget(); + currentColumn_--; + } + nextState_ = S_PCDATA; + } // case A_ENTITY: + break; + case A_ETAG: + h.etag(outputBuffer_); + outputBuffer_.clear(); + break; + case A_DECL: + h.decl(outputBuffer_); + outputBuffer_.clear(); + break; + case A_GI: + h.gi(outputBuffer_); + outputBuffer_.clear(); + break; + case A_GI_STAGC: + h.gi(outputBuffer_); + outputBuffer_.clear(); + h.stagc(outputBuffer_); + break; + case A_LT: + mark(); + save('<', h); + save(ch, h); + break; + case A_LT_PCDATA: + mark(); + save('<', h); + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + break; + case A_PCDATA: + mark(); + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + break; + case A_CMNT: + mark(); + h.cmnt(outputBuffer_); + outputBuffer_.clear(); + break; + case A_MINUS3: + 
save('-', h); + save(' ', h); + break; + case A_MINUS2: + save('-', h); + save(' ', h); + // fall through into A_MINUS + case A_MINUS: + save('-', h); + save(ch, h); + break; + case A_PI: + mark(); + h.pi(outputBuffer_); + outputBuffer_.clear(); + break; + case A_PITARGET: + h.pitarget(outputBuffer_); + outputBuffer_.clear(); + break; + case A_PITARGET_PI: + h.pitarget(outputBuffer_); + outputBuffer_.clear(); + h.pi(outputBuffer_); + break; + case A_SAVE: + save(ch, h); + break; + case A_SKIP: + break; + case A_SP: + save(' ', h); + break; + case A_STAGC: + h.stagc(outputBuffer_); + outputBuffer_.clear(); + break; + case A_EMPTYTAG: + mark(); + if (outputBuffer_.size() > 0) + h.gi(outputBuffer_); + outputBuffer_.clear(); + h.stage(outputBuffer_); + break; + case A_UNGET: + r.unget(); + currentColumn_--; + break; + case A_UNSAVE_PCDATA: + if (outputBuffer_.size() > 0) + outputBuffer_.erase(outputBuffer_.size()-1); + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + break; + default: + throw std::runtime_error("Can't process state " + action); + } // switch ... + state_ = nextState_; + } // while (state_ != S_DONE) + h.eof(""); + } // scan + + /** + A callback for the ScanHandler that allows it to force + the lexer state to CDATA content (no markup is recognized except + the end of element. + */ + void startCDATA() + { + nextState_ = S_CDATA; + } // startCDATA + +private: + /** + * Mark the current scan position as a "point of interest" - start of a tag, + * cdata, processing instruction etc. 
+ */ + void mark() + { + lastColumn_ = currentColumn_; + lastLine_ = currentLine_; + } // mark + + void save(int ch, ScanHandler& h) + { + if (outputBuffer_.size() >= outputBuffer_.capacity() - 20) + { + if (state_ == S_PCDATA || state_ == S_CDATA) + { + // Return a buffer-sized chunk of PCDATA + h.pcdata(outputBuffer_); + outputBuffer_.clear(); + } + } + outputBuffer_ += static_cast(ch); + } // save + + static std::string nicechar(int in) + { + if (in == '\n') + return "\\n"; + std::ostringstream os; + if(in >= 32) + os << '\'' << static_cast(in) << '\''; + else + os << std::hex << std::showbase << in; + return os.str(); + } // nicechar + + HTMLScanner(const HTMLScanner&); + bool operator==(const HTMLScanner&) const; + HTMLScanner& operator=(const HTMLScanner&); +}; // class HTMLScanner + +const int HTMLScanner::statetable[] = { + S_ANAME, '/', A_ANAME_ADUP, S_EMPTYTAG, + S_ANAME, '=', A_ANAME, S_AVAL, + S_ANAME, '>', A_ANAME_ADUP_STAGC, S_PCDATA, + S_ANAME, 0, A_SAVE, S_ANAME, + S_ANAME, -1, A_ANAME_ADUP_STAGC, S_DONE, + S_ANAME, ' ', A_ANAME, S_EQ, + S_ANAME, '\n', A_ANAME, S_EQ, + S_ANAME, '\t', A_ANAME, S_EQ, + S_APOS, '\'', A_AVAL, S_TAGWS, + S_APOS, 0, A_SAVE, S_APOS, + S_APOS, -1, A_AVAL_STAGC, S_DONE, + S_APOS, ' ', A_SP, S_APOS, + S_APOS, '\n', A_SP, S_APOS, + S_APOS, '\t', A_SP, S_APOS, + S_AVAL, '\'', A_SKIP, S_APOS, + S_AVAL, '"', A_SKIP, S_QUOT, + S_AVAL, '>', A_AVAL_STAGC, S_PCDATA, + S_AVAL, 0, A_SAVE, S_STAGC, + S_AVAL, -1, A_AVAL_STAGC, S_DONE, + S_AVAL, ' ', A_SKIP, S_AVAL, + S_AVAL, '\n', A_SKIP, S_AVAL, + S_AVAL, '\t', A_SKIP, S_AVAL, + S_BB, 'C', A_SKIP, S_BBC, + S_BB, 0, A_SKIP, S_DECL, + S_BB, -1, A_SKIP, S_DONE, + S_BBC, 'D', A_SKIP, S_BBCD, + S_BBC, 0, A_SKIP, S_DECL, + S_BBC, -1, A_SKIP, S_DONE, + S_BBCD, 'A', A_SKIP, S_BBCDA, + S_BBCD, 0, A_SKIP, S_DECL, + S_BBCD, -1, A_SKIP, S_DONE, + S_BBCDA, 'T', A_SKIP, S_BBCDAT, + S_BBCDA, 0, A_SKIP, S_DECL, + S_BBCDA, -1, A_SKIP, S_DONE, + S_BBCDAT, 'A', A_SKIP, S_BBCDATA, + S_BBCDAT, 0, A_SKIP, 
S_DECL, + S_BBCDAT, -1, A_SKIP, S_DONE, + S_BBCDATA, '[', A_SKIP, S_CDSECT, + S_BBCDATA, 0, A_SKIP, S_DECL, + S_BBCDATA, -1, A_SKIP, S_DONE, + S_CDATA, '<', A_SAVE, S_CDATA2, + S_CDATA, 0, A_SAVE, S_CDATA, + S_CDATA, -1, A_PCDATA, S_DONE, + S_CDATA2, '/', A_UNSAVE_PCDATA, S_ETAG, + S_CDATA2, 0, A_SAVE, S_CDATA, + S_CDATA2, -1, A_UNSAVE_PCDATA, S_DONE, + S_CDSECT, ']', A_SAVE, S_CDSECT1, + S_CDSECT, 0, A_SAVE, S_CDSECT, + S_CDSECT, -1, A_SKIP, S_DONE, + S_CDSECT1, ']', A_SAVE, S_CDSECT2, + S_CDSECT1, 0, A_SAVE, S_CDSECT, + S_CDSECT1, -1, A_SKIP, S_DONE, + S_CDSECT2, '>', A_CDATA, S_PCDATA, + S_CDSECT2, 0, A_SAVE, S_CDSECT, + S_CDSECT2, -1, A_SKIP, S_DONE, + S_COM, '-', A_SKIP, S_COM2, + S_COM, 0, A_SAVE, S_COM2, + S_COM, -1, A_CMNT, S_DONE, + S_COM2, '-', A_SKIP, S_COM3, + S_COM2, 0, A_SAVE, S_COM2, + S_COM2, -1, A_CMNT, S_DONE, + S_COM3, '-', A_SKIP, S_COM4, + S_COM3, 0, A_MINUS, S_COM2, + S_COM3, -1, A_CMNT, S_DONE, + S_COM4, '-', A_MINUS3, S_COM4, + S_COM4, '>', A_CMNT, S_PCDATA, + S_COM4, 0, A_MINUS2, S_COM2, + S_COM4, -1, A_CMNT, S_DONE, + S_DECL, '-', A_SKIP, S_COM, + S_DECL, '[', A_SKIP, S_BB, + S_DECL, '>', A_SKIP, S_PCDATA, + S_DECL, 0, A_SAVE, S_DECL2, + S_DECL, -1, A_SKIP, S_DONE, + S_DECL2, '>', A_DECL, S_PCDATA, + S_DECL2, 0, A_SAVE, S_DECL2, + S_DECL2, -1, A_SKIP, S_DONE, + S_EMPTYTAG, '>', A_EMPTYTAG, S_PCDATA, + S_EMPTYTAG, 0, A_SAVE, S_ANAME, + S_EMPTYTAG, ' ', A_SKIP, S_TAGWS, + S_EMPTYTAG, '\n', A_SKIP, S_TAGWS, + S_EMPTYTAG, '\t', A_SKIP, S_TAGWS, + S_ENT, 0, A_ENTITY, S_ENT, + S_ENT, -1, A_ENTITY, S_DONE, + S_EQ, '=', A_SKIP, S_AVAL, + S_EQ, '>', A_ADUP_STAGC, S_PCDATA, + S_EQ, 0, A_ADUP_SAVE, S_ANAME, + S_EQ, -1, A_ADUP_STAGC, S_DONE, + S_EQ, ' ', A_SKIP, S_EQ, + S_EQ, '\n', A_SKIP, S_EQ, + S_EQ, '\t', A_SKIP, S_EQ, + S_ETAG, '>', A_ETAG, S_PCDATA, + S_ETAG, 0, A_SAVE, S_ETAG, + S_ETAG, -1, A_ETAG, S_DONE, + S_ETAG, ' ', A_SKIP, S_ETAG, + S_ETAG, '\n', A_SKIP, S_ETAG, + S_ETAG, '\t', A_SKIP, S_ETAG, + S_GI, '/', A_SKIP, S_EMPTYTAG, + S_GI, '>', 
A_GI_STAGC, S_PCDATA, + S_GI, 0, A_SAVE, S_GI, + S_GI, -1, A_SKIP, S_DONE, + S_GI, ' ', A_GI, S_TAGWS, + S_GI, '\n', A_GI, S_TAGWS, + S_GI, '\t', A_GI, S_TAGWS, + S_NCR, 0, A_ENTITY, S_NCR, + S_NCR, -1, A_ENTITY, S_DONE, + S_PCDATA, '&', A_ENTITY_START, S_ENT, + S_PCDATA, '<', A_PCDATA, S_TAG, + S_PCDATA, 0, A_SAVE, S_PCDATA, + S_PCDATA, -1, A_PCDATA, S_DONE, + S_PI, '>', A_PI, S_PCDATA, + S_PI, 0, A_SAVE, S_PI, + S_PI, -1, A_PI, S_DONE, + S_PITARGET, '>', A_PITARGET_PI, S_PCDATA, + S_PITARGET, 0, A_SAVE, S_PITARGET, + S_PITARGET, -1, A_PITARGET_PI, S_DONE, + S_PITARGET, ' ', A_PITARGET, S_PI, + S_PITARGET, '\n', A_PITARGET, S_PI, + S_PITARGET, '\t', A_PITARGET, S_PI, + S_QUOT, '"', A_AVAL, S_TAGWS, + S_QUOT, 0, A_SAVE, S_QUOT, + S_QUOT, -1, A_AVAL_STAGC, S_DONE, + S_QUOT, ' ', A_SP, S_QUOT, + S_QUOT, '\n', A_SP, S_QUOT, + S_QUOT, '\t', A_SP, S_QUOT, + S_STAGC, '>', A_AVAL_STAGC, S_PCDATA, + S_STAGC, 0, A_SAVE, S_STAGC, + S_STAGC, -1, A_AVAL_STAGC, S_DONE, + S_STAGC, ' ', A_AVAL, S_TAGWS, + S_STAGC, '\n', A_AVAL, S_TAGWS, + S_STAGC, '\t', A_AVAL, S_TAGWS, + S_TAG, '!', A_SKIP, S_DECL, + S_TAG, '?', A_SKIP, S_PITARGET, + S_TAG, '/', A_SKIP, S_ETAG, + S_TAG, '<', A_SAVE, S_TAG, + S_TAG, 0, A_SAVE, S_GI, + S_TAG, -1, A_LT_PCDATA, S_DONE, + S_TAG, ' ', A_LT, S_PCDATA, + S_TAG, '\n', A_LT, S_PCDATA, + S_TAG, '\t', A_LT, S_PCDATA, + S_TAGWS, '/', A_SKIP, S_EMPTYTAG, + S_TAGWS, '>', A_STAGC, S_PCDATA, + S_TAGWS, 0, A_SAVE, S_ANAME, + S_TAGWS, -1, A_STAGC, S_DONE, + S_TAGWS, ' ', A_SKIP, S_TAGWS, + S_TAGWS, '\n', A_SKIP, S_TAGWS, + S_TAGWS, '\t', A_SKIP, S_TAGWS, + S_XNCR, 0, A_ENTITY, S_XNCR, + S_XNCR, -1, A_ENTITY, S_DONE, + -1, -1, -1, -1 +}; // HTMLScanner::statetable + +const std::string HTMLScanner::debug_actionnames[] = { "", "A_ADUP", "A_ADUP_SAVE", "A_ADUP_STAGC", "A_ANAME", "A_ANAME_ADUP", "A_ANAME_ADUP_STAGC", "A_AVAL", "A_AVAL_STAGC", "A_CDATA", "A_CMNT", "A_DECL", "A_EMPTYTAG", "A_ENTITY", "A_ENTITY_START", "A_ETAG", "A_GI", "A_GI_STAGC", "A_LT", 
"A_LT_PCDATA", "A_MINUS", "A_MINUS2", "A_MINUS3", "A_PCDATA", "A_PI", "A_PITARGET", "A_PITARGET_PI", "A_SAVE", "A_SKIP", "A_SP", "A_STAGC", "A_UNGET", "A_UNSAVE_PCDATA"}; +const std::string HTMLScanner::debug_statenames[] = { "", "S_ANAME", "S_APOS", "S_AVAL", "S_BB", "S_BBC", "S_BBCD", "S_BBCDA", "S_BBCDAT", "S_BBCDATA", "S_CDATA", "S_CDATA2", "S_CDSECT", "S_CDSECT1", "S_CDSECT2", "S_COM", "S_COM2", "S_COM3", "S_COM4", "S_DECL", "S_DECL2", "S_DONE", "S_EMPTYTAG", "S_ENT", "S_EQ", "S_ETAG", "S_GI", "S_NCR", "S_PCDATA", "S_PI", "S_PITARGET", "S_QUOT", "S_STAGC", "S_TAG", "S_TAGWS", "S_XNCR"}; + +const int HTMLScanner::WinCharMap[] = { // Windows chars map + 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, + 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178 +}; // HTMLScanner::WinCharMap + +const std::string HTMLScanner::hexLetters = "abcdefABCDEF"; + +} // namespace SAX + +} // namespace Arabica + +#endif + diff --git a/include/Taggle/impl/html/HTMLSchema.hpp b/include/Taggle/impl/html/HTMLSchema.hpp new file mode 100644 index 00000000..3ac3f37a --- /dev/null +++ b/include/Taggle/impl/html/HTMLSchema.hpp @@ -0,0 +1,2955 @@ +#ifndef ARABICA_SAX_TAGGLE_HTML_SCHEMA_HPP +#define ARABICA_SAX_TAGGLE_HTML_SCHEMA_HPP + +#include "../SchemaImpl.hpp" +#include "HTMLModels.hpp" + +namespace Arabica +{ +namespace SAX +{ + +/** +This class provides a Schema that has been preinitialized with HTML +elements, attributes, and character entity declarations. All the declarations +normally provided with HTML 4.01 are given, plus some that are IE-specific +and NS4-specific. Attribute declarations of type CDATA with no default +value are not included. 
+ +Based on code from John Cowan's super TagSoup package +*/ +class HTMLSchema : public SchemaImpl, private HTMLModels +{ +public: + /** + Returns a newly constructed HTMLSchema object independent of + any existing ones. + */ + HTMLSchema() + { + // Start of Schema calls + setURI("http://www.w3.org/1999/xhtml"); + setPrefix("html"); + + // the original Java method body was generated automatically, + // and was huge. unfortunately, that sent gcc into a spin, + // so I've had to break it up + + elementTypes(); + + parents(); + + attributes(); + + entities(); + + } // HTMLSchema + +private: + void elementTypes() + { + elementType("", M_EMPTY, M_PCDATA, 0); + elementType("", M_ROOT, M_EMPTY, 0); + elementType("a", M_PCDATA|M_NOLINK, M_INLINE, 0); + elementType("abbr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("acronym", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("address", M_PCDATA|M_INLINE|M_P, M_BLOCK, 0); + elementType("applet", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("area", M_EMPTY, M_AREA, 0); + elementType("b", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("base", M_EMPTY, M_HEAD, 0); + elementType("basefont", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("bdo", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("bgsound", M_EMPTY, M_HEAD, 0); + elementType("big", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("blink", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("blockquote", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("body", M_PCDATA|M_INLINE|M_BLOCK, M_HTML|M_BODY, 0); + elementType("br", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("button", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("canvas", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("caption", M_PCDATA|M_INLINE, M_TABULAR, 0); + elementType("center", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("cite", 
M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("code", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("col", M_EMPTY, M_COL|M_TABULAR, 0); + elementType("colgroup", M_COL, M_TABULAR, 0); + elementType("comment", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("dd", M_PCDATA|M_INLINE|M_BLOCK, M_DEF, 0); + elementType("del", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCKINLINE|M_BLOCK, F_RESTART); + elementType("dfn", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("dir", M_LI, M_BLOCK, 0); + elementType("div", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("dl", M_DEF, M_BLOCK, 0); + elementType("dt", M_PCDATA|M_INLINE, M_DEF, 0); + elementType("em", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("fieldset", M_PCDATA|M_LEGEND|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("font", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("form", M_PCDATA|M_INLINE|M_NOLINK|M_BLOCK|M_TR|M_CELL, M_BLOCK|M_FORM, F_NOFORCE); + elementType("frame", M_EMPTY, M_FRAME, 0); + elementType("frameset", M_FRAME, M_FRAME|M_HTML, 0); + elementType("h1", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h2", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h3", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h4", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h5", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("h6", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("head", M_HEAD, M_HTML, 0); + elementType("hr", M_EMPTY, M_BLOCK, 0); + elementType("html", M_HTML, M_ROOT, 0); + elementType("i", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("iframe", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_NOLINK, 0); + elementType("img", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("input", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("ins", M_PCDATA|M_INLINE|M_BLOCK, M_INLINE|M_BLOCK, F_RESTART); + elementType("isindex", M_EMPTY, M_HEAD, 0); + elementType("kbd", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + 
elementType("label", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("legend", M_PCDATA|M_INLINE, M_LEGEND, 0); + elementType("li", M_PCDATA|M_INLINE|M_BLOCK, M_LI, 0); + elementType("link", M_EMPTY, M_HEAD|M_INLINE, 0); + elementType("listing", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("map", M_BLOCK|M_AREA, M_INLINE, 0); + elementType("marquee", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("menu", M_LI, M_BLOCK, 0); + elementType("meta", M_EMPTY, M_HEAD, 0); + elementType("nobr", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("noframes", M_BODY|M_BLOCK|M_INLINE, M_BLOCK|M_HTML|M_FRAME, 0); + elementType("noscript", M_PCDATA|M_INLINE|M_BLOCK, M_BLOCK, 0); + elementType("object", M_PCDATA|M_PARAM|M_INLINE|M_BLOCK, M_HEAD|M_INLINE|M_NOLINK, 0); + elementType("ol", M_LI, M_BLOCK, 0); + elementType("optgroup", M_OPTIONS, M_OPTIONS, 0); + elementType("option", M_PCDATA, M_OPTION|M_OPTIONS, 0); + elementType("p", M_PCDATA|M_INLINE|M_TABLE, M_BLOCK|M_P, 0); + elementType("param", M_EMPTY, M_PARAM, 0); + elementType("pre", M_PCDATA|M_INLINE, M_BLOCK, 0); + elementType("q", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rb", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rbc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("rtc", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("ruby", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("s", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("samp", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("script", M_PCDATA, M_ANY & ~M_ROOT, F_CDATA); + elementType("select", M_OPTIONS, M_INLINE, 0); + elementType("small", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("span", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + 
elementType("strike", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("strong", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("style", M_PCDATA, M_HEAD|M_INLINE, F_CDATA); + elementType("sub", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("sup", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("table", M_FORM|M_TABULAR, M_BLOCK|M_TABLE, F_NOFORCE); + elementType("tbody", M_TR, M_TABULAR, 0); + elementType("td", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0); + elementType("textarea", M_PCDATA, M_INLINE, 0); + elementType("tfoot", M_TR|M_FORM|M_CELL, M_TABULAR, 0); + elementType("th", M_PCDATA|M_INLINE|M_BLOCK, M_CELL, 0); + elementType("thead", M_TR|M_FORM|M_CELL, M_TABULAR, 0); + elementType("title", M_PCDATA, M_HEAD, 0); + elementType("tr", M_FORM|M_CELL, M_TR|M_TABULAR, 0); + elementType("tt", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("u", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, F_RESTART); + elementType("ul", M_LI, M_BLOCK, 0); + elementType("var", M_PCDATA|M_INLINE, M_INLINE|M_NOLINK, 0); + elementType("wbr", M_EMPTY, M_INLINE|M_NOLINK, 0); + elementType("xmp", M_PCDATA|M_INLINE, M_BLOCK, 0); + } // elementTypes + + void parents() + { + parent("", "body"); + parent("html", ""); + parent("a", "body"); + parent("abbr", "body"); + parent("acronym", "body"); + parent("address", "body"); + parent("applet", "body"); + parent("area", "map"); + parent("b", "body"); + parent("base", "head"); + parent("basefont", "body"); + parent("bdo", "body"); + parent("bgsound", "head"); + parent("big", "body"); + parent("blink", "body"); + parent("blockquote", "body"); + parent("body", "html"); + parent("br", "body"); + parent("button", "form"); + parent("canvas", "body"); + parent("caption", "table"); + parent("center", "body"); + parent("cite", "body"); + parent("code", "body"); + parent("col", "table"); + parent("colgroup", "table"); + parent("comment", "body"); + parent("dd", "dl"); + parent("del", 
"body"); + parent("dfn", "body"); + parent("dir", "body"); + parent("div", "body"); + parent("dl", "body"); + parent("dt", "dl"); + parent("em", "body"); + parent("fieldset", "form"); + parent("font", "body"); + parent("form", "body"); + parent("frame", "frameset"); + parent("frameset", "html"); + parent("h1", "body"); + parent("h2", "body"); + parent("h3", "body"); + parent("h4", "body"); + parent("h5", "body"); + parent("h6", "body"); + parent("head", "html"); + parent("hr", "body"); + parent("i", "body"); + parent("iframe", "body"); + parent("img", "body"); + parent("input", "form"); + parent("ins", "body"); + parent("isindex", "head"); + parent("kbd", "body"); + parent("label", "form"); + parent("legend", "fieldset"); + parent("li", "ul"); + parent("link", "head"); + parent("listing", "body"); + parent("map", "body"); + parent("marquee", "body"); + parent("menu", "body"); + parent("meta", "head"); + parent("nobr", "body"); + parent("noframes", "html"); + parent("noscript", "body"); + parent("object", "body"); + parent("ol", "body"); + parent("optgroup", "select"); + parent("option", "select"); + parent("p", "body"); + parent("param", "object"); + parent("pre", "body"); + parent("q", "body"); + parent("rb", "body"); + parent("rbc", "body"); + parent("rp", "body"); + parent("rt", "body"); + parent("rtc", "body"); + parent("ruby", "body"); + parent("s", "body"); + parent("samp", "body"); + parent("script", "html"); + parent("select", "form"); + parent("small", "body"); + parent("span", "body"); + parent("strike", "body"); + parent("strong", "body"); + parent("style", "head"); + parent("sub", "body"); + parent("sup", "body"); + parent("table", "body"); + parent("tbody", "table"); + parent("td", "tr"); + parent("textarea", "form"); + parent("tfoot", "table"); + parent("th", "tr"); + parent("thead", "table"); + parent("title", "head"); + parent("tr", "tbody"); + parent("tt", "body"); + parent("u", "body"); + parent("ul", "body"); + parent("var", "body"); + 
parent("wbr", "body"); + parent("xmp", "body"); + } // parents + + void attributes() + { + attributes_misc(); + attributes_class(); + attributes_dir(); + attributes_id(); + attributes_lang(); + } // attributes + + void attributes_misc() + { + attribute("a", "hreflang", "NMTOKEN", ""); + attribute("a", "shape", "CDATA", "rect"); + attribute("a", "tabindex", "NMTOKEN", ""); + attribute("applet", "align", "NMTOKEN", ""); + attribute("area", "nohref", "BOOLEAN", ""); + attribute("area", "shape", "CDATA", "rect"); + attribute("area", "tabindex", "NMTOKEN", ""); + attribute("br", "clear", "CDATA", "none"); + attribute("button", "disabled", "BOOLEAN", ""); + attribute("button", "tabindex", "NMTOKEN", ""); + attribute("button", "type", "CDATA", "submit"); + attribute("caption", "align", "NMTOKEN", ""); + attribute("col", "align", "NMTOKEN", ""); + attribute("col", "span", "CDATA", "1"); + attribute("col", "valign", "NMTOKEN", ""); + attribute("colgroup", "align", "NMTOKEN", ""); + attribute("colgroup", "span", "CDATA", "1"); + attribute("colgroup", "valign", "NMTOKEN", ""); + attribute("dir", "compact", "BOOLEAN", ""); + attribute("div", "align", "NMTOKEN", ""); + attribute("dl", "compact", "BOOLEAN", ""); + attribute("form", "enctype", "CDATA", "application/x-www-form-urlencoded"); + attribute("form", "method", "CDATA", "get"); + attribute("frame", "frameborder", "CDATA", "1"); + attribute("frame", "noresize", "BOOLEAN", ""); + attribute("frame", "scrolling", "CDATA", "auto"); + attribute("h1", "align", "NMTOKEN", ""); + attribute("h2", "align", "NMTOKEN", ""); + attribute("h3", "align", "NMTOKEN", ""); + attribute("h4", "align", "NMTOKEN", ""); + attribute("h5", "align", "NMTOKEN", ""); + attribute("h6", "align", "NMTOKEN", ""); + attribute("hr", "align", "NMTOKEN", ""); + attribute("hr", "noshade", "BOOLEAN", ""); + attribute("iframe", "align", "NMTOKEN", ""); + attribute("iframe", "frameborder", "CDATA", "1"); + attribute("iframe", "scrolling", "CDATA", "auto"); + 
attribute("img", "align", "NMTOKEN", ""); + attribute("img", "ismap", "BOOLEAN", ""); + attribute("input", "align", "NMTOKEN", ""); + attribute("input", "checked", "BOOLEAN", ""); + attribute("input", "disabled", "BOOLEAN", ""); + attribute("input", "ismap", "BOOLEAN", ""); + attribute("input", "maxlength", "NMTOKEN", ""); + attribute("input", "readonly", "BOOLEAN", ""); + attribute("input", "tabindex", "NMTOKEN", ""); + attribute("input", "type", "CDATA", "text"); + attribute("label", "for", "IDREF", ""); + attribute("legend", "align", "NMTOKEN", ""); + attribute("li", "value", "NMTOKEN", ""); + attribute("link", "hreflang", "NMTOKEN", ""); + attribute("marquee", "width", "NMTOKEN", ""); + attribute("menu", "compact", "BOOLEAN", ""); + attribute("meta", "http-equiv", "NMTOKEN", ""); + attribute("meta", "name", "NMTOKEN", ""); + attribute("object", "align", "NMTOKEN", ""); + attribute("object", "declare", "BOOLEAN", ""); + attribute("object", "tabindex", "NMTOKEN", ""); + attribute("ol", "compact", "BOOLEAN", ""); + attribute("ol", "start", "NMTOKEN", ""); + attribute("optgroup", "disabled", "BOOLEAN", ""); + attribute("option", "disabled", "BOOLEAN", ""); + attribute("option", "selected", "BOOLEAN", ""); + attribute("p", "align", "NMTOKEN", ""); + attribute("param", "valuetype", "CDATA", "data"); + attribute("pre", "width", "NMTOKEN", ""); + attribute("rt", "rbspan", "CDATA", "1"); + attribute("script", "defer", "BOOLEAN", ""); + attribute("select", "disabled", "BOOLEAN", ""); + attribute("select", "multiple", "BOOLEAN", ""); + attribute("select", "size", "NMTOKEN", ""); + attribute("select", "tabindex", "NMTOKEN", ""); + attribute("table", "align", "NMTOKEN", ""); + attribute("table", "frame", "NMTOKEN", ""); + attribute("table", "rules", "NMTOKEN", ""); + attribute("tbody", "align", "NMTOKEN", ""); + attribute("tbody", "valign", "NMTOKEN", ""); + attribute("td", "align", "NMTOKEN", ""); + attribute("td", "colspan", "CDATA", "1"); + attribute("td", "headers", 
"IDREFS", ""); + attribute("td", "nowrap", "BOOLEAN", ""); + attribute("td", "rowspan", "CDATA", "1"); + attribute("td", "scope", "NMTOKEN", ""); + attribute("td", "valign", "NMTOKEN", ""); + attribute("textarea", "cols", "NMTOKEN", ""); + attribute("textarea", "disabled", "BOOLEAN", ""); + attribute("textarea", "readonly", "BOOLEAN", ""); + attribute("textarea", "rows", "NMTOKEN", ""); + attribute("textarea", "tabindex", "NMTOKEN", ""); + attribute("tfoot", "align", "NMTOKEN", ""); + attribute("tfoot", "valign", "NMTOKEN", ""); + attribute("th", "align", "NMTOKEN", ""); + attribute("th", "colspan", "CDATA", "1"); + attribute("th", "headers", "IDREFS", ""); + attribute("th", "nowrap", "BOOLEAN", ""); + attribute("th", "rowspan", "CDATA", "1"); + attribute("th", "scope", "NMTOKEN", ""); + attribute("th", "valign", "NMTOKEN", ""); + attribute("thead", "align", "NMTOKEN", ""); + attribute("thead", "valign", "NMTOKEN", ""); + attribute("tr", "align", "NMTOKEN", ""); + attribute("tr", "valign", "NMTOKEN", ""); + attribute("ul", "compact", "BOOLEAN", ""); + attribute("ul", "type", "NMTOKEN", ""); + attribute("xmp", "width", "NMTOKEN", ""); + } // attributes_misc + + void attributes_class() + { + attribute("a", "class", "NMTOKEN", ""); + attribute("abbr", "class", "NMTOKEN", ""); + attribute("acronym", "class", "NMTOKEN", ""); + attribute("address", "class", "NMTOKEN", ""); + attribute("applet", "class", "NMTOKEN", ""); + attribute("area", "class", "NMTOKEN", ""); + attribute("b", "class", "NMTOKEN", ""); + attribute("base", "class", "NMTOKEN", ""); + attribute("basefont", "class", "NMTOKEN", ""); + attribute("bdo", "class", "NMTOKEN", ""); + attribute("bgsound", "class", "NMTOKEN", ""); + attribute("big", "class", "NMTOKEN", ""); + attribute("blink", "class", "NMTOKEN", ""); + attribute("blockquote", "class", "NMTOKEN", ""); + attribute("body", "class", "NMTOKEN", ""); + attribute("br", "class", "NMTOKEN", ""); + attribute("button", "class", "NMTOKEN", ""); + 
attribute("canvas", "class", "NMTOKEN", ""); + attribute("caption", "class", "NMTOKEN", ""); + attribute("center", "class", "NMTOKEN", ""); + attribute("cite", "class", "NMTOKEN", ""); + attribute("code", "class", "NMTOKEN", ""); + attribute("col", "class", "NMTOKEN", ""); + attribute("colgroup", "class", "NMTOKEN", ""); + attribute("comment", "class", "NMTOKEN", ""); + attribute("dd", "class", "NMTOKEN", ""); + attribute("del", "class", "NMTOKEN", ""); + attribute("dfn", "class", "NMTOKEN", ""); + attribute("dir", "class", "NMTOKEN", ""); + attribute("div", "class", "NMTOKEN", ""); + attribute("dl", "class", "NMTOKEN", ""); + attribute("dt", "class", "NMTOKEN", ""); + attribute("em", "class", "NMTOKEN", ""); + attribute("fieldset", "class", "NMTOKEN", ""); + attribute("font", "class", "NMTOKEN", ""); + attribute("form", "class", "NMTOKEN", ""); + attribute("frame", "class", "NMTOKEN", ""); + attribute("frameset", "class", "NMTOKEN", ""); + attribute("h1", "class", "NMTOKEN", ""); + attribute("h2", "class", "NMTOKEN", ""); + attribute("h3", "class", "NMTOKEN", ""); + attribute("h4", "class", "NMTOKEN", ""); + attribute("h5", "class", "NMTOKEN", ""); + attribute("h6", "class", "NMTOKEN", ""); + attribute("head", "class", "NMTOKEN", ""); + attribute("hr", "class", "NMTOKEN", ""); + attribute("html", "class", "NMTOKEN", ""); + attribute("i", "class", "NMTOKEN", ""); + attribute("iframe", "class", "NMTOKEN", ""); + attribute("img", "class", "NMTOKEN", ""); + attribute("input", "class", "NMTOKEN", ""); + attribute("ins", "class", "NMTOKEN", ""); + attribute("isindex", "class", "NMTOKEN", ""); + attribute("kbd", "class", "NMTOKEN", ""); + attribute("label", "class", "NMTOKEN", ""); + attribute("legend", "class", "NMTOKEN", ""); + attribute("li", "class", "NMTOKEN", ""); + attribute("link", "class", "NMTOKEN", ""); + attribute("listing", "class", "NMTOKEN", ""); + attribute("map", "class", "NMTOKEN", ""); + attribute("marquee", "class", "NMTOKEN", ""); + attribute("menu", 
"class", "NMTOKEN", ""); + attribute("meta", "class", "NMTOKEN", ""); + attribute("nobr", "class", "NMTOKEN", ""); + attribute("noframes", "class", "NMTOKEN", ""); + attribute("noscript", "class", "NMTOKEN", ""); + attribute("object", "class", "NMTOKEN", ""); + attribute("ol", "class", "NMTOKEN", ""); + attribute("optgroup", "class", "NMTOKEN", ""); + attribute("option", "class", "NMTOKEN", ""); + attribute("p", "class", "NMTOKEN", ""); + attribute("param", "class", "NMTOKEN", ""); + attribute("pre", "class", "NMTOKEN", ""); + attribute("q", "class", "NMTOKEN", ""); + attribute("rb", "class", "NMTOKEN", ""); + attribute("rbc", "class", "NMTOKEN", ""); + attribute("rp", "class", "NMTOKEN", ""); + attribute("rt", "class", "NMTOKEN", ""); + attribute("rtc", "class", "NMTOKEN", ""); + attribute("ruby", "class", "NMTOKEN", ""); + attribute("s", "class", "NMTOKEN", ""); + attribute("samp", "class", "NMTOKEN", ""); + attribute("script", "class", "NMTOKEN", ""); + attribute("select", "class", "NMTOKEN", ""); + attribute("small", "class", "NMTOKEN", ""); + attribute("span", "class", "NMTOKEN", ""); + attribute("strike", "class", "NMTOKEN", ""); + attribute("strong", "class", "NMTOKEN", ""); + attribute("style", "class", "NMTOKEN", ""); + attribute("sub", "class", "NMTOKEN", ""); + attribute("sup", "class", "NMTOKEN", ""); + attribute("table", "class", "NMTOKEN", ""); + attribute("tbody", "class", "NMTOKEN", ""); + attribute("td", "class", "NMTOKEN", ""); + attribute("textarea", "class", "NMTOKEN", ""); + attribute("tfoot", "class", "NMTOKEN", ""); + attribute("th", "class", "NMTOKEN", ""); + attribute("thead", "class", "NMTOKEN", ""); + attribute("title", "class", "NMTOKEN", ""); + attribute("tr", "class", "NMTOKEN", ""); + attribute("tt", "class", "NMTOKEN", ""); + attribute("u", "class", "NMTOKEN", ""); + attribute("ul", "class", "NMTOKEN", ""); + attribute("var", "class", "NMTOKEN", ""); + attribute("wbr", "class", "NMTOKEN", ""); + attribute("xmp", "class", "NMTOKEN", 
""); + } // attributes_class + + void attributes_dir() + { + + attribute("a", "dir", "NMTOKEN", ""); + attribute("abbr", "dir", "NMTOKEN", ""); + attribute("acronym", "dir", "NMTOKEN", ""); + attribute("address", "dir", "NMTOKEN", ""); + attribute("applet", "dir", "NMTOKEN", ""); + attribute("area", "dir", "NMTOKEN", ""); + attribute("b", "dir", "NMTOKEN", ""); + attribute("base", "dir", "NMTOKEN", ""); + attribute("basefont", "dir", "NMTOKEN", ""); + attribute("bdo", "dir", "NMTOKEN", ""); + attribute("bgsound", "dir", "NMTOKEN", ""); + attribute("big", "dir", "NMTOKEN", ""); + attribute("blink", "dir", "NMTOKEN", ""); + attribute("blockquote", "dir", "NMTOKEN", ""); + attribute("body", "dir", "NMTOKEN", ""); + attribute("br", "dir", "NMTOKEN", ""); + attribute("button", "dir", "NMTOKEN", ""); + attribute("canvas", "dir", "NMTOKEN", ""); + attribute("caption", "dir", "NMTOKEN", ""); + attribute("center", "dir", "NMTOKEN", ""); + attribute("cite", "dir", "NMTOKEN", ""); + attribute("code", "dir", "NMTOKEN", ""); + attribute("col", "dir", "NMTOKEN", ""); + attribute("colgroup", "dir", "NMTOKEN", ""); + attribute("comment", "dir", "NMTOKEN", ""); + attribute("dd", "dir", "NMTOKEN", ""); + attribute("del", "dir", "NMTOKEN", ""); + attribute("dfn", "dir", "NMTOKEN", ""); + attribute("dir", "dir", "NMTOKEN", ""); + attribute("div", "dir", "NMTOKEN", ""); + attribute("dl", "dir", "NMTOKEN", ""); + attribute("dt", "dir", "NMTOKEN", ""); + attribute("em", "dir", "NMTOKEN", ""); + attribute("fieldset", "dir", "NMTOKEN", ""); + attribute("font", "dir", "NMTOKEN", ""); + attribute("form", "dir", "NMTOKEN", ""); + attribute("frame", "dir", "NMTOKEN", ""); + attribute("frameset", "dir", "NMTOKEN", ""); + attribute("h1", "dir", "NMTOKEN", ""); + attribute("h2", "dir", "NMTOKEN", ""); + attribute("h3", "dir", "NMTOKEN", ""); + attribute("h4", "dir", "NMTOKEN", ""); + attribute("h5", "dir", "NMTOKEN", ""); + attribute("h6", "dir", "NMTOKEN", ""); + attribute("head", "dir", 
"NMTOKEN", ""); + attribute("hr", "dir", "NMTOKEN", ""); + attribute("html", "dir", "NMTOKEN", ""); + attribute("i", "dir", "NMTOKEN", ""); + attribute("iframe", "dir", "NMTOKEN", ""); + attribute("img", "dir", "NMTOKEN", ""); + attribute("input", "dir", "NMTOKEN", ""); + attribute("ins", "dir", "NMTOKEN", ""); + attribute("isindex", "dir", "NMTOKEN", ""); + attribute("kbd", "dir", "NMTOKEN", ""); + attribute("label", "dir", "NMTOKEN", ""); + attribute("legend", "dir", "NMTOKEN", ""); + attribute("li", "dir", "NMTOKEN", ""); + attribute("link", "dir", "NMTOKEN", ""); + attribute("listing", "dir", "NMTOKEN", ""); + attribute("map", "dir", "NMTOKEN", ""); + attribute("marquee", "dir", "NMTOKEN", ""); + attribute("menu", "dir", "NMTOKEN", ""); + attribute("meta", "dir", "NMTOKEN", ""); + attribute("nobr", "dir", "NMTOKEN", ""); + attribute("noframes", "dir", "NMTOKEN", ""); + attribute("noscript", "dir", "NMTOKEN", ""); + attribute("object", "dir", "NMTOKEN", ""); + attribute("ol", "dir", "NMTOKEN", ""); + attribute("optgroup", "dir", "NMTOKEN", ""); + attribute("option", "dir", "NMTOKEN", ""); + attribute("p", "dir", "NMTOKEN", ""); + attribute("param", "dir", "NMTOKEN", ""); + attribute("pre", "dir", "NMTOKEN", ""); + attribute("q", "dir", "NMTOKEN", ""); + attribute("rb", "dir", "NMTOKEN", ""); + attribute("rbc", "dir", "NMTOKEN", ""); + attribute("rp", "dir", "NMTOKEN", ""); + attribute("rt", "dir", "NMTOKEN", ""); + attribute("rtc", "dir", "NMTOKEN", ""); + attribute("ruby", "dir", "NMTOKEN", ""); + attribute("s", "dir", "NMTOKEN", ""); + attribute("samp", "dir", "NMTOKEN", ""); + attribute("script", "dir", "NMTOKEN", ""); + attribute("select", "dir", "NMTOKEN", ""); + attribute("small", "dir", "NMTOKEN", ""); + attribute("span", "dir", "NMTOKEN", ""); + attribute("strike", "dir", "NMTOKEN", ""); + attribute("strong", "dir", "NMTOKEN", ""); + attribute("style", "dir", "NMTOKEN", ""); + attribute("sub", "dir", "NMTOKEN", ""); + attribute("sup", "dir", "NMTOKEN", 
""); + attribute("table", "dir", "NMTOKEN", ""); + attribute("tbody", "dir", "NMTOKEN", ""); + attribute("td", "dir", "NMTOKEN", ""); + attribute("textarea", "dir", "NMTOKEN", ""); + attribute("tfoot", "dir", "NMTOKEN", ""); + attribute("th", "dir", "NMTOKEN", ""); + attribute("thead", "dir", "NMTOKEN", ""); + attribute("title", "dir", "NMTOKEN", ""); + attribute("tr", "dir", "NMTOKEN", ""); + attribute("tt", "dir", "NMTOKEN", ""); + attribute("u", "dir", "NMTOKEN", ""); + attribute("ul", "dir", "NMTOKEN", ""); + attribute("var", "dir", "NMTOKEN", ""); + attribute("wbr", "dir", "NMTOKEN", ""); + attribute("xmp", "dir", "NMTOKEN", ""); + } // attributes_dir + + void attributes_id() + { + attribute("a", "id", "ID", ""); + attribute("abbr", "id", "ID", ""); + attribute("acronym", "id", "ID", ""); + attribute("address", "id", "ID", ""); + attribute("applet", "id", "ID", ""); + attribute("area", "id", "ID", ""); + attribute("b", "id", "ID", ""); + attribute("base", "id", "ID", ""); + attribute("basefont", "id", "ID", ""); + attribute("bdo", "id", "ID", ""); + attribute("bgsound", "id", "ID", ""); + attribute("big", "id", "ID", ""); + attribute("blink", "id", "ID", ""); + attribute("blockquote", "id", "ID", ""); + attribute("body", "id", "ID", ""); + attribute("br", "id", "ID", ""); + attribute("button", "id", "ID", ""); + attribute("canvas", "id", "ID", ""); + attribute("caption", "id", "ID", ""); + attribute("center", "id", "ID", ""); + attribute("cite", "id", "ID", ""); + attribute("code", "id", "ID", ""); + attribute("col", "id", "ID", ""); + attribute("colgroup", "id", "ID", ""); + attribute("comment", "id", "ID", ""); + attribute("dd", "id", "ID", ""); + attribute("del", "id", "ID", ""); + attribute("dfn", "id", "ID", ""); + attribute("dir", "id", "ID", ""); + attribute("div", "id", "ID", ""); + attribute("dl", "id", "ID", ""); + attribute("dt", "id", "ID", ""); + attribute("em", "id", "ID", ""); + attribute("fieldset", "id", "ID", ""); + attribute("font", "id", 
"ID", ""); + attribute("form", "id", "ID", ""); + attribute("frame", "id", "ID", ""); + attribute("frameset", "id", "ID", ""); + attribute("h1", "id", "ID", ""); + attribute("h2", "id", "ID", ""); + attribute("h3", "id", "ID", ""); + attribute("h4", "id", "ID", ""); + attribute("h5", "id", "ID", ""); + attribute("h6", "id", "ID", ""); + attribute("head", "id", "ID", ""); + attribute("hr", "id", "ID", ""); + attribute("html", "id", "ID", ""); + attribute("i", "id", "ID", ""); + attribute("iframe", "id", "ID", ""); + attribute("img", "id", "ID", ""); + attribute("input", "id", "ID", ""); + attribute("ins", "id", "ID", ""); + attribute("isindex", "id", "ID", ""); + attribute("kbd", "id", "ID", ""); + attribute("label", "id", "ID", ""); + attribute("legend", "id", "ID", ""); + attribute("li", "id", "ID", ""); + attribute("link", "id", "ID", ""); + attribute("listing", "id", "ID", ""); + attribute("map", "id", "ID", ""); + attribute("marquee", "id", "ID", ""); + attribute("menu", "id", "ID", ""); + attribute("meta", "id", "ID", ""); + attribute("nobr", "id", "ID", ""); + attribute("noframes", "id", "ID", ""); + attribute("noscript", "id", "ID", ""); + attribute("object", "id", "ID", ""); + attribute("ol", "id", "ID", ""); + attribute("optgroup", "id", "ID", ""); + attribute("option", "id", "ID", ""); + attribute("p", "id", "ID", ""); + attribute("param", "id", "ID", ""); + attribute("pre", "id", "ID", ""); + attribute("q", "id", "ID", ""); + attribute("rb", "id", "ID", ""); + attribute("rbc", "id", "ID", ""); + attribute("rp", "id", "ID", ""); + attribute("rt", "id", "ID", ""); + attribute("rtc", "id", "ID", ""); + attribute("ruby", "id", "ID", ""); + attribute("s", "id", "ID", ""); + attribute("samp", "id", "ID", ""); + attribute("script", "id", "ID", ""); + attribute("select", "id", "ID", ""); + attribute("small", "id", "ID", ""); + attribute("span", "id", "ID", ""); + attribute("strike", "id", "ID", ""); + attribute("strong", "id", "ID", ""); + attribute("style", 
"id", "ID", ""); + attribute("sub", "id", "ID", ""); + attribute("sup", "id", "ID", ""); + attribute("table", "id", "ID", ""); + attribute("tbody", "id", "ID", ""); + attribute("td", "id", "ID", ""); + attribute("textarea", "id", "ID", ""); + attribute("tfoot", "id", "ID", ""); + attribute("th", "id", "ID", ""); + attribute("thead", "id", "ID", ""); + attribute("title", "id", "ID", ""); + attribute("tr", "id", "ID", ""); + attribute("tt", "id", "ID", ""); + attribute("u", "id", "ID", ""); + attribute("ul", "id", "ID", ""); + attribute("var", "id", "ID", ""); + attribute("wbr", "id", "ID", ""); + attribute("xmp", "id", "ID", ""); + } // attributes_id + + void attributes_lang() + { + attribute("a", "lang", "NMTOKEN", ""); + attribute("abbr", "lang", "NMTOKEN", ""); + attribute("acronym", "lang", "NMTOKEN", ""); + attribute("address", "lang", "NMTOKEN", ""); + attribute("applet", "lang", "NMTOKEN", ""); + attribute("area", "lang", "NMTOKEN", ""); + attribute("b", "lang", "NMTOKEN", ""); + attribute("base", "lang", "NMTOKEN", ""); + attribute("basefont", "lang", "NMTOKEN", ""); + attribute("bdo", "lang", "NMTOKEN", ""); + attribute("bgsound", "lang", "NMTOKEN", ""); + attribute("big", "lang", "NMTOKEN", ""); + attribute("blink", "lang", "NMTOKEN", ""); + attribute("blockquote", "lang", "NMTOKEN", ""); + attribute("body", "lang", "NMTOKEN", ""); + attribute("br", "lang", "NMTOKEN", ""); + attribute("button", "lang", "NMTOKEN", ""); + attribute("canvas", "lang", "NMTOKEN", ""); + attribute("caption", "lang", "NMTOKEN", ""); + attribute("center", "lang", "NMTOKEN", ""); + attribute("cite", "lang", "NMTOKEN", ""); + attribute("code", "lang", "NMTOKEN", ""); + attribute("col", "lang", "NMTOKEN", ""); + attribute("colgroup", "lang", "NMTOKEN", ""); + attribute("comment", "lang", "NMTOKEN", ""); + attribute("dd", "lang", "NMTOKEN", ""); + attribute("del", "lang", "NMTOKEN", ""); + attribute("dfn", "lang", "NMTOKEN", ""); + attribute("dir", "lang", "NMTOKEN", ""); + 
attribute("div", "lang", "NMTOKEN", ""); + attribute("dl", "lang", "NMTOKEN", ""); + attribute("dt", "lang", "NMTOKEN", ""); + attribute("em", "lang", "NMTOKEN", ""); + attribute("fieldset", "lang", "NMTOKEN", ""); + attribute("font", "lang", "NMTOKEN", ""); + attribute("form", "lang", "NMTOKEN", ""); + attribute("frame", "lang", "NMTOKEN", ""); + attribute("frameset", "lang", "NMTOKEN", ""); + attribute("h1", "lang", "NMTOKEN", ""); + attribute("h2", "lang", "NMTOKEN", ""); + attribute("h3", "lang", "NMTOKEN", ""); + attribute("h4", "lang", "NMTOKEN", ""); + attribute("h5", "lang", "NMTOKEN", ""); + attribute("h6", "lang", "NMTOKEN", ""); + attribute("head", "lang", "NMTOKEN", ""); + attribute("hr", "lang", "NMTOKEN", ""); + attribute("html", "lang", "NMTOKEN", ""); + attribute("i", "lang", "NMTOKEN", ""); + attribute("iframe", "lang", "NMTOKEN", ""); + attribute("img", "lang", "NMTOKEN", ""); + attribute("input", "lang", "NMTOKEN", ""); + attribute("ins", "lang", "NMTOKEN", ""); + attribute("isindex", "lang", "NMTOKEN", ""); + attribute("kbd", "lang", "NMTOKEN", ""); + attribute("label", "lang", "NMTOKEN", ""); + attribute("legend", "lang", "NMTOKEN", ""); + attribute("li", "lang", "NMTOKEN", ""); + attribute("link", "lang", "NMTOKEN", ""); + attribute("listing", "lang", "NMTOKEN", ""); + attribute("map", "lang", "NMTOKEN", ""); + attribute("marquee", "lang", "NMTOKEN", ""); + attribute("menu", "lang", "NMTOKEN", ""); + attribute("meta", "lang", "NMTOKEN", ""); + attribute("nobr", "lang", "NMTOKEN", ""); + attribute("noframes", "lang", "NMTOKEN", ""); + attribute("noscript", "lang", "NMTOKEN", ""); + attribute("object", "lang", "NMTOKEN", ""); + attribute("ol", "lang", "NMTOKEN", ""); + attribute("optgroup", "lang", "NMTOKEN", ""); + attribute("option", "lang", "NMTOKEN", ""); + attribute("p", "lang", "NMTOKEN", ""); + attribute("param", "lang", "NMTOKEN", ""); + attribute("pre", "lang", "NMTOKEN", ""); + attribute("q", "lang", "NMTOKEN", ""); + attribute("rb", 
"lang", "NMTOKEN", ""); + attribute("rbc", "lang", "NMTOKEN", ""); + attribute("rp", "lang", "NMTOKEN", ""); + attribute("rt", "lang", "NMTOKEN", ""); + attribute("rtc", "lang", "NMTOKEN", ""); + attribute("ruby", "lang", "NMTOKEN", ""); + attribute("s", "lang", "NMTOKEN", ""); + attribute("samp", "lang", "NMTOKEN", ""); + attribute("script", "lang", "NMTOKEN", ""); + attribute("select", "lang", "NMTOKEN", ""); + attribute("small", "lang", "NMTOKEN", ""); + attribute("span", "lang", "NMTOKEN", ""); + attribute("strike", "lang", "NMTOKEN", ""); + attribute("strong", "lang", "NMTOKEN", ""); + attribute("style", "lang", "NMTOKEN", ""); + attribute("sub", "lang", "NMTOKEN", ""); + attribute("sup", "lang", "NMTOKEN", ""); + attribute("table", "lang", "NMTOKEN", ""); + attribute("tbody", "lang", "NMTOKEN", ""); + attribute("td", "lang", "NMTOKEN", ""); + attribute("textarea", "lang", "NMTOKEN", ""); + attribute("tfoot", "lang", "NMTOKEN", ""); + attribute("th", "lang", "NMTOKEN", ""); + attribute("thead", "lang", "NMTOKEN", ""); + attribute("title", "lang", "NMTOKEN", ""); + attribute("tr", "lang", "NMTOKEN", ""); + attribute("tt", "lang", "NMTOKEN", ""); + attribute("u", "lang", "NMTOKEN", ""); + attribute("ul", "lang", "NMTOKEN", ""); + attribute("var", "lang", "NMTOKEN", ""); + attribute("wbr", "lang", "NMTOKEN", ""); + attribute("xmp", "lang", "NMTOKEN", ""); + } // attributes_lang + + void entities() + { + //entity("aacgr", 0x03AC); + //entity("Aacgr", 0x0386); + entity("aacute", 0x00E1); + entity("Aacute", 0x00C1); + //entity("abreve", 0x0103); + //entity("Abreve", 0x0102); + //entity("ac", 0x223E); + //entity("acd", 0x223F); + entity("acirc", 0x00E2); + entity("Acirc", 0x00C2); + entity("acute", 0x00B4); + //entity("acy", 0x0430); + //entity("Acy", 0x0410); + entity("aelig", 0x00E6); + entity("AElig", 0x00C6); + //entity("af", 0x2061); + //entity("afr", 0x1D51E); + //entity("Afr", 0x1D504); + //entity("agr", 0x03B1); + //entity("Agr", 0x0391); + entity("agrave", 
0x00E0); + entity("Agrave", 0x00C0); + //entity("alefsym", 0x2135); + //entity("aleph", 0x2135); + //entity("alpha", 0x03B1); + //entity("Alpha", 0x0391); + //entity("amacr", 0x0101); + //entity("Amacr", 0x0100); + //entity("amalg", 0x2A3F); + entity("amp", 0x0026); + //entity("and", 0x2227); + //entity("And", 0x2A53); + //entity("andand", 0x2A55); + //entity("andd", 0x2A5C); + //entity("andslope", 0x2A58); + //entity("andv", 0x2A5A); + //entity("ang", 0x2220); + //entity("ange", 0x29A4); + //entity("angle", 0x2220); + //entity("angmsd", 0x2221); + //entity("angmsdaa", 0x29A8); + //entity("angmsdab", 0x29A9); + //entity("angmsdac", 0x29AA); + //entity("angmsdad", 0x29AB); + //entity("angmsdae", 0x29AC); + //entity("angmsdaf", 0x29AD); + //entity("angmsdag", 0x29AE); + //entity("angmsdah", 0x29AF); + //entity("angrt", 0x221F); + //entity("angrtvb", 0x22BE); + //entity("angrtvbd", 0x299D); + //entity("angsph", 0x2222); + //entity("angst", 0x212B); + //entity("angzarr", 0x237C); + //entity("aogon", 0x0105); + //entity("Aogon", 0x0104); + //entity("aopf", 0x1D552); + //entity("Aopf", 0x1D538); + //entity("ap", 0x2248); + //entity("apacir", 0x2A6F); + //entity("ape", 0x224A); + //entity("apE", 0x2A70); + //entity("apid", 0x224B); + entity("apos", 0x0027); + //entity("ApplyFunction", 0x2061); + //entity("approx", 0x2248); + //entity("approxeq", 0x224A); + entity("aring", 0x00E5); + entity("Aring", 0x00C5); + //entity("ascr", 0x1D4B6); + //entity("Ascr", 0x1D49C); + //entity("Assign", 0x2254); + entity("ast", 0x002A); + //entity("asymp", 0x2248); + //entity("asympeq", 0x224D); + entity("atilde", 0x00E3); + entity("Atilde", 0x00C3); + entity("auml", 0x00E4); + entity("Auml", 0x00C4); + //entity("awconint", 0x2233); + //entity("awint", 0x2A11); + //entity("b.alpha", 0x1D6C2); + //entity("b.beta", 0x1D6C3); + //entity("b.chi", 0x1D6D8); + //entity("b.delta", 0x1D6C5); + //entity("b.Delta", 0x1D6AB); + //entity("b.epsi", 0x1D6C6); + //entity("b.epsiv", 0x1D6DC); + 
//entity("b.eta", 0x1D6C8); + //entity("b.gamma", 0x1D6C4); + //entity("b.Gamma", 0x1D6AA); + //entity("b.gammad", 0x1D7CB); + //entity("b.Gammad", 0x1D7CA); + //entity("b.iota", 0x1D6CA); + //entity("b.kappa", 0x1D6CB); + //entity("b.kappav", 0x1D6DE); + //entity("b.lambda", 0x1D6CC); + //entity("b.Lambda", 0x1D6B2); + //entity("b.mu", 0x1D6CD); + //entity("b.nu", 0x1D6CE); + //entity("b.omega", 0x1D6DA); + //entity("b.Omega", 0x1D6C0); + //entity("b.phi", 0x1D6D7); + //entity("b.Phi", 0x1D6BD); + //entity("b.phiv", 0x1D6DF); + //entity("b.pi", 0x1D6D1); + //entity("b.Pi", 0x1D6B7); + //entity("b.piv", 0x1D6E1); + //entity("b.psi", 0x1D6D9); + //entity("b.Psi", 0x1D6BF); + //entity("b.rho", 0x1D6D2); + //entity("b.rhov", 0x1D6E0); + //entity("b.sigma", 0x1D6D4); + //entity("b.Sigma", 0x1D6BA); + //entity("b.sigmav", 0x1D6D3); + //entity("b.tau", 0x1D6D5); + //entity("b.Theta", 0x1D6AF); + //entity("b.thetas", 0x1D6C9); + //entity("b.thetav", 0x1D6DD); + //entity("b.upsi", 0x1D6D6); + //entity("b.Upsi", 0x1D6BC); + //entity("b.xi", 0x1D6CF); + //entity("b.Xi", 0x1D6B5); + //entity("b.zeta", 0x1D6C7); + //entity("backcong", 0x224C); + //entity("backepsilon", 0x03F6); + //entity("backprime", 0x2035); + //entity("backsim", 0x223D); + //entity("backsimeq", 0x22CD); + //entity("Backslash", 0x2216); + //entity("Barv", 0x2AE7); + //entity("barvee", 0x22BD); + //entity("barwed", 0x2305); + //entity("Barwed", 0x2306); + //entity("barwedge", 0x2305); + //entity("bbrk", 0x23B5); + //entity("bbrktbrk", 0x23B6); + //entity("bcong", 0x224C); + //entity("bcy", 0x0431); + //entity("Bcy", 0x0411); + //entity("bdquo", 0x201E); + //entity("becaus", 0x2235); + //entity("because", 0x2235); + //entity("bemptyv", 0x29B0); + //entity("bepsi", 0x03F6); + //entity("bernou", 0x212C); + //entity("Bernoullis", 0x212C); + //entity("beta", 0x03B2); + //entity("Beta", 0x0392); + //entity("beth", 0x2136); + //entity("between", 0x226C); + //entity("bfr", 0x1D51F); + //entity("Bfr", 0x1D505); + 
//entity("bgr", 0x03B2); + //entity("Bgr", 0x0392); + //entity("bigcap", 0x22C2); + //entity("bigcirc", 0x25EF); + //entity("bigcup", 0x22C3); + //entity("bigodot", 0x2A00); + //entity("bigoplus", 0x2A01); + //entity("bigotimes", 0x2A02); + //entity("bigsqcup", 0x2A06); + //entity("bigstar", 0x2605); + //entity("bigtriangledown", 0x25BD); + //entity("bigtriangleup", 0x25B3); + //entity("biguplus", 0x2A04); + //entity("bigvee", 0x22C1); + //entity("bigwedge", 0x22C0); + //entity("bkarow", 0x290D); + //entity("blacklozenge", 0x29EB); + //entity("blacksquare", 0x25AA); + //entity("blacktriangle", 0x25B4); + //entity("blacktriangledown", 0x25BE); + //entity("blacktriangleleft", 0x25C2); + //entity("blacktriangleright", 0x25B8); + //entity("blank", 0x2423); + //entity("blk12", 0x2592); + //entity("blk14", 0x2591); + //entity("blk34", 0x2593); + //entity("block", 0x2588); + //entity("bnot", 0x2310); + //entity("bNot", 0x2AED); + //entity("bopf", 0x1D553); + //entity("Bopf", 0x1D539); + //entity("bot", 0x22A5); + //entity("bottom", 0x22A5); + //entity("bowtie", 0x22C8); + //entity("boxbox", 0x29C9); + //entity("boxdl", 0x2510); + //entity("boxdL", 0x2555); + //entity("boxDl", 0x2556); + //entity("boxDL", 0x2557); + //entity("boxdr", 0x250C); + //entity("boxdR", 0x2552); + //entity("boxDr", 0x2553); + //entity("boxDR", 0x2554); + //entity("boxh", 0x2500); + //entity("boxH", 0x2550); + //entity("boxhd", 0x252C); + //entity("boxhD", 0x2565); + //entity("boxHd", 0x2564); + //entity("boxHD", 0x2566); + //entity("boxhu", 0x2534); + //entity("boxhU", 0x2568); + //entity("boxHu", 0x2567); + //entity("boxHU", 0x2569); + //entity("boxminus", 0x229F); + //entity("boxplus", 0x229E); + //entity("boxtimes", 0x22A0); + //entity("boxul", 0x2518); + //entity("boxuL", 0x255B); + //entity("boxUl", 0x255C); + //entity("boxUL", 0x255D); + //entity("boxur", 0x2514); + //entity("boxuR", 0x2558); + //entity("boxUr", 0x2559); + //entity("boxUR", 0x255A); + //entity("boxv", 0x2502); + 
//entity("boxV", 0x2551); + //entity("boxvh", 0x253C); + //entity("boxvH", 0x256A); + //entity("boxVh", 0x256B); + //entity("boxVH", 0x256C); + //entity("boxvl", 0x2524); + //entity("boxvL", 0x2561); + //entity("boxVl", 0x2562); + //entity("boxVL", 0x2563); + //entity("boxvr", 0x251C); + //entity("boxvR", 0x255E); + //entity("boxVr", 0x255F); + //entity("boxVR", 0x2560); + //entity("bprime", 0x2035); + //entity("breve", 0x02D8); + entity("brvbar", 0x00A6); + //entity("bscr", 0x1D4B7); + //entity("Bscr", 0x212C); + //entity("bsemi", 0x204F); + //entity("bsim", 0x223D); + //entity("bsime", 0x22CD); + entity("bsol", 0x005C); + //entity("bsolb", 0x29C5); + //entity("bull", 0x2022); + //entity("bullet", 0x2022); + //entity("bump", 0x224E); + //entity("bumpe", 0x224F); + //entity("bumpE", 0x2AAE); + //entity("bumpeq", 0x224F); + //entity("Bumpeq", 0x224E); + //entity("cacute", 0x0107); + //entity("Cacute", 0x0106); + //entity("cap", 0x2229); + //entity("Cap", 0x22D2); + //entity("capand", 0x2A44); + //entity("capbrcup", 0x2A49); + //entity("capcap", 0x2A4B); + //entity("capcup", 0x2A47); + //entity("capdot", 0x2A40); + //entity("CapitalDifferentialD", 0x2145); + //entity("caret", 0x2041); + //entity("caron", 0x02C7); + //entity("Cayleys", 0x212D); + //entity("ccaps", 0x2A4D); + //entity("ccaron", 0x010D); + //entity("Ccaron", 0x010C); + entity("ccedil", 0x00E7); + entity("Ccedil", 0x00C7); + //entity("ccirc", 0x0109); + //entity("Ccirc", 0x0108); + //entity("Cconint", 0x2230); + //entity("ccups", 0x2A4C); + //entity("ccupssm", 0x2A50); + //entity("cdot", 0x010B); + //entity("Cdot", 0x010A); + entity("cedil", 0x00B8); + entity("Cedilla", 0x00B8); + //entity("cemptyv", 0x29B2); + entity("cent", 0x00A2); + entity("centerdot", 0x00B7); + //entity("cfr", 0x1D520); + //entity("Cfr", 0x212D); + //entity("chcy", 0x0447); + //entity("CHcy", 0x0427); + //entity("check", 0x2713); + //entity("checkmark", 0x2713); + //entity("chi", 0x03C7); + //entity("Chi", 0x03A7); + 
//entity("cir", 0x25CB); + //entity("circ", 0x02C6); + //entity("circeq", 0x2257); + //entity("circlearrowleft", 0x21BA); + //entity("circlearrowright", 0x21BB); + //entity("circledast", 0x229B); + //entity("circledcirc", 0x229A); + //entity("circleddash", 0x229D); + //entity("CircleDot", 0x2299); + entity("circledR", 0x00AE); + //entity("circledS", 0x24C8); + //entity("CircleMinus", 0x2296); + //entity("CirclePlus", 0x2295); + //entity("CircleTimes", 0x2297); + //entity("cire", 0x2257); + //entity("cirE", 0x29C3); + //entity("cirfnint", 0x2A10); + //entity("cirmid", 0x2AEF); + //entity("cirscir", 0x29C2); + //entity("ClockwiseContourIntegral", 0x2232); + //entity("CloseCurlyDoubleQuote", 0x201D); + //entity("CloseCurlyQuote", 0x2019); + //entity("clubs", 0x2663); + //entity("clubsuit", 0x2663); + entity("colon", 0x003A); + //entity("Colon", 0x2237); + //entity("colone", 0x2254); + //entity("Colone", 0x2A74); + //entity("coloneq", 0x2254); + entity("comma", 0x002C); + entity("commat", 0x0040); + //entity("comp", 0x2201); + //entity("compfn", 0x2218); + //entity("complement", 0x2201); + //entity("complexes", 0x2102); + //entity("cong", 0x2245); + //entity("congdot", 0x2A6D); + //entity("Congruent", 0x2261); + //entity("conint", 0x222E); + //entity("Conint", 0x222F); + //entity("ContourIntegral", 0x222E); + //entity("copf", 0x1D554); + //entity("Copf", 0x2102); + //entity("coprod", 0x2210); + //entity("Coproduct", 0x2210); + entity("copy", 0x00A9); + //entity("copysr", 0x2117); + //entity("CounterClockwiseContourIntegral", 0x2233); + //entity("crarr", 0x21B5); + //entity("cross", 0x2717); + //entity("Cross", 0x2A2F); + //entity("cscr", 0x1D4B8); + //entity("Cscr", 0x1D49E); + //entity("csub", 0x2ACF); + //entity("csube", 0x2AD1); + //entity("csup", 0x2AD0); + //entity("csupe", 0x2AD2); + //entity("ctdot", 0x22EF); + //entity("cudarrl", 0x2938); + //entity("cudarrr", 0x2935); + //entity("cuepr", 0x22DE); + //entity("cuesc", 0x22DF); + //entity("cularr", 0x21B6); + 
//entity("cularrp", 0x293D); + //entity("cup", 0x222A); + //entity("Cup", 0x22D3); + //entity("cupbrcap", 0x2A48); + //entity("cupcap", 0x2A46); + //entity("CupCap", 0x224D); + //entity("cupcup", 0x2A4A); + //entity("cupdot", 0x228D); + //entity("cupor", 0x2A45); + //entity("curarr", 0x21B7); + //entity("curarrm", 0x293C); + //entity("curlyeqprec", 0x22DE); + //entity("curlyeqsucc", 0x22DF); + //entity("curlyvee", 0x22CE); + //entity("curlywedge", 0x22CF); + entity("curren", 0x00A4); + //entity("curvearrowleft", 0x21B6); + //entity("curvearrowright", 0x21B7); + //entity("cuvee", 0x22CE); + //entity("cuwed", 0x22CF); + //entity("cwconint", 0x2232); + //entity("cwint", 0x2231); + //entity("cylcty", 0x232D); + //entity("dagger", 0x2020); + //entity("Dagger", 0x2021); + //entity("daleth", 0x2138); + //entity("darr", 0x2193); + //entity("dArr", 0x21D3); + //entity("Darr", 0x21A1); + //entity("dash", 0x2010); + //entity("dashv", 0x22A3); + //entity("Dashv", 0x2AE4); + //entity("dbkarow", 0x290F); + //entity("dblac", 0x02DD); + //entity("dcaron", 0x010F); + //entity("Dcaron", 0x010E); + //entity("dcy", 0x0434); + //entity("Dcy", 0x0414); + //entity("dd", 0x2146); + //entity("DD", 0x2145); + //entity("ddagger", 0x2021); + //entity("ddarr", 0x21CA); + //entity("DDotrahd", 0x2911); + //entity("ddotseq", 0x2A77); + entity("deg", 0x00B0); + //entity("Del", 0x2207); + //entity("delta", 0x03B4); + //entity("Delta", 0x0394); + //entity("demptyv", 0x29B1); + //entity("dfisht", 0x297F); + //entity("dfr", 0x1D521); + //entity("Dfr", 0x1D507); + //entity("dgr", 0x03B4); + //entity("Dgr", 0x0394); + //entity("dHar", 0x2965); + //entity("dharl", 0x21C3); + //entity("dharr", 0x21C2); + entity("DiacriticalAcute", 0x00B4); + //entity("DiacriticalDot", 0x02D9); + //entity("DiacriticalDoubleAcute", 0x02DD); + entity("DiacriticalGrave", 0x0060); + //entity("DiacriticalTilde", 0x02DC); + //entity("diam", 0x22C4); + //entity("diamond", 0x22C4); + //entity("diamondsuit", 0x2666); + 
//entity("diams", 0x2666); + entity("die", 0x00A8); + //entity("DifferentialD", 0x2146); + //entity("digamma", 0x03DD); + //entity("disin", 0x22F2); + entity("div", 0x00F7); + entity("divide", 0x00F7); + //entity("divideontimes", 0x22C7); + //entity("divonx", 0x22C7); + //entity("djcy", 0x0452); + //entity("DJcy", 0x0402); + //entity("dlcorn", 0x231E); + //entity("dlcrop", 0x230D); + entity("dollar", 0x0024); + //entity("dopf", 0x1D555); + //entity("Dopf", 0x1D53B); + //entity("dot", 0x02D9); + entity("Dot", 0x00A8); + //entity("doteq", 0x2250); + //entity("doteqdot", 0x2251); + //entity("DotEqual", 0x2250); + //entity("dotminus", 0x2238); + //entity("dotplus", 0x2214); + //entity("dotsquare", 0x22A1); + //entity("doublebarwedge", 0x2306); + //entity("DoubleContourIntegral", 0x222F); + entity("DoubleDot", 0x00A8); + //entity("DoubleDownArrow", 0x21D3); + //entity("DoubleLeftArrow", 0x21D0); + //entity("DoubleLeftRightArrow", 0x21D4); + //entity("DoubleLeftTee", 0x2AE4); + //entity("DoubleLongLeftArrow", 0x27F8); + //entity("DoubleLongLeftRightArrow", 0x27FA); + //entity("DoubleLongRightArrow", 0x27F9); + //entity("DoubleRightArrow", 0x21D2); + //entity("DoubleRightTee", 0x22A8); + //entity("DoubleUpArrow", 0x21D1); + //entity("DoubleUpDownArrow", 0x21D5); + //entity("DoubleVerticalBar", 0x2225); + //entity("downarrow", 0x2193); + //entity("Downarrow", 0x21D3); + //entity("DownArrowBar", 0x2913); + //entity("DownArrowUpArrow", 0x21F5); + //entity("downdownarrows", 0x21CA); + //entity("downharpoonleft", 0x21C3); + //entity("downharpoonright", 0x21C2); + //entity("DownLeftRightVector", 0x2950); + //entity("DownLeftTeeVector", 0x295E); + //entity("DownLeftVector", 0x21BD); + //entity("DownLeftVectorBar", 0x2956); + //entity("DownRightTeeVector", 0x295F); + //entity("DownRightVector", 0x21C1); + //entity("DownRightVectorBar", 0x2957); + //entity("DownTee", 0x22A4); + //entity("DownTeeArrow", 0x21A7); + //entity("drbkarow", 0x2910); + //entity("drcorn", 0x231F); + 
//entity("drcrop", 0x230C); + //entity("dscr", 0x1D4B9); + //entity("Dscr", 0x1D49F); + //entity("dscy", 0x0455); + //entity("DScy", 0x0405); + //entity("dsol", 0x29F6); + //entity("dstrok", 0x0111); + //entity("Dstrok", 0x0110); + //entity("dtdot", 0x22F1); + //entity("dtri", 0x25BF); + //entity("dtrif", 0x25BE); + //entity("duarr", 0x21F5); + //entity("duhar", 0x296F); + //entity("dwangle", 0x29A6); + //entity("dzcy", 0x045F); + //entity("DZcy", 0x040F); + //entity("dzigrarr", 0x27FF); + //entity("eacgr", 0x03AD); + //entity("Eacgr", 0x0388); + entity("eacute", 0x00E9); + entity("Eacute", 0x00C9); + //entity("easter", 0x2A6E); + //entity("ecaron", 0x011B); + //entity("Ecaron", 0x011A); + //entity("ecir", 0x2256); + entity("ecirc", 0x00EA); + entity("Ecirc", 0x00CA); + //entity("ecolon", 0x2255); + //entity("ecy", 0x044D); + //entity("Ecy", 0x042D); + //entity("eDDot", 0x2A77); + //entity("edot", 0x0117); + //entity("eDot", 0x2251); + //entity("Edot", 0x0116); + //entity("ee", 0x2147); + //entity("eeacgr", 0x03AE); + //entity("EEacgr", 0x0389); + //entity("eegr", 0x03B7); + //entity("EEgr", 0x0397); + //entity("efDot", 0x2252); + //entity("efr", 0x1D522); + //entity("Efr", 0x1D508); + //entity("eg", 0x2A9A); + //entity("egr", 0x03B5); + //entity("Egr", 0x0395); + entity("egrave", 0x00E8); + entity("Egrave", 0x00C8); + //entity("egs", 0x2A96); + //entity("egsdot", 0x2A98); + //entity("el", 0x2A99); + //entity("Element", 0x2208); + //entity("elinters", 0x23E7); + //entity("ell", 0x2113); + //entity("els", 0x2A95); + //entity("elsdot", 0x2A97); + //entity("emacr", 0x0113); + //entity("Emacr", 0x0112); + //entity("empty", 0x2205); + //entity("emptyset", 0x2205); + //entity("EmptySmallSquare", 0x25FB); + //entity("emptyv", 0x2205); + //entity("EmptyVerySmallSquare", 0x25AB); + //entity("emsp", 0x2003); + //entity("emsp13", 0x2004); + //entity("emsp14", 0x2005); + //entity("eng", 0x014B); + //entity("ENG", 0x014A); + //entity("ensp", 0x2002); + //entity("eogon", 
0x0119); + //entity("Eogon", 0x0118); + //entity("eopf", 0x1D556); + //entity("Eopf", 0x1D53C); + //entity("epar", 0x22D5); + //entity("eparsl", 0x29E3); + //entity("eplus", 0x2A71); + //entity("epsi", 0x03F5); + //entity("epsilon", 0x03B5); + //entity("Epsilon", 0x0395); + //entity("epsiv", 0x03B5); + //entity("eqcirc", 0x2256); + //entity("eqcolon", 0x2255); + //entity("eqsim", 0x2242); + //entity("eqslantgtr", 0x2A96); + //entity("eqslantless", 0x2A95); + //entity("Equal", 0x2A75); + entity("equals", 0x003D); + //entity("EqualTilde", 0x2242); + //entity("equest", 0x225F); + //entity("Equilibrium", 0x21CC); + //entity("equiv", 0x2261); + //entity("equivDD", 0x2A78); + //entity("eqvparsl", 0x29E5); + //entity("erarr", 0x2971); + //entity("erDot", 0x2253); + //entity("escr", 0x212F); + //entity("Escr", 0x2130); + //entity("esdot", 0x2250); + //entity("esim", 0x2242); + //entity("Esim", 0x2A73); + //entity("eta", 0x03B7); + //entity("Eta", 0x0397); + entity("eth", 0x00F0); + entity("ETH", 0x00D0); + entity("euml", 0x00EB); + entity("Euml", 0x00CB); + //entity("euro", 0x20AC); + entity("excl", 0x0021); + //entity("exist", 0x2203); + //entity("Exists", 0x2203); + //entity("expectation", 0x2130); + //entity("exponentiale", 0x2147); + //entity("fallingdotseq", 0x2252); + //entity("fcy", 0x0444); + //entity("Fcy", 0x0424); + //entity("female", 0x2640); + //entity("ffilig", 0xFB03); + //entity("fflig", 0xFB00); + //entity("ffllig", 0xFB04); + //entity("ffr", 0x1D523); + //entity("Ffr", 0x1D509); + //entity("filig", 0xFB01); + //entity("FilledSmallSquare", 0x25FC); + //entity("FilledVerySmallSquare", 0x25AA); + //entity("flat", 0x266D); + //entity("fllig", 0xFB02); + //entity("fltns", 0x25B1); + //entity("fnof", 0x0192); + //entity("fopf", 0x1D557); + //entity("Fopf", 0x1D53D); + //entity("forall", 0x2200); + //entity("fork", 0x22D4); + //entity("forkv", 0x2AD9); + //entity("Fouriertrf", 0x2131); + //entity("fpartint", 0x2A0D); + entity("frac12", 0x00BD); + 
//entity("frac13", 0x2153); + entity("frac14", 0x00BC); + //entity("frac15", 0x2155); + //entity("frac16", 0x2159); + //entity("frac18", 0x215B); + //entity("frac23", 0x2154); + //entity("frac25", 0x2156); + entity("frac34", 0x00BE); + //entity("frac35", 0x2157); + //entity("frac38", 0x215C); + //entity("frac45", 0x2158); + //entity("frac56", 0x215A); + //entity("frac58", 0x215D); + //entity("frac78", 0x215E); + //entity("frasl", 0x2044); + //entity("frown", 0x2322); + //entity("fscr", 0x1D4BB); + //entity("Fscr", 0x2131); + //entity("gacute", 0x01F5); + //entity("gamma", 0x03B3); + //entity("Gamma", 0x0393); + //entity("gammad", 0x03DD); + //entity("Gammad", 0x03DC); + //entity("gap", 0x2A86); + //entity("gbreve", 0x011F); + //entity("Gbreve", 0x011E); + //entity("Gcedil", 0x0122); + //entity("gcirc", 0x011D); + //entity("Gcirc", 0x011C); + //entity("gcy", 0x0433); + //entity("Gcy", 0x0413); + //entity("gdot", 0x0121); + //entity("Gdot", 0x0120); + //entity("ge", 0x2265); + //entity("gE", 0x2267); + //entity("gel", 0x22DB); + //entity("gEl", 0x2A8C); + //entity("geq", 0x2265); + //entity("geqq", 0x2267); + //entity("geqslant", 0x2A7E); + //entity("ges", 0x2A7E); + //entity("gescc", 0x2AA9); + //entity("gesdot", 0x2A80); + //entity("gesdoto", 0x2A82); + //entity("gesdotol", 0x2A84); + //entity("gesles", 0x2A94); + //entity("gfr", 0x1D524); + //entity("Gfr", 0x1D50A); + //entity("gg", 0x226B); + //entity("Gg", 0x22D9); + //entity("ggg", 0x22D9); + //entity("ggr", 0x03B3); + //entity("Ggr", 0x0393); + //entity("gimel", 0x2137); + //entity("gjcy", 0x0453); + //entity("GJcy", 0x0403); + //entity("gl", 0x2277); + //entity("gla", 0x2AA5); + //entity("glE", 0x2A92); + //entity("glj", 0x2AA4); + //entity("gnap", 0x2A8A); + //entity("gnapprox", 0x2A8A); + //entity("gne", 0x2A88); + //entity("gnE", 0x2269); + //entity("gneq", 0x2A88); + //entity("gneqq", 0x2269); + //entity("gnsim", 0x22E7); + //entity("gopf", 0x1D558); + //entity("Gopf", 0x1D53E); + entity("grave", 0x0060); 
+ //entity("GreaterEqual", 0x2265); + //entity("GreaterEqualLess", 0x22DB); + //entity("GreaterFullEqual", 0x2267); + //entity("GreaterGreater", 0x2AA2); + //entity("GreaterLess", 0x2277); + //entity("GreaterSlantEqual", 0x2A7E); + //entity("GreaterTilde", 0x2273); + //entity("gscr", 0x210A); + //entity("Gscr", 0x1D4A2); + //entity("gsim", 0x2273); + //entity("gsime", 0x2A8E); + //entity("gsiml", 0x2A90); + entity("gt", 0x003E); + //entity("Gt", 0x226B); + //entity("gtcc", 0x2AA7); + //entity("gtcir", 0x2A7A); + //entity("gtdot", 0x22D7); + //entity("gtlPar", 0x2995); + //entity("gtquest", 0x2A7C); + //entity("gtrapprox", 0x2A86); + //entity("gtrarr", 0x2978); + //entity("gtrdot", 0x22D7); + //entity("gtreqless", 0x22DB); + //entity("gtreqqless", 0x2A8C); + //entity("gtrless", 0x2277); + //entity("gtrsim", 0x2273); + //entity("Hacek", 0x02C7); + //entity("hairsp", 0x200A); + entity("half", 0x00BD); + //entity("hamilt", 0x210B); + //entity("hardcy", 0x044A); + //entity("HARDcy", 0x042A); + //entity("harr", 0x2194); + //entity("hArr", 0x21D4); + //entity("harrcir", 0x2948); + //entity("harrw", 0x21AD); + entity("Hat", 0x005E); + //entity("hbar", 0x210F); + //entity("hcirc", 0x0125); + //entity("Hcirc", 0x0124); + //entity("hearts", 0x2665); + //entity("heartsuit", 0x2665); + //entity("hellip", 0x2026); + //entity("hercon", 0x22B9); + //entity("hfr", 0x1D525); + //entity("Hfr", 0x210C); + //entity("HilbertSpace", 0x210B); + //entity("hksearow", 0x2925); + //entity("hkswarow", 0x2926); + //entity("hoarr", 0x21FF); + //entity("homtht", 0x223B); + //entity("hookleftarrow", 0x21A9); + //entity("hookrightarrow", 0x21AA); + //entity("hopf", 0x1D559); + //entity("Hopf", 0x210D); + //entity("horbar", 0x2015); + //entity("HorizontalLine", 0x2500); + //entity("hscr", 0x1D4BD); + //entity("Hscr", 0x210B); + //entity("hslash", 0x210F); + //entity("hstrok", 0x0127); + //entity("Hstrok", 0x0126); + //entity("HumpDownHump", 0x224E); + //entity("HumpEqual", 0x224F); + 
//entity("hybull", 0x2043); + //entity("hyphen", 0x2010); + //entity("iacgr", 0x03AF); + //entity("Iacgr", 0x038A); + entity("iacute", 0x00ED); + entity("Iacute", 0x00CD); + //entity("ic", 0x2063); + entity("icirc", 0x00EE); + entity("Icirc", 0x00CE); + //entity("icy", 0x0438); + //entity("Icy", 0x0418); + //entity("idiagr", 0x0390); + //entity("idigr", 0x03CA); + //entity("Idigr", 0x03AA); + //entity("Idot", 0x0130); + //entity("iecy", 0x0435); + //entity("IEcy", 0x0415); + entity("iexcl", 0x00A1); + //entity("iff", 0x21D4); + //entity("ifr", 0x1D526); + //entity("Ifr", 0x2111); + //entity("igr", 0x03B9); + //entity("Igr", 0x0399); + entity("igrave", 0x00EC); + entity("Igrave", 0x00CC); + //entity("ii", 0x2148); + //entity("iiiint", 0x2A0C); + //entity("iiint", 0x222D); + //entity("iinfin", 0x29DC); + //entity("iiota", 0x2129); + //entity("ijlig", 0x0133); + //entity("IJlig", 0x0132); + //entity("Im", 0x2111); + //entity("imacr", 0x012B); + //entity("Imacr", 0x012A); + //entity("image", 0x2111); + //entity("ImaginaryI", 0x2148); + //entity("imagline", 0x2110); + //entity("imagpart", 0x2111); + //entity("imath", 0x0131); + //entity("imof", 0x22B7); + //entity("imped", 0x01B5); + //entity("Implies", 0x21D2); + //entity("in", 0x2208); + //entity("incare", 0x2105); + //entity("infin", 0x221E); + //entity("infintie", 0x29DD); + //entity("inodot", 0x0131); + //entity("int", 0x222B); + //entity("Int", 0x222C); + //entity("intcal", 0x22BA); + //entity("integers", 0x2124); + //entity("Integral", 0x222B); + //entity("intercal", 0x22BA); + //entity("Intersection", 0x22C2); + //entity("intlarhk", 0x2A17); + //entity("intprod", 0x2A3C); + //entity("InvisibleComma", 0x2063); + //entity("InvisibleTimes", 0x2062); + //entity("iocy", 0x0451); + //entity("IOcy", 0x0401); + //entity("iogon", 0x012F); + //entity("Iogon", 0x012E); + //entity("iopf", 0x1D55A); + //entity("Iopf", 0x1D540); + //entity("iota", 0x03B9); + //entity("Iota", 0x0399); + //entity("iprod", 0x2A3C); + 
entity("iquest", 0x00BF); + //entity("iscr", 0x1D4BE); + //entity("Iscr", 0x2110); + //entity("isin", 0x2208); + //entity("isindot", 0x22F5); + //entity("isinE", 0x22F9); + //entity("isins", 0x22F4); + //entity("isinsv", 0x22F3); + //entity("isinv", 0x2208); + //entity("it", 0x2062); + //entity("itilde", 0x0129); + //entity("Itilde", 0x0128); + //entity("iukcy", 0x0456); + //entity("Iukcy", 0x0406); + entity("iuml", 0x00EF); + entity("Iuml", 0x00CF); + //entity("jcirc", 0x0135); + //entity("Jcirc", 0x0134); + //entity("jcy", 0x0439); + //entity("Jcy", 0x0419); + //entity("jfr", 0x1D527); + //entity("Jfr", 0x1D50D); + //entity("jmath", 0x0237); + //entity("jopf", 0x1D55B); + //entity("Jopf", 0x1D541); + //entity("jscr", 0x1D4BF); + //entity("Jscr", 0x1D4A5); + //entity("jsercy", 0x0458); + //entity("Jsercy", 0x0408); + //entity("jukcy", 0x0454); + //entity("Jukcy", 0x0404); + //entity("kappa", 0x03BA); + //entity("Kappa", 0x039A); + //entity("kappav", 0x03F0); + //entity("kcedil", 0x0137); + //entity("Kcedil", 0x0136); + //entity("kcy", 0x043A); + //entity("Kcy", 0x041A); + //entity("kfr", 0x1D528); + //entity("Kfr", 0x1D50E); + //entity("kgr", 0x03BA); + //entity("Kgr", 0x039A); + //entity("kgreen", 0x0138); + //entity("khcy", 0x0445); + //entity("KHcy", 0x0425); + //entity("khgr", 0x03C7); + //entity("KHgr", 0x03A7); + //entity("kjcy", 0x045C); + //entity("KJcy", 0x040C); + //entity("kopf", 0x1D55C); + //entity("Kopf", 0x1D542); + //entity("kscr", 0x1D4C0); + //entity("Kscr", 0x1D4A6); + //entity("lAarr", 0x21DA); + //entity("lacute", 0x013A); + //entity("Lacute", 0x0139); + //entity("laemptyv", 0x29B4); + //entity("lagran", 0x2112); + //entity("lambda", 0x03BB); + //entity("Lambda", 0x039B); + //entity("lang", 0x2329); + //entity("Lang", 0x27EA); + //entity("langd", 0x2991); + //entity("langle", 0x2329); + //entity("lap", 0x2A85); + //entity("Laplacetrf", 0x2112); + entity("laquo", 0x00AB); + //entity("larr", 0x2190); + //entity("lArr", 0x21D0); + 
//entity("Larr", 0x219E); + //entity("larrb", 0x21E4); + //entity("larrbfs", 0x291F); + //entity("larrfs", 0x291D); + //entity("larrhk", 0x21A9); + //entity("larrlp", 0x21AB); + //entity("larrpl", 0x2939); + //entity("larrsim", 0x2973); + //entity("larrtl", 0x21A2); + //entity("lat", 0x2AAB); + //entity("latail", 0x2919); + //entity("lAtail", 0x291B); + //entity("late", 0x2AAD); + //entity("lbarr", 0x290C); + //entity("lBarr", 0x290E); + //entity("lbbrk", 0x2997); + entity("lbrace", 0x007B); + entity("lbrack", 0x005B); + //entity("lbrke", 0x298B); + //entity("lbrksld", 0x298F); + //entity("lbrkslu", 0x298D); + //entity("lcaron", 0x013E); + //entity("Lcaron", 0x013D); + //entity("lcedil", 0x013C); + //entity("Lcedil", 0x013B); + //entity("lceil", 0x2308); + entity("lcub", 0x007B); + //entity("lcy", 0x043B); + //entity("Lcy", 0x041B); + //entity("ldca", 0x2936); + //entity("ldquo", 0x201C); + //entity("ldquor", 0x201E); + //entity("ldrdhar", 0x2967); + //entity("ldrushar", 0x294B); + //entity("ldsh", 0x21B2); + //entity("le", 0x2264); + //entity("lE", 0x2266); + //entity("LeftAngleBracket", 0x2329); + //entity("leftarrow", 0x2190); + //entity("Leftarrow", 0x21D0); + //entity("LeftArrowBar", 0x21E4); + //entity("LeftArrowRightArrow", 0x21C6); + //entity("leftarrowtail", 0x21A2); + //entity("LeftCeiling", 0x2308); + //entity("LeftDoubleBracket", 0x27E6); + //entity("LeftDownTeeVector", 0x2961); + //entity("LeftDownVector", 0x21C3); + //entity("LeftDownVectorBar", 0x2959); + //entity("LeftFloor", 0x230A); + //entity("leftharpoondown", 0x21BD); + //entity("leftharpoonup", 0x21BC); + //entity("leftleftarrows", 0x21C7); + //entity("leftrightarrow", 0x2194); + //entity("Leftrightarrow", 0x21D4); + //entity("leftrightarrows", 0x21C6); + //entity("leftrightharpoons", 0x21CB); + //entity("leftrightsquigarrow", 0x21AD); + //entity("LeftRightVector", 0x294E); + //entity("LeftTee", 0x22A3); + //entity("LeftTeeArrow", 0x21A4); + //entity("LeftTeeVector", 0x295A); + 
//entity("leftthreetimes", 0x22CB); + //entity("LeftTriangle", 0x22B2); + //entity("LeftTriangleBar", 0x29CF); + //entity("LeftTriangleEqual", 0x22B4); + //entity("LeftUpDownVector", 0x2951); + //entity("LeftUpTeeVector", 0x2960); + //entity("LeftUpVector", 0x21BF); + //entity("LeftUpVectorBar", 0x2958); + //entity("LeftVector", 0x21BC); + //entity("LeftVectorBar", 0x2952); + //entity("leg", 0x22DA); + //entity("lEg", 0x2A8B); + //entity("leq", 0x2264); + //entity("leqq", 0x2266); + //entity("leqslant", 0x2A7D); + //entity("les", 0x2A7D); + //entity("lescc", 0x2AA8); + //entity("lesdot", 0x2A7F); + //entity("lesdoto", 0x2A81); + //entity("lesdotor", 0x2A83); + //entity("lesges", 0x2A93); + //entity("lessapprox", 0x2A85); + //entity("lessdot", 0x22D6); + //entity("lesseqgtr", 0x22DA); + //entity("lesseqqgtr", 0x2A8B); + //entity("LessEqualGreater", 0x22DA); + //entity("LessFullEqual", 0x2266); + //entity("LessGreater", 0x2276); + //entity("lessgtr", 0x2276); + //entity("LessLess", 0x2AA1); + //entity("lesssim", 0x2272); + //entity("LessSlantEqual", 0x2A7D); + //entity("LessTilde", 0x2272); + //entity("lfisht", 0x297C); + //entity("lfloor", 0x230A); + //entity("lfr", 0x1D529); + //entity("Lfr", 0x1D50F); + //entity("lg", 0x2276); + //entity("lgE", 0x2A91); + //entity("lgr", 0x03BB); + //entity("Lgr", 0x039B); + //entity("lHar", 0x2962); + //entity("lhard", 0x21BD); + //entity("lharu", 0x21BC); + //entity("lharul", 0x296A); + //entity("lhblk", 0x2584); + //entity("ljcy", 0x0459); + //entity("LJcy", 0x0409); + //entity("ll", 0x226A); + //entity("Ll", 0x22D8); + //entity("llarr", 0x21C7); + //entity("llcorner", 0x231E); + //entity("Lleftarrow", 0x21DA); + //entity("llhard", 0x296B); + //entity("lltri", 0x25FA); + //entity("lmidot", 0x0140); + //entity("Lmidot", 0x013F); + //entity("lmoust", 0x23B0); + //entity("lmoustache", 0x23B0); + //entity("lnap", 0x2A89); + //entity("lnapprox", 0x2A89); + //entity("lne", 0x2A87); + //entity("lnE", 0x2268); + //entity("lneq", 
0x2A87); + //entity("lneqq", 0x2268); + //entity("lnsim", 0x22E6); + //entity("loang", 0x27EC); + //entity("loarr", 0x21FD); + //entity("lobrk", 0x27E6); + //entity("longleftarrow", 0x27F5); + //entity("Longleftarrow", 0x27F8); + //entity("longleftrightarrow", 0x27F7); + //entity("Longleftrightarrow", 0x27FA); + //entity("longmapsto", 0x27FC); + //entity("longrightarrow", 0x27F6); + //entity("Longrightarrow", 0x27F9); + //entity("looparrowleft", 0x21AB); + //entity("looparrowright", 0x21AC); + //entity("lopar", 0x2985); + //entity("lopf", 0x1D55D); + //entity("Lopf", 0x1D543); + //entity("loplus", 0x2A2D); + //entity("lotimes", 0x2A34); + //entity("lowast", 0x2217); + entity("lowbar", 0x005F); + //entity("LowerLeftArrow", 0x2199); + //entity("LowerRightArrow", 0x2198); + //entity("loz", 0x25CA); + //entity("lozenge", 0x25CA); + //entity("lozf", 0x29EB); + entity("lpar", 0x0028); + //entity("lparlt", 0x2993); + //entity("lrarr", 0x21C6); + //entity("lrcorner", 0x231F); + //entity("lrhar", 0x21CB); + //entity("lrhard", 0x296D); + //entity("lrm", 0x200E); + //entity("lrtri", 0x22BF); + //entity("lsaquo", 0x2039); + //entity("lscr", 0x1D4C1); + //entity("Lscr", 0x2112); + //entity("lsh", 0x21B0); + //entity("lsim", 0x2272); + //entity("lsime", 0x2A8D); + //entity("lsimg", 0x2A8F); + entity("lsqb", 0x005B); + //entity("lsquo", 0x2018); + //entity("lsquor", 0x201A); + //entity("lstrok", 0x0142); + //entity("Lstrok", 0x0141); + entity("lt", 0x003C); + //entity("Lt", 0x226A); + //entity("ltcc", 0x2AA6); + //entity("ltcir", 0x2A79); + //entity("ltdot", 0x22D6); + //entity("lthree", 0x22CB); + //entity("ltimes", 0x22C9); + //entity("ltlarr", 0x2976); + //entity("ltquest", 0x2A7B); + //entity("ltri", 0x25C3); + //entity("ltrie", 0x22B4); + //entity("ltrif", 0x25C2); + //entity("ltrPar", 0x2996); + //entity("lurdshar", 0x294A); + //entity("luruhar", 0x2966); + entity("macr", 0x00AF); + //entity("male", 0x2642); + //entity("malt", 0x2720); + //entity("maltese", 0x2720); + 
//entity("map", 0x21A6); + //entity("Map", 0x2905); + //entity("mapsto", 0x21A6); + //entity("mapstodown", 0x21A7); + //entity("mapstoleft", 0x21A4); + //entity("mapstoup", 0x21A5); + //entity("marker", 0x25AE); + //entity("mcomma", 0x2A29); + //entity("mcy", 0x043C); + //entity("Mcy", 0x041C); + //entity("mdash", 0x2014); + //entity("mDDot", 0x223A); + //entity("measuredangle", 0x2221); + //entity("MediumSpace", 0x205F); + //entity("Mellintrf", 0x2133); + //entity("mfr", 0x1D52A); + //entity("Mfr", 0x1D510); + //entity("mgr", 0x03BC); + //entity("Mgr", 0x039C); + //entity("mho", 0x2127); + entity("micro", 0x00B5); + //entity("mid", 0x2223); + //entity("midast", 0x002A); + //entity("midcir", 0x2AF0); + entity("middot", 0x00B7); + //entity("minus", 0x2212); + //entity("minusb", 0x229F); + //entity("minusd", 0x2238); + //entity("minusdu", 0x2A2A); + //entity("MinusPlus", 0x2213); + //entity("mlcp", 0x2ADB); + //entity("mldr", 0x2026); + //entity("mnplus", 0x2213); + //entity("models", 0x22A7); + //entity("mopf", 0x1D55E); + //entity("Mopf", 0x1D544); + //entity("mp", 0x2213); + //entity("mscr", 0x1D4C2); + //entity("Mscr", 0x2133); + //entity("mstpos", 0x223E); + //entity("mu", 0x03BC); + //entity("Mu", 0x039C); + //entity("multimap", 0x22B8); + //entity("mumap", 0x22B8); + //entity("nabla", 0x2207); + //entity("nacute", 0x0144); + //entity("Nacute", 0x0143); + //entity("nap", 0x2249); + //entity("napos", 0x0149); + //entity("napprox", 0x2249); + //entity("natur", 0x266E); + //entity("natural", 0x266E); + //entity("naturals", 0x2115); + entity("nbsp", 0x00A0); + //entity("ncap", 0x2A43); + //entity("ncaron", 0x0148); + //entity("Ncaron", 0x0147); + //entity("ncedil", 0x0146); + //entity("Ncedil", 0x0145); + //entity("ncong", 0x2247); + //entity("ncup", 0x2A42); + //entity("ncy", 0x043D); + //entity("Ncy", 0x041D); + //entity("ndash", 0x2013); + //entity("ne", 0x2260); + //entity("nearhk", 0x2924); + //entity("nearr", 0x2197); + //entity("neArr", 0x21D7); + 
//entity("nearrow", 0x2197); + //entity("NegativeMediumSpace", 0x200B); + //entity("NegativeThickSpace", 0x200B); + //entity("NegativeThinSpace", 0x200B); + //entity("NegativeVeryThinSpace", 0x200B); + //entity("nequiv", 0x2262); + //entity("nesear", 0x2928); + //entity("NestedGreaterGreater", 0x226B); + //entity("NestedLessLess", 0x226A); + entity("NewLine", 0x000A); + //entity("nexist", 0x2204); + //entity("nexists", 0x2204); + //entity("nfr", 0x1D52B); + //entity("Nfr", 0x1D511); + //entity("nge", 0x2271); + //entity("ngeq", 0x2271); + //entity("ngr", 0x03BD); + //entity("Ngr", 0x039D); + //entity("ngsim", 0x2275); + //entity("ngt", 0x226F); + //entity("ngtr", 0x226F); + //entity("nharr", 0x21AE); + //entity("nhArr", 0x21CE); + //entity("nhpar", 0x2AF2); + //entity("ni", 0x220B); + //entity("nis", 0x22FC); + //entity("nisd", 0x22FA); + //entity("niv", 0x220B); + //entity("njcy", 0x045A); + //entity("NJcy", 0x040A); + //entity("nlarr", 0x219A); + //entity("nlArr", 0x21CD); + //entity("nldr", 0x2025); + //entity("nle", 0x2270); + //entity("nleftarrow", 0x219A); + //entity("nLeftarrow", 0x21CD); + //entity("nleftrightarrow", 0x21AE); + //entity("nLeftrightarrow", 0x21CE); + //entity("nleq", 0x2270); + //entity("nless", 0x226E); + //entity("nlsim", 0x2274); + //entity("nlt", 0x226E); + //entity("nltri", 0x22EA); + //entity("nltrie", 0x22EC); + //entity("nmid", 0x2224); + //entity("NoBreak", 0x2060); + entity("NonBreakingSpace", 0x00A0); + //entity("nopf", 0x1D55F); + //entity("Nopf", 0x2115); + entity("not", 0x00AC); + //entity("Not", 0x2AEC); + //entity("NotCongruent", 0x2262); + //entity("NotCupCap", 0x226D); + //entity("NotDoubleVerticalBar", 0x2226); + //entity("NotElement", 0x2209); + //entity("NotEqual", 0x2260); + //entity("NotExists", 0x2204); + //entity("NotGreater", 0x226F); + //entity("NotGreaterEqual", 0x2271); + //entity("NotGreaterLess", 0x2279); + //entity("NotGreaterTilde", 0x2275); + //entity("notin", 0x2209); + //entity("notinva", 0x2209); + 
//entity("notinvb", 0x22F7); + //entity("notinvc", 0x22F6); + //entity("NotLeftTriangle", 0x22EA); + //entity("NotLeftTriangleEqual", 0x22EC); + //entity("NotLess", 0x226E); + //entity("NotLessEqual", 0x2270); + //entity("NotLessGreater", 0x2278); + //entity("NotLessTilde", 0x2274); + //entity("notni", 0x220C); + //entity("notniva", 0x220C); + //entity("notnivb", 0x22FE); + //entity("notnivc", 0x22FD); + //entity("NotPrecedes", 0x2280); + //entity("NotPrecedesSlantEqual", 0x22E0); + //entity("NotReverseElement", 0x220C); + //entity("NotRightTriangle", 0x22EB); + //entity("NotRightTriangleEqual", 0x22ED); + //entity("NotSquareSubsetEqual", 0x22E2); + //entity("NotSquareSupersetEqual", 0x22E3); + //entity("NotSubsetEqual", 0x2288); + //entity("NotSucceeds", 0x2281); + //entity("NotSucceedsSlantEqual", 0x22E1); + //entity("NotSupersetEqual", 0x2289); + //entity("NotTilde", 0x2241); + //entity("NotTildeEqual", 0x2244); + //entity("NotTildeFullEqual", 0x2247); + //entity("NotTildeTilde", 0x2249); + //entity("NotVerticalBar", 0x2224); + //entity("npar", 0x2226); + //entity("nparallel", 0x2226); + //entity("npolint", 0x2A14); + //entity("npr", 0x2280); + //entity("nprcue", 0x22E0); + //entity("nprec", 0x2280); + //entity("nrarr", 0x219B); + //entity("nrArr", 0x21CF); + //entity("nrightarrow", 0x219B); + //entity("nRightarrow", 0x21CF); + //entity("nrtri", 0x22EB); + //entity("nrtrie", 0x22ED); + //entity("nsc", 0x2281); + //entity("nsccue", 0x22E1); + //entity("nscr", 0x1D4C3); + //entity("Nscr", 0x1D4A9); + //entity("nshortmid", 0x2224); + //entity("nshortparallel", 0x2226); + //entity("nsim", 0x2241); + //entity("nsime", 0x2244); + //entity("nsimeq", 0x2244); + //entity("nsmid", 0x2224); + //entity("nspar", 0x2226); + //entity("nsqsube", 0x22E2); + //entity("nsqsupe", 0x22E3); + //entity("nsub", 0x2284); + //entity("nsube", 0x2288); + //entity("nsubseteq", 0x2288); + //entity("nsucc", 0x2281); + //entity("nsup", 0x2285); + //entity("nsupe", 0x2289); + 
//entity("nsupseteq", 0x2289); + //entity("ntgl", 0x2279); + entity("ntilde", 0x00F1); + entity("Ntilde", 0x00D1); + //entity("ntlg", 0x2278); + //entity("ntriangleleft", 0x22EA); + //entity("ntrianglelefteq", 0x22EC); + //entity("ntriangleright", 0x22EB); + //entity("ntrianglerighteq", 0x22ED); + //entity("nu", 0x03BD); + //entity("Nu", 0x039D); + entity("num", 0x0023); + //entity("numero", 0x2116); + //entity("numsp", 0x2007); + //entity("nvdash", 0x22AC); + //entity("nvDash", 0x22AD); + //entity("nVdash", 0x22AE); + //entity("nVDash", 0x22AF); + //entity("nvHarr", 0x2904); + //entity("nvinfin", 0x29DE); + //entity("nvlArr", 0x2902); + //entity("nvrArr", 0x2903); + //entity("nwarhk", 0x2923); + //entity("nwarr", 0x2196); + //entity("nwArr", 0x21D6); + //entity("nwarrow", 0x2196); + //entity("nwnear", 0x2927); + //entity("oacgr", 0x03CC); + //entity("Oacgr", 0x038C); + entity("oacute", 0x00F3); + entity("Oacute", 0x00D3); + //entity("oast", 0x229B); + //entity("ocir", 0x229A); + entity("ocirc", 0x00F4); + entity("Ocirc", 0x00D4); + //entity("ocy", 0x043E); + //entity("Ocy", 0x041E); + //entity("odash", 0x229D); + //entity("odblac", 0x0151); + //entity("Odblac", 0x0150); + //entity("odiv", 0x2A38); + //entity("odot", 0x2299); + //entity("odsold", 0x29BC); + //entity("oelig", 0x0153); + //entity("OElig", 0x0152); + //entity("ofcir", 0x29BF); + //entity("ofr", 0x1D52C); + //entity("Ofr", 0x1D512); + //entity("ogon", 0x02DB); + //entity("ogr", 0x03BF); + //entity("Ogr", 0x039F); + entity("ograve", 0x00F2); + entity("Ograve", 0x00D2); + //entity("ogt", 0x29C1); + //entity("ohacgr", 0x03CE); + //entity("OHacgr", 0x038F); + //entity("ohbar", 0x29B5); + //entity("ohgr", 0x03C9); + //entity("OHgr", 0x03A9); + //entity("ohm", 0x2126); + //entity("oint", 0x222E); + //entity("olarr", 0x21BA); + //entity("olcir", 0x29BE); + //entity("olcross", 0x29BB); + //entity("oline", 0x203E); + //entity("olt", 0x29C0); + //entity("omacr", 0x014D); + //entity("Omacr", 0x014C); + 
//entity("omega", 0x03C9); + //entity("Omega", 0x03A9); + //entity("omicron", 0x03BF); + //entity("Omicron", 0x039F); + //entity("omid", 0x29B6); + //entity("ominus", 0x2296); + //entity("oopf", 0x1D560); + //entity("Oopf", 0x1D546); + //entity("opar", 0x29B7); + //entity("OpenCurlyDoubleQuote", 0x201C); + //entity("OpenCurlyQuote", 0x2018); + //entity("operp", 0x29B9); + //entity("oplus", 0x2295); + //entity("or", 0x2228); + //entity("Or", 0x2A54); + //entity("orarr", 0x21BB); + //entity("ord", 0x2A5D); + //entity("order", 0x2134); + //entity("orderof", 0x2134); + entity("ordf", 0x00AA); + entity("ordm", 0x00BA); + //entity("origof", 0x22B6); + //entity("oror", 0x2A56); + //entity("orslope", 0x2A57); + //entity("orv", 0x2A5B); + //entity("oS", 0x24C8); + //entity("oscr", 0x2134); + //entity("Oscr", 0x1D4AA); + entity("oslash", 0x00F8); + entity("Oslash", 0x00D8); + //entity("osol", 0x2298); + entity("otilde", 0x00F5); + entity("Otilde", 0x00D5); + //entity("otimes", 0x2297); + //entity("Otimes", 0x2A37); + //entity("otimesas", 0x2A36); + entity("ouml", 0x00F6); + entity("Ouml", 0x00D6); + //entity("ovbar", 0x233D); + entity("OverBar", 0x00AF); + //entity("OverBrace", 0xFE37); + //entity("OverBracket", 0x23B4); + //entity("OverParenthesis", 0xFE35); + //entity("par", 0x2225); + entity("para", 0x00B6); + //entity("parallel", 0x2225); + //entity("parsim", 0x2AF3); + //entity("parsl", 0x2AFD); + //entity("part", 0x2202); + //entity("PartialD", 0x2202); + //entity("pcy", 0x043F); + //entity("Pcy", 0x041F); + entity("percnt", 0x0025); + entity("period", 0x002E); + //entity("permil", 0x2030); + //entity("perp", 0x22A5); + //entity("pertenk", 0x2031); + //entity("pfr", 0x1D52D); + //entity("Pfr", 0x1D513); + //entity("pgr", 0x03C0); + //entity("Pgr", 0x03A0); + //entity("phgr", 0x03C6); + //entity("PHgr", 0x03A6); + //entity("phi", 0x03D5); + //entity("Phi", 0x03A6); + //entity("phiv", 0x03C6); + //entity("phmmat", 0x2133); + //entity("phone", 0x260E); + //entity("pi", 
0x03C0); + //entity("Pi", 0x03A0); + //entity("pitchfork", 0x22D4); + //entity("piv", 0x03D6); + //entity("planck", 0x210F); + //entity("planckh", 0x210E); + //entity("plankv", 0x210F); + entity("plus", 0x002B); + //entity("plusacir", 0x2A23); + //entity("plusb", 0x229E); + //entity("pluscir", 0x2A22); + //entity("plusdo", 0x2214); + //entity("plusdu", 0x2A25); + //entity("pluse", 0x2A72); + entity("PlusMinus", 0x00B1); + entity("plusmn", 0x00B1); + //entity("plussim", 0x2A26); + //entity("plustwo", 0x2A27); + entity("pm", 0x00B1); + //entity("Poincareplane", 0x210C); + //entity("pointint", 0x2A15); + //entity("popf", 0x1D561); + //entity("Popf", 0x2119); + entity("pound", 0x00A3); + //entity("pr", 0x227A); + //entity("Pr", 0x2ABB); + //entity("prap", 0x2AB7); + //entity("prcue", 0x227C); + //entity("pre", 0x2AAF); + //entity("prE", 0x2AB3); + //entity("prec", 0x227A); + //entity("precapprox", 0x2AB7); + //entity("preccurlyeq", 0x227C); + //entity("Precedes", 0x227A); + //entity("PrecedesEqual", 0x2AAF); + //entity("PrecedesSlantEqual", 0x227C); + //entity("PrecedesTilde", 0x227E); + //entity("preceq", 0x2AAF); + //entity("precnapprox", 0x2AB9); + //entity("precneqq", 0x2AB5); + //entity("precnsim", 0x22E8); + //entity("precsim", 0x227E); + //entity("prime", 0x2032); + //entity("Prime", 0x2033); + //entity("primes", 0x2119); + //entity("prnap", 0x2AB9); + //entity("prnE", 0x2AB5); + //entity("prnsim", 0x22E8); + //entity("prod", 0x220F); + //entity("Product", 0x220F); + //entity("profalar", 0x232E); + //entity("profline", 0x2312); + //entity("profsurf", 0x2313); + //entity("prop", 0x221D); + //entity("Proportion", 0x2237); + //entity("Proportional", 0x221D); + //entity("propto", 0x221D); + //entity("prsim", 0x227E); + //entity("prurel", 0x22B0); + //entity("pscr", 0x1D4C5); + //entity("Pscr", 0x1D4AB); + //entity("psgr", 0x03C8); + //entity("PSgr", 0x03A8); + //entity("psi", 0x03C8); + //entity("Psi", 0x03A8); + //entity("puncsp", 0x2008); + //entity("qfr", 
0x1D52E); + //entity("Qfr", 0x1D514); + //entity("qint", 0x2A0C); + //entity("qopf", 0x1D562); + //entity("Qopf", 0x211A); + //entity("qprime", 0x2057); + //entity("qscr", 0x1D4C6); + //entity("Qscr", 0x1D4AC); + //entity("quaternions", 0x210D); + //entity("quatint", 0x2A16); + entity("quest", 0x003F); + //entity("questeq", 0x225F); + entity("quot", 0x0022); + //entity("rAarr", 0x21DB); + //entity("race", 0x29DA); + //entity("racute", 0x0155); + //entity("Racute", 0x0154); + //entity("radic", 0x221A); + //entity("raemptyv", 0x29B3); + //entity("rang", 0x232A); + //entity("Rang", 0x27EB); + //entity("rangd", 0x2992); + //entity("range", 0x29A5); + //entity("rangle", 0x232A); + entity("raquo", 0x00BB); + //entity("rarr", 0x2192); + //entity("rArr", 0x21D2); + //entity("Rarr", 0x21A0); + //entity("rarrap", 0x2975); + //entity("rarrb", 0x21E5); + //entity("rarrbfs", 0x2920); + //entity("rarrc", 0x2933); + //entity("rarrfs", 0x291E); + //entity("rarrhk", 0x21AA); + //entity("rarrlp", 0x21AC); + //entity("rarrpl", 0x2945); + //entity("rarrsim", 0x2974); + //entity("rarrtl", 0x21A3); + //entity("Rarrtl", 0x2916); + //entity("rarrw", 0x219D); + //entity("ratail", 0x291A); + //entity("rAtail", 0x291C); + //entity("ratio", 0x2236); + //entity("rationals", 0x211A); + //entity("rbarr", 0x290D); + //entity("rBarr", 0x290F); + //entity("RBarr", 0x2910); + //entity("rbbrk", 0x2998); + entity("rbrace", 0x007D); + entity("rbrack", 0x005D); + //entity("rbrke", 0x298C); + //entity("rbrksld", 0x298E); + //entity("rbrkslu", 0x2990); + //entity("rcaron", 0x0159); + //entity("Rcaron", 0x0158); + //entity("rcedil", 0x0157); + //entity("Rcedil", 0x0156); + //entity("rceil", 0x2309); + entity("rcub", 0x007D); + //entity("rcy", 0x0440); + //entity("Rcy", 0x0420); + //entity("rdca", 0x2937); + //entity("rdldhar", 0x2969); + //entity("rdquo", 0x201D); + //entity("rdquor", 0x201D); + //entity("rdsh", 0x21B3); + //entity("Re", 0x211C); + //entity("real", 0x211C); + //entity("realine", 0x211B); + 
//entity("realpart", 0x211C); + //entity("reals", 0x211D); + //entity("rect", 0x25AD); + entity("reg", 0x00AE); + //entity("ReverseElement", 0x220B); + //entity("ReverseEquilibrium", 0x21CB); + //entity("ReverseUpEquilibrium", 0x296F); + //entity("rfisht", 0x297D); + //entity("rfloor", 0x230B); + //entity("rfr", 0x1D52F); + //entity("Rfr", 0x211C); + //entity("rgr", 0x03C1); + //entity("Rgr", 0x03A1); + //entity("rHar", 0x2964); + //entity("rhard", 0x21C1); + //entity("rharu", 0x21C0); + //entity("rharul", 0x296C); + //entity("rho", 0x03C1); + //entity("Rho", 0x03A1); + //entity("rhov", 0x03F1); + //entity("RightAngleBracket", 0x232A); + //entity("rightarrow", 0x2192); + //entity("Rightarrow", 0x21D2); + //entity("RightArrowBar", 0x21E5); + //entity("RightArrowLeftArrow", 0x21C4); + //entity("rightarrowtail", 0x21A3); + //entity("RightCeiling", 0x2309); + //entity("RightDoubleBracket", 0x27E7); + //entity("RightDownTeeVector", 0x295D); + //entity("RightDownVector", 0x21C2); + //entity("RightDownVectorBar", 0x2955); + //entity("RightFloor", 0x230B); + //entity("rightharpoondown", 0x21C1); + //entity("rightharpoonup", 0x21C0); + //entity("rightleftarrows", 0x21C4); + //entity("rightleftharpoons", 0x21CC); + //entity("rightrightarrows", 0x21C9); + //entity("rightsquigarrow", 0x219D); + //entity("RightTee", 0x22A2); + //entity("RightTeeArrow", 0x21A6); + //entity("RightTeeVector", 0x295B); + //entity("rightthreetimes", 0x22CC); + //entity("RightTriangle", 0x22B3); + //entity("RightTriangleBar", 0x29D0); + //entity("RightTriangleEqual", 0x22B5); + //entity("RightUpDownVector", 0x294F); + //entity("RightUpTeeVector", 0x295C); + //entity("RightUpVector", 0x21BE); + //entity("RightUpVectorBar", 0x2954); + //entity("RightVector", 0x21C0); + //entity("RightVectorBar", 0x2953); + //entity("ring", 0x02DA); + //entity("risingdotseq", 0x2253); + //entity("rlarr", 0x21C4); + //entity("rlhar", 0x21CC); + //entity("rlm", 0x200F); + //entity("rmoust", 0x23B1); + 
//entity("rmoustache", 0x23B1); + //entity("rnmid", 0x2AEE); + //entity("roang", 0x27ED); + //entity("roarr", 0x21FE); + //entity("robrk", 0x27E7); + //entity("ropar", 0x2986); + //entity("ropf", 0x1D563); + //entity("Ropf", 0x211D); + //entity("roplus", 0x2A2E); + //entity("rotimes", 0x2A35); + //entity("RoundImplies", 0x2970); + entity("rpar", 0x0029); + //entity("rpargt", 0x2994); + //entity("rppolint", 0x2A12); + //entity("rrarr", 0x21C9); + //entity("Rrightarrow", 0x21DB); + //entity("rsaquo", 0x203A); + //entity("rscr", 0x1D4C7); + //entity("Rscr", 0x211B); + //entity("rsh", 0x21B1); + entity("rsqb", 0x005D); + //entity("rsquo", 0x2019); + //entity("rsquor", 0x2019); + //entity("rthree", 0x22CC); + //entity("rtimes", 0x22CA); + //entity("rtri", 0x25B9); + //entity("rtrie", 0x22B5); + //entity("rtrif", 0x25B8); + //entity("rtriltri", 0x29CE); + //entity("RuleDelayed", 0x29F4); + //entity("ruluhar", 0x2968); + //entity("rx", 0x211E); + //entity("sacute", 0x015B); + //entity("Sacute", 0x015A); + //entity("sbquo", 0x201A); + //entity("sc", 0x227B); + //entity("Sc", 0x2ABC); + //entity("scap", 0x2AB8); + //entity("scaron", 0x0161); + //entity("Scaron", 0x0160); + //entity("sccue", 0x227D); + //entity("sce", 0x2AB0); + //entity("scE", 0x2AB4); + //entity("scedil", 0x015F); + //entity("Scedil", 0x015E); + //entity("scirc", 0x015D); + //entity("Scirc", 0x015C); + //entity("scnap", 0x2ABA); + //entity("scnE", 0x2AB6); + //entity("scnsim", 0x22E9); + //entity("scpolint", 0x2A13); + //entity("scsim", 0x227F); + //entity("scy", 0x0441); + //entity("Scy", 0x0421); + //entity("sdot", 0x22C5); + //entity("sdotb", 0x22A1); + //entity("sdote", 0x2A66); + //entity("searhk", 0x2925); + //entity("searr", 0x2198); + //entity("seArr", 0x21D8); + //entity("searrow", 0x2198); + entity("sect", 0x00A7); + entity("semi", 0x003B); + //entity("seswar", 0x2929); + //entity("setminus", 0x2216); + //entity("setmn", 0x2216); + //entity("sext", 0x2736); + //entity("sfgr", 0x03C2); + 
//entity("sfr", 0x1D530); + //entity("Sfr", 0x1D516); + //entity("sfrown", 0x2322); + //entity("sgr", 0x03C3); + //entity("Sgr", 0x03A3); + //entity("sharp", 0x266F); + //entity("shchcy", 0x0449); + //entity("SHCHcy", 0x0429); + //entity("shcy", 0x0448); + //entity("SHcy", 0x0428); + //entity("ShortDownArrow", 0x2193); + //entity("ShortLeftArrow", 0x2190); + //entity("shortmid", 0x2223); + //entity("shortparallel", 0x2225); + //entity("ShortRightArrow", 0x2192); + //entity("ShortUpArrow", 0x2191); + entity("shy", 0x00AD); + //entity("sigma", 0x03C3); + //entity("Sigma", 0x03A3); + //entity("sigmaf", 0x03C2); + //entity("sigmav", 0x03C2); + //entity("sim", 0x223C); + //entity("simdot", 0x2A6A); + //entity("sime", 0x2243); + //entity("simeq", 0x2243); + //entity("simg", 0x2A9E); + //entity("simgE", 0x2AA0); + //entity("siml", 0x2A9D); + //entity("simlE", 0x2A9F); + //entity("simne", 0x2246); + //entity("simplus", 0x2A24); + //entity("simrarr", 0x2972); + //entity("slarr", 0x2190); + //entity("SmallCircle", 0x2218); + //entity("smallsetminus", 0x2216); + //entity("smashp", 0x2A33); + //entity("smeparsl", 0x29E4); + //entity("smid", 0x2223); + //entity("smile", 0x2323); + //entity("smt", 0x2AAA); + //entity("smte", 0x2AAC); + //entity("softcy", 0x044C); + //entity("SOFTcy", 0x042C); + entity("sol", 0x002F); + //entity("solb", 0x29C4); + //entity("solbar", 0x233F); + //entity("sopf", 0x1D564); + //entity("Sopf", 0x1D54A); + //entity("spades", 0x2660); + //entity("spadesuit", 0x2660); + //entity("spar", 0x2225); + //entity("sqcap", 0x2293); + //entity("sqcup", 0x2294); + //entity("Sqrt", 0x221A); + //entity("sqsub", 0x228F); + //entity("sqsube", 0x2291); + //entity("sqsubset", 0x228F); + //entity("sqsubseteq", 0x2291); + //entity("sqsup", 0x2290); + //entity("sqsupe", 0x2292); + //entity("sqsupset", 0x2290); + //entity("sqsupseteq", 0x2292); + //entity("squ", 0x25A1); + //entity("square", 0x25A1); + //entity("SquareIntersection", 0x2293); + //entity("SquareSubset", 
0x228F); + //entity("SquareSubsetEqual", 0x2291); + //entity("SquareSuperset", 0x2290); + //entity("SquareSupersetEqual", 0x2292); + //entity("SquareUnion", 0x2294); + //entity("squarf", 0x25AA); + //entity("squf", 0x25AA); + //entity("srarr", 0x2192); + //entity("sscr", 0x1D4C8); + //entity("Sscr", 0x1D4AE); + //entity("ssetmn", 0x2216); + //entity("ssmile", 0x2323); + //entity("sstarf", 0x22C6); + //entity("star", 0x2606); + //entity("Star", 0x22C6); + //entity("starf", 0x2605); + //entity("straightepsilon", 0x03F5); + //entity("straightphi", 0x03D5); + entity("strns", 0x00AF); + //entity("sub", 0x2282); + //entity("Sub", 0x22D0); + //entity("subdot", 0x2ABD); + //entity("sube", 0x2286); + //entity("subE", 0x2AC5); + //entity("subedot", 0x2AC3); + //entity("submult", 0x2AC1); + //entity("subne", 0x228A); + //entity("subnE", 0x2ACB); + //entity("subplus", 0x2ABF); + //entity("subrarr", 0x2979); + //entity("subset", 0x2282); + //entity("Subset", 0x22D0); + //entity("subseteq", 0x2286); + //entity("subseteqq", 0x2AC5); + //entity("SubsetEqual", 0x2286); + //entity("subsetneq", 0x228A); + //entity("subsetneqq", 0x2ACB); + //entity("subsim", 0x2AC7); + //entity("subsub", 0x2AD5); + //entity("subsup", 0x2AD3); + //entity("succ", 0x227B); + //entity("succapprox", 0x2AB8); + //entity("succcurlyeq", 0x227D); + //entity("Succeeds", 0x227B); + //entity("SucceedsEqual", 0x2AB0); + //entity("SucceedsSlantEqual", 0x227D); + //entity("SucceedsTilde", 0x227F); + //entity("succeq", 0x2AB0); + //entity("succnapprox", 0x2ABA); + //entity("succneqq", 0x2AB6); + //entity("succnsim", 0x22E9); + //entity("succsim", 0x227F); + //entity("SuchThat", 0x220B); + //entity("sum", 0x2211); + //entity("sung", 0x266A); + //entity("sup", 0x2283); + //entity("Sup", 0x22D1); + entity("sup1", 0x00B9); + entity("sup2", 0x00B2); + entity("sup3", 0x00B3); + //entity("supdot", 0x2ABE); + //entity("supdsub", 0x2AD8); + //entity("supe", 0x2287); + //entity("supE", 0x2AC6); + //entity("supedot", 0x2AC4); + 
//entity("Superset", 0x2283); + //entity("SupersetEqual", 0x2287); + //entity("suphsub", 0x2AD7); + //entity("suplarr", 0x297B); + //entity("supmult", 0x2AC2); + //entity("supne", 0x228B); + //entity("supnE", 0x2ACC); + //entity("supplus", 0x2AC0); + //entity("supset", 0x2283); + //entity("Supset", 0x22D1); + //entity("supseteq", 0x2287); + //entity("supseteqq", 0x2AC6); + //entity("supsetneq", 0x228B); + //entity("supsetneqq", 0x2ACC); + //entity("supsim", 0x2AC8); + //entity("supsub", 0x2AD4); + //entity("supsup", 0x2AD6); + //entity("swarhk", 0x2926); + //entity("swarr", 0x2199); + //entity("swArr", 0x21D9); + //entity("swarrow", 0x2199); + //entity("swnwar", 0x292A); + entity("szlig", 0x00DF); + entity("Tab", 0x0009); + //entity("target", 0x2316); + //entity("tau", 0x03C4); + //entity("Tau", 0x03A4); + //entity("tbrk", 0x23B4); + //entity("tcaron", 0x0165); + //entity("Tcaron", 0x0164); + //entity("tcedil", 0x0163); + //entity("Tcedil", 0x0162); + //entity("tcy", 0x0442); + //entity("Tcy", 0x0422); + //entity("telrec", 0x2315); + //entity("tfr", 0x1D531); + //entity("Tfr", 0x1D517); + //entity("tgr", 0x03C4); + //entity("Tgr", 0x03A4); + //entity("there4", 0x2234); + //entity("therefore", 0x2234); + //entity("theta", 0x03B8); + //entity("Theta", 0x0398); + //entity("thetasym", 0x03D1); + //entity("thetav", 0x03D1); + //entity("thgr", 0x03B8); + //entity("THgr", 0x0398); + //entity("thickapprox", 0x2248); + //entity("thicksim", 0x223C); + //entity("thinsp", 0x2009); + //entity("ThinSpace", 0x2009); + //entity("thkap", 0x2248); + //entity("thksim", 0x223C); + entity("thorn", 0x00FE); + entity("THORN", 0x00DE); + //entity("tilde", 0x02DC); + //entity("Tilde", 0x223C); + //entity("TildeEqual", 0x2243); + //entity("TildeFullEqual", 0x2245); + //entity("TildeTilde", 0x2248); + entity("times", 0x00D7); + //entity("timesb", 0x22A0); + //entity("timesbar", 0x2A31); + //entity("timesd", 0x2A30); + //entity("tint", 0x222D); + //entity("toea", 0x2928); + //entity("top", 
0x22A4); + //entity("topbot", 0x2336); + //entity("topcir", 0x2AF1); + //entity("topf", 0x1D565); + //entity("Topf", 0x1D54B); + //entity("topfork", 0x2ADA); + //entity("tosa", 0x2929); + //entity("tprime", 0x2034); + //entity("trade", 0x2122); + //entity("triangle", 0x25B5); + //entity("triangledown", 0x25BF); + //entity("triangleleft", 0x25C3); + //entity("trianglelefteq", 0x22B4); + //entity("triangleq", 0x225C); + //entity("triangleright", 0x25B9); + //entity("trianglerighteq", 0x22B5); + //entity("tridot", 0x25EC); + //entity("trie", 0x225C); + //entity("triminus", 0x2A3A); + //entity("triplus", 0x2A39); + //entity("trisb", 0x29CD); + //entity("tritime", 0x2A3B); + //entity("trpezium", 0x23E2); + //entity("tscr", 0x1D4C9); + //entity("Tscr", 0x1D4AF); + //entity("tscy", 0x0446); + //entity("TScy", 0x0426); + //entity("tshcy", 0x045B); + //entity("TSHcy", 0x040B); + //entity("tstrok", 0x0167); + //entity("Tstrok", 0x0166); + //entity("twixt", 0x226C); + //entity("twoheadleftarrow", 0x219E); + //entity("twoheadrightarrow", 0x21A0); + //entity("uacgr", 0x03CD); + //entity("Uacgr", 0x038E); + entity("uacute", 0x00FA); + entity("Uacute", 0x00DA); + //entity("uarr", 0x2191); + //entity("uArr", 0x21D1); + //entity("Uarr", 0x219F); + //entity("Uarrocir", 0x2949); + //entity("ubrcy", 0x045E); + //entity("Ubrcy", 0x040E); + //entity("ubreve", 0x016D); + //entity("Ubreve", 0x016C); + entity("ucirc", 0x00FB); + entity("Ucirc", 0x00DB); + //entity("ucy", 0x0443); + //entity("Ucy", 0x0423); + //entity("udarr", 0x21C5); + //entity("udblac", 0x0171); + //entity("Udblac", 0x0170); + //entity("udhar", 0x296E); + //entity("udiagr", 0x03B0); + //entity("udigr", 0x03CB); + //entity("Udigr", 0x03AB); + //entity("ufisht", 0x297E); + //entity("ufr", 0x1D532); + //entity("Ufr", 0x1D518); + //entity("ugr", 0x03C5); + //entity("Ugr", 0x03A5); + entity("ugrave", 0x00F9); + entity("Ugrave", 0x00D9); + //entity("uHar", 0x2963); + //entity("uharl", 0x21BF); + //entity("uharr", 0x21BE); + 
//entity("uhblk", 0x2580); + //entity("ulcorn", 0x231C); + //entity("ulcorner", 0x231C); + //entity("ulcrop", 0x230F); + //entity("ultri", 0x25F8); + //entity("umacr", 0x016B); + //entity("Umacr", 0x016A); + entity("uml", 0x00A8); + //entity("UnderBrace", 0xFE38); + //entity("UnderBracket", 0x23B5); + //entity("UnderParenthesis", 0xFE36); + //entity("Union", 0x22C3); + //entity("UnionPlus", 0x228E); + //entity("uogon", 0x0173); + //entity("Uogon", 0x0172); + //entity("uopf", 0x1D566); + //entity("Uopf", 0x1D54C); + //entity("uparrow", 0x2191); + //entity("Uparrow", 0x21D1); + //entity("UpArrowBar", 0x2912); + //entity("UpArrowDownArrow", 0x21C5); + //entity("updownarrow", 0x2195); + //entity("Updownarrow", 0x21D5); + //entity("UpEquilibrium", 0x296E); + //entity("upharpoonleft", 0x21BF); + //entity("upharpoonright", 0x21BE); + //entity("uplus", 0x228E); + //entity("UpperLeftArrow", 0x2196); + //entity("UpperRightArrow", 0x2197); + //entity("upsi", 0x03C5); + //entity("Upsi", 0x03D2); + //entity("upsih", 0x03D2); + //entity("upsilon", 0x03C5); + //entity("Upsilon", 0x03A5); + //entity("UpTee", 0x22A5); + //entity("UpTeeArrow", 0x21A5); + //entity("upuparrows", 0x21C8); + //entity("urcorn", 0x231D); + //entity("urcorner", 0x231D); + //entity("urcrop", 0x230E); + //entity("uring", 0x016F); + //entity("Uring", 0x016E); + //entity("urtri", 0x25F9); + //entity("uscr", 0x1D4CA); + //entity("Uscr", 0x1D4B0); + //entity("utdot", 0x22F0); + //entity("utilde", 0x0169); + //entity("Utilde", 0x0168); + //entity("utri", 0x25B5); + //entity("utrif", 0x25B4); + //entity("uuarr", 0x21C8); + entity("uuml", 0x00FC); + entity("Uuml", 0x00DC); + //entity("uwangle", 0x29A7); + //entity("vangrt", 0x299C); + //entity("varepsilon", 0x03B5); + //entity("varkappa", 0x03F0); + //entity("varnothing", 0x2205); + //entity("varphi", 0x03C6); + //entity("varpi", 0x03D6); + //entity("varpropto", 0x221D); + //entity("varr", 0x2195); + //entity("vArr", 0x21D5); + //entity("varrho", 0x03F1); + 
//entity("varsigma", 0x03C2); + //entity("vartheta", 0x03D1); + //entity("vartriangleleft", 0x22B2); + //entity("vartriangleright", 0x22B3); + //entity("vBar", 0x2AE8); + //entity("Vbar", 0x2AEB); + //entity("vBarv", 0x2AE9); + //entity("vcy", 0x0432); + //entity("Vcy", 0x0412); + //entity("vdash", 0x22A2); + //entity("vDash", 0x22A8); + //entity("Vdash", 0x22A9); + //entity("VDash", 0x22AB); + //entity("Vdashl", 0x2AE6); + //entity("vee", 0x2228); + //entity("Vee", 0x22C1); + //entity("veebar", 0x22BB); + //entity("veeeq", 0x225A); + //entity("vellip", 0x22EE); + entity("verbar", 0x007C); + //entity("Verbar", 0x2016); + entity("vert", 0x007C); + //entity("Vert", 0x2016); + //entity("VerticalBar", 0x2223); + entity("VerticalLine", 0x007C); + //entity("VerticalSeparator", 0x2758); + //entity("VerticalTilde", 0x2240); + //entity("VeryThinSpace", 0x200A); + //entity("vfr", 0x1D533); + //entity("Vfr", 0x1D519); + //entity("vltri", 0x22B2); + //entity("vopf", 0x1D567); + //entity("Vopf", 0x1D54D); + //entity("vprop", 0x221D); + //entity("vrtri", 0x22B3); + //entity("vscr", 0x1D4CB); + //entity("Vscr", 0x1D4B1); + //entity("Vvdash", 0x22AA); + //entity("vzigzag", 0x299A); + //entity("wcirc", 0x0175); + //entity("Wcirc", 0x0174); + //entity("wedbar", 0x2A5F); + //entity("wedge", 0x2227); + //entity("Wedge", 0x22C0); + //entity("wedgeq", 0x2259); + //entity("weierp", 0x2118); + //entity("wfr", 0x1D534); + //entity("Wfr", 0x1D51A); + //entity("wopf", 0x1D568); + //entity("Wopf", 0x1D54E); + //entity("wp", 0x2118); + //entity("wr", 0x2240); + //entity("wreath", 0x2240); + //entity("wscr", 0x1D4CC); + //entity("Wscr", 0x1D4B2); + //entity("xcap", 0x22C2); + //entity("xcirc", 0x25EF); + //entity("xcup", 0x22C3); + //entity("xdtri", 0x25BD); + //entity("xfr", 0x1D535); + //entity("Xfr", 0x1D51B); + //entity("xgr", 0x03BE); + //entity("Xgr", 0x039E); + //entity("xharr", 0x27F7); + //entity("xhArr", 0x27FA); + //entity("xi", 0x03BE); + //entity("Xi", 0x039E); + //entity("xlarr", 
0x27F5); + //entity("xlArr", 0x27F8); + //entity("xmap", 0x27FC); + //entity("xnis", 0x22FB); + //entity("xodot", 0x2A00); + //entity("xopf", 0x1D569); + //entity("Xopf", 0x1D54F); + //entity("xoplus", 0x2A01); + //entity("xotime", 0x2A02); + //entity("xrarr", 0x27F6); + //entity("xrArr", 0x27F9); + //entity("xscr", 0x1D4CD); + //entity("Xscr", 0x1D4B3); + //entity("xsqcup", 0x2A06); + //entity("xuplus", 0x2A04); + //entity("xutri", 0x25B3); + //entity("xvee", 0x22C1); + //entity("xwedge", 0x22C0); + entity("yacute", 0x00FD); + entity("Yacute", 0x00DD); + //entity("yacy", 0x044F); + //entity("YAcy", 0x042F); + //entity("ycirc", 0x0177); + //entity("Ycirc", 0x0176); + //entity("ycy", 0x044B); + //entity("Ycy", 0x042B); + entity("yen", 0x00A5); + //entity("yfr", 0x1D536); + //entity("Yfr", 0x1D51C); + //entity("yicy", 0x0457); + //entity("YIcy", 0x0407); + //entity("yopf", 0x1D56A); + //entity("Yopf", 0x1D550); + //entity("yscr", 0x1D4CE); + //entity("Yscr", 0x1D4B4); + //entity("yucy", 0x044E); + //entity("YUcy", 0x042E); + entity("yuml", 0x00FF); + //entity("Yuml", 0x0178); + //entity("zacute", 0x017A); + //entity("Zacute", 0x0179); + //entity("zcaron", 0x017E); + //entity("Zcaron", 0x017D); + //entity("zcy", 0x0437); + //entity("Zcy", 0x0417); + //entity("zdot", 0x017C); + //entity("Zdot", 0x017B); + //entity("zeetrf", 0x2128); + //entity("ZeroWidthSpace", 0x200B); + //entity("zeta", 0x03B6); + //entity("Zeta", 0x0396); + //entity("zfr", 0x1D537); + //entity("Zfr", 0x2128); + //entity("zgr", 0x03B6); + //entity("Zgr", 0x0396); + //entity("zhcy", 0x0436); + //entity("ZHcy", 0x0416); + //entity("zigrarr", 0x21DD); + //entity("zopf", 0x1D56B); + //entity("Zopf", 0x2124); + //entity("zscr", 0x1D4CF); + //entity("Zscr", 0x1D4B5); + //entity("zwj", 0x200D); + //entity("zwnj", 0x200C); + } // entities + + HTMLSchema(const HTMLSchema&); + HTMLSchema& operator=(const HTMLSchema&); + bool operator==(const HTMLSchema&) const; +}; // class HTMLSchema + +} // namespace SAX + 
+} // namespace Arabica
+#endif
+
diff --git a/include/XML/XMLCharacterClasses.hpp b/include/XML/XMLCharacterClasses.hpp
index 90967a96..f78bdfd0 100644
--- a/include/XML/XMLCharacterClasses.hpp
+++ b/include/XML/XMLCharacterClasses.hpp
@@ -19,6 +19,7 @@ namespace XML
   bool is_digit(wchar_t c);
   bool is_combining_char(wchar_t c);
   bool is_extender(wchar_t c);
+  bool is_letter_or_digit(wchar_t c);
 
 } // namespace XML
 } // namespace Arabica
diff --git a/src/XML/XMLCharacterClasses.cpp b/src/XML/XMLCharacterClasses.cpp
index 51de0b3d..ec8a8f76 100644
--- a/src/XML/XMLCharacterClasses.cpp
+++ b/src/XML/XMLCharacterClasses.cpp
@@ -268,4 +268,14 @@ bool Arabica::XML::is_extender(wchar_t c)
          ((c >= 0x30FC) && (c <= 0x30FE));
 } // is_extender
 
+// Reports whether c is accepted by is_letter or by is_digit.
+bool Arabica::XML::is_letter_or_digit(wchar_t c)
+{
+  if(is_letter(c))
+    return true;
+
+  return is_digit(c);
+} // is_letter_or_digit
+
+
 // end of file