diff --git a/DOM/Utils/Stream.h b/DOM/Utils/Stream.h new file mode 100644 index 00000000..81456c96 --- /dev/null +++ b/DOM/Utils/Stream.h @@ -0,0 +1,209 @@ +#ifndef ARABICA_DOM_UTILS_STREAM_H +#define ARABICA_DOM_UTILS_STREAM_H +/////////////////////////////////////////////////////////////////////// +// +// DOM/Utils/Stream.h +// +// Written by Jez Higgins +// Copyright 2003 Jez UK Ltd, http://www.jezuk.co.uk +// +// Provides streaming operator<< for DOM::Nodes. Fully parameterised so +// will work with wide and narrow char types, so long as an operator<< +// exists for the DOM::Node's string_type. +// +// e.g. +// SAX2DOM::Parser domParser; +// SAX::wInputSource is; +// ... load instance from somewhere ... +// domParser.parse(is); +// DOM::Document doc = domParser.getDocument(); +// ... manipulate doc ... +// std::wcout << doc; +// +/////////////////////////////////////////////////////////////////////// +// $Id$ +/////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +namespace DOM +{ + + namespace StreamImpl + { + template + class escaper + { + private: + typedef char_type charT; + typedef traits_type traitsT; + typedef std::basic_ostream ostreamT; + typedef Unicode UnicodeT; + + public: + escaper(ostreamT& stream) : stream_(stream) { } + void operator()(charT ch) + { + if(ch == UnicodeT::LESS_THAN_SIGN) + { + stream_ << UnicodeT::AMPERSAND + << UnicodeT::LOWERCASE_L + << UnicodeT::LOWERCASE_T + << UnicodeT::SEMI_COLON; + return; + } // if(ch == UnicodeT::LESS_THAN_SIGN) + if(ch == UnicodeT::GREATER_THAN_SIGN) + { + stream_ << UnicodeT::AMPERSAND + << UnicodeT::LOWERCASE_G + << UnicodeT::LOWERCASE_T + << UnicodeT::SEMI_COLON; + return; + } // if(ch == UnicodeT::GREATER_THAN_SIGN) + if(ch == UnicodeT::AMPERSAND) + { + stream_ << UnicodeT::AMPERSAND + << UnicodeT::LOWERCASE_A + << UnicodeT::LOWERCASE_M + << UnicodeT::LOWERCASE_P + << UnicodeT::SEMI_COLON; + return; + } // if(ch == case UnicodeT::AMPERSAND) + if(ch == UnicodeT::QUOTATION_MARK) + { + stream_ << UnicodeT::AMPERSAND + << UnicodeT::LOWERCASE_Q + << UnicodeT::LOWERCASE_U + << UnicodeT::LOWERCASE_O + << UnicodeT::LOWERCASE_T + << UnicodeT::SEMI_COLON; + return; + } // if(ch == UnicodeT::QUOTATION_MARK) + stream_ << ch; + } // operator() + + private: + ostreamT& stream_; + }; // escaper + + template + void streamChildren(std::basic_ostream& stream, DOM::Node& node) + { + DOM::Node child = node.getFirstChild(); + while(child != 0) + { + stream << child; + child = child.getNextSibling(); + } // while + } // streamChildren + + } // namespace StreamImpl + +template +std::basic_ostream& +operator<<(std::basic_ostream& stream, + DOM::Node& node) +{ + typedef Unicode UnicodeT; + + switch(node.getNodeType()) + { + case DOM::Node::DOCUMENT_NODE: + stream << UnicodeT::LESS_THAN_SIGN + << UnicodeT::QUESTION_MARK + << UnicodeT::LOWERCASE_X + << UnicodeT::LOWERCASE_M + << UnicodeT::LOWERCASE_L + << UnicodeT::SPACE + << UnicodeT::LOWERCASE_V + << UnicodeT::LOWERCASE_E + << UnicodeT::LOWERCASE_R + << UnicodeT::LOWERCASE_S + << UnicodeT::LOWERCASE_I + << UnicodeT::LOWERCASE_O + << UnicodeT::LOWERCASE_N + << UnicodeT::EQUALS_SIGN + << UnicodeT::QUOTATION_MARK + << UnicodeT::NUMBER_1 + << UnicodeT::FULL_STOP + << UnicodeT::NUMBER_0 + << UnicodeT::QUOTATION_MARK + << UnicodeT::QUESTION_MARK + << UnicodeT::GREATER_THAN_SIGN + << std::endl; + StreamImpl::streamChildren(stream, node); + break; + case DOM::Node::ELEMENT_NODE: + { + stream << UnicodeT::LESS_THAN_SIGN << node.getNodeName(); + DOM::NamedNodeMap attrs = node.getAttributes(); + for(unsigned int a = 0; a < attrs.getLength(); ++a) + { + DOM::Node attr = attrs.item(a); + stream << UnicodeT::SPACE + << attr.getNodeName() + << UnicodeT::EQUALS_SIGN + << UnicodeT::QUOTATION_MARK; + stringT value = attr.getNodeValue(); + std::for_each(value.begin(), value.end(), StreamImpl::escaper(stream)); + stream << UnicodeT::QUOTATION_MARK; + } + stream << UnicodeT::GREATER_THAN_SIGN; + StreamImpl::streamChildren(stream, node); + stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH << node.getNodeName() << UnicodeT::GREATER_THAN_SIGN; + } + break; + case DOM::Node::TEXT_NODE: + { + stringT value = node.getNodeValue(); + std::for_each(value.begin(), value.end(), StreamImpl::escaper(stream)); + } + break; + case DOM::Node::ENTITY_REFERENCE_NODE: + stream << UnicodeT::AMPERSAND + << node.getNodeName() + << UnicodeT::SEMI_COLON; + break; + case DOM::Node::CDATA_SECTION_NODE: + stream << UnicodeT::LESS_THAN_SIGN + << UnicodeT::EXCLAMATION_MARK + << UnicodeT::LEFT_SQUARE_BRACKET + << UnicodeT::CAPITAL_C + << UnicodeT::CAPITAL_D + << UnicodeT::CAPITAL_A + << UnicodeT::CAPITAL_T + << UnicodeT::CAPITAL_A + << UnicodeT::LEFT_SQUARE_BRACKET + << node.getNodeValue() + << UnicodeT::RIGHT_SQUARE_BRACKET + << UnicodeT::RIGHT_SQUARE_BRACKET + << UnicodeT::GREATER_THAN_SIGN; + break; + case DOM::Node::PROCESSING_INSTRUCTION_NODE: + stream << UnicodeT::LESS_THAN_SIGN + << UnicodeT::QUESTION_MARK + << node.getNodeName() + << UnicodeT::SPACE + << node.getNodeValue() + << UnicodeT::QUESTION_MARK + << UnicodeT::GREATER_THAN_SIGN; + break; + case DOM::Node::COMMENT_NODE: + stream << UnicodeT::LESS_THAN_SIGN + << UnicodeT::EXCLAMATION_MARK + << UnicodeT::HYPHEN_MINUS + << UnicodeT::HYPHEN_MINUS + << node.getNodeValue() + << UnicodeT::HYPHEN_MINUS + << UnicodeT::HYPHEN_MINUS + << UnicodeT::GREATER_THAN_SIGN; + break; + } // switch + + return stream; +} // operator<< + +} // namespace DOM +#endif