#ifndef ARABICA_DOM_UTILS_STREAM_H #define ARABICA_DOM_UTILS_STREAM_H /////////////////////////////////////////////////////////////////////// // // DOM/Utils/Stream.h // // Written by Jez Higgins // Copyright 2003 Jez UK Ltd, http://www.jezuk.co.uk // // Provides streaming operator<< for DOM::Nodes. Fully parameterised so // will work with wide and narrow char types, so long as an operator<< // exists for the DOM::Node's string_type. // // e.g. // SAX2DOM::Parser domParser; // SAX::wInputSource is; // ... load instance from somewhere ... // domParser.parse(is); // DOM::Document doc = domParser.getDocument(); // ... manipulate doc ... // std::wcout << doc; // /////////////////////////////////////////////////////////////////////// // $Id$ /////////////////////////////////////////////////////////////////////// #include #include #include namespace DOM { namespace StreamImpl { template class escaper { private: typedef char_type charT; typedef traits_type traitsT; typedef std::basic_ostream ostreamT; typedef Arabica::Unicode UnicodeT; public: escaper(ostreamT& stream) : stream_(stream) { } void operator()(charT ch) { if(ch == UnicodeT::LESS_THAN_SIGN) { stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::LESS_THAN_SIGN) if(ch == UnicodeT::GREATER_THAN_SIGN) { stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_G << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::GREATER_THAN_SIGN) if(ch == UnicodeT::AMPERSAND) { stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_A << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_P << UnicodeT::SEMI_COLON; return; } // if(ch == case UnicodeT::AMPERSAND) if(ch == UnicodeT::QUOTATION_MARK) { stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_Q << UnicodeT::LOWERCASE_U << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::QUOTATION_MARK) stream_ << ch; } // operator() private: ostreamT& stream_; }; // escaper template void streamChildren(std::basic_ostream& stream, DOM::Node& node) { DOM::Node child = node.getFirstChild(); while(child != 0) { stream << child; child = child.getNextSibling(); } // while } // streamChildren } // namespace StreamImpl template void prefix_mapper_callback(std::ios_base::event ev, std::ios_base& stream, int index) { if(ev == std::ios_base::erase_event) delete static_cast >*>(stream.pword(index)); } // boing_callback template std::pair is_uri_declared(std::vector >* prefix_stack, const stringT& namespaceURI) { stringT declared_prefix; for(typename std::vector >::reverse_iterator b = prefix_stack->rbegin(), e = prefix_stack->rend(); b != e; ++b) { typename std::map::const_iterator p = b->find(namespaceURI); if(p != b->end()) return std::make_pair(true, p->second); } // for ... return std::make_pair(false, stringT()); } // prefix_is_declared template void check_and_output_node_name(std::basic_ostream& stream, DOM::Node& node, std::vector >* prefix_stack) { std::map& current = *(prefix_stack->rbegin()); stringT namespaceURI = node.getNamespaceURI(); if(!namespaceURI.empty()) { std::pair prefix = is_uri_declared(prefix_stack, namespaceURI); if(!prefix.first) current[namespaceURI] = prefix.second = node.getPrefix(); if(!prefix.second.empty()) stream << prefix.second << Arabica::Unicode::COLON; stream << node.getLocalName(); } else stream << node.getNodeName(); } // check_and_output_node_name template int prefix_mapper(std::basic_ostream& stream, DOM::Node& node) { typedef Arabica::Unicode UnicodeT; static const int index = std::ios_base::xalloc(); std::vector >* prefix_stack; if(stream.pword(index) != 0) { prefix_stack = static_cast >*>(stream.pword(index)); prefix_stack->push_back(std::map()); } else { prefix_stack = new std::vector >; stream.pword(index) = prefix_stack; std::map prefixes; for(DOM::Node p = node.getParentNode(); p.getNodeType() == DOM::Node_base::ELEMENT_NODE; p = p.getParentNode()) { if(p.getNamespaceURI().empty()) continue; if(prefixes.find(p.getNamespaceURI()) == prefixes.end()) prefixes[p.getNamespaceURI()] = p.getPrefix(); } // for ... prefix_stack->push_back(prefixes); } // if ... std::map& current = *(prefix_stack->rbegin()); // is element namespace URI declared? check_and_output_node_name(stream, node, prefix_stack); DOM::NamedNodeMap attrs = node.getAttributes(); for(unsigned int a = 0; a < attrs.getLength(); ++a) { DOM::Node attr = attrs.item(a); if((attr.getNodeName() == "xmlns") || (attr.getPrefix() == "xmlns")) continue; stream << UnicodeT::SPACE; check_and_output_node_name(stream, attr, prefix_stack); stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stringT value = attr.getNodeValue(); std::for_each(value.begin(), value.end(), StreamImpl::escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } for(typename std::map::const_iterator i = current.begin(), e = current.end(); i != e; ++i) { stream << UnicodeT::SPACE << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_N << UnicodeT::LOWERCASE_S; if(!(i->second.empty())) stream << UnicodeT::COLON << i->second; stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; std::for_each(i->first.begin(), i->first.end(), StreamImpl::escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } // for ... return index; } // prefix_mapper template void prefix_mapper_pop(std::basic_ostream& stream, DOM::Node node, int index) { std::vector >* prefix_stack = static_cast >*>(stream.pword(index)); check_and_output_node_name(stream, node, prefix_stack); prefix_stack->pop_back(); if(prefix_stack->empty()) { delete static_cast >*>(stream.pword(index)); stream.pword(index) = 0; } // if ... } // prefix_mapper_pop template std::basic_ostream& operator<<(std::basic_ostream& stream, DOM::Node& node) { typedef Arabica::Unicode UnicodeT; switch(node.getNodeType()) { case DOM::Node::DOCUMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::SPACE << UnicodeT::LOWERCASE_V << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_R << UnicodeT::LOWERCASE_S << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_N << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << UnicodeT::NUMBER_1 << UnicodeT::FULL_STOP << UnicodeT::NUMBER_0 << UnicodeT::QUOTATION_MARK << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN << std::endl; StreamImpl::streamChildren(stream, node); break; case DOM::Node::ELEMENT_NODE: { stream << UnicodeT::LESS_THAN_SIGN; int index = prefix_mapper(stream, node); stream << UnicodeT::GREATER_THAN_SIGN; StreamImpl::streamChildren(stream, node); stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH; prefix_mapper_pop(stream, node, index); stream << UnicodeT::GREATER_THAN_SIGN; } break; case DOM::Node::TEXT_NODE: { stringT value = node.getNodeValue(); std::for_each(value.begin(), value.end(), StreamImpl::escaper(stream)); } break; case DOM::Node::ENTITY_REFERENCE_NODE: stream << UnicodeT::AMPERSAND << node.getNodeName() << UnicodeT::SEMI_COLON; break; case DOM::Node::CDATA_SECTION_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::LEFT_SQUARE_BRACKET << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::LEFT_SQUARE_BRACKET << node.getNodeValue() << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN; break; case DOM::Node::PROCESSING_INSTRUCTION_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << node.getNodeName() << UnicodeT::SPACE << node.getNodeValue() << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN; break; case DOM::Node::COMMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << node.getNodeValue() << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << UnicodeT::GREATER_THAN_SIGN; break; default: // keep GCC quiet break; } // switch return stream; } // operator<< } // namespace DOM #endif