#ifndef ARABICA_DOM_UTILS_STREAM_H #define ARABICA_DOM_UTILS_STREAM_H /////////////////////////////////////////////////////////////////////// // // DOM/Utils/Stream.h // // Written by Jez Higgins // Copyright 2003-2005 Jez UK Ltd, http://www.jezuk.co.uk // // Provides streaming operator<< for DOM::Nodes. Fully parameterised so // will work with wide and narrow char types, so long as an operator<< // exists for the DOM::Node's string_type. // // e.g. // Arabica::SAX2DOM::Parser domParser; // SAX::wInputSource is; // ... load instance from somewhere ... // domParser.parse(is); // DOM::Document doc = domParser.getDocument(); // ... manipulate doc ... // std::wcout << doc; // /////////////////////////////////////////////////////////////////////// // $Id$ /////////////////////////////////////////////////////////////////////// #include #include #include #include namespace Arabica { namespace DOM { namespace StreamImpl { template void streamChildren(std::basic_ostream& stream, const DOM::Node& node) { DOM::Node child = node.getFirstChild(); while(child != 0) { stream << child; child = child.getNextSibling(); } // while } // streamChildren template void prefix_mapper_callback(std::ios_base::event ev, std::ios_base& stream, int index) { if(ev == std::ios_base::erase_event) delete static_cast >*>(stream.pword(index)); } // boing_callback template std::pair is_uri_declared(std::vector >* prefix_stack, const stringT& namespaceURI) { stringT declared_prefix; for(typename std::vector >::reverse_iterator b = prefix_stack->rbegin(), e = prefix_stack->rend(); b != e; ++b) { typename std::map::const_iterator p = b->find(namespaceURI); if(p != b->end()) return std::make_pair(true, p->second); } // for ... return std::make_pair(false, stringT()); } // prefix_is_declared template void check_and_output_node_name(std::basic_ostream& stream, const DOM::Node& node, std::vector >* prefix_stack) { std::map& current = *(prefix_stack->rbegin()); stringT namespaceURI = node.getNamespaceURI(); if(!namespaceURI.empty()) { std::pair prefix = is_uri_declared(prefix_stack, namespaceURI); if(!prefix.first) current[namespaceURI] = prefix.second = node.getPrefix(); if(!prefix.second.empty()) stream << prefix.second << Arabica::text::Unicode::COLON; stream << node.getLocalName(); } else stream << node.getNodeName(); } // check_and_output_node_name template bool isXmlns(const stringT& str) { typedef Arabica::text::Unicode UnicodeT; if(str.size() != 5) return false; if((str[0] == UnicodeT::LOWERCASE_X) && (str[1] == UnicodeT::LOWERCASE_M) && (str[2] == UnicodeT::LOWERCASE_L) && (str[3] == UnicodeT::LOWERCASE_N) && (str[4] == UnicodeT::LOWERCASE_S)) return true; return false; } // isXmlns template int prefix_mapper(std::basic_ostream& stream, const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; static const int index = std::ios_base::xalloc(); std::vector >* prefix_stack; if(stream.pword(index) != 0) { prefix_stack = static_cast >*>(stream.pword(index)); prefix_stack->push_back(std::map()); } else { prefix_stack = new std::vector >; stream.pword(index) = prefix_stack; std::map prefixes; for(DOM::Node p = node.getParentNode(); p.getNodeType() == DOM::Node_base::ELEMENT_NODE; p = p.getParentNode()) { if(p.getNamespaceURI().empty()) continue; if(prefixes.find(p.getNamespaceURI()) == prefixes.end()) prefixes[p.getNamespaceURI()] = p.getPrefix(); } // for ... prefix_stack->push_back(prefixes); } // if ... std::map& current = *(prefix_stack->rbegin()); // is element namespace URI declared? check_and_output_node_name(stream, node, prefix_stack); DOM::NamedNodeMap attrs = node.getAttributes(); std::vector names; for(unsigned int a = 0; a < attrs.getLength(); ++a) names.push_back(attrs.item(a).getNodeName()); std::sort(names.begin(), names.end()); for(typename std::vector::const_iterator a = names.begin(), ae = names.end(); a != ae; ++a) { DOM::Node attr = attrs.getNamedItem(*a); if(isXmlns(attr.getNodeName()) || isXmlns(attr.getPrefix())) continue; stream << UnicodeT::SPACE; check_and_output_node_name(stream, attr, prefix_stack); stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stringT value = attr.getNodeValue(); std::for_each(value.begin(), value.end(), Arabica::XML::attribute_escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } for(typename std::map::const_iterator i = current.begin(), e = current.end(); i != e; ++i) { stream << UnicodeT::SPACE << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_N << UnicodeT::LOWERCASE_S; if(!(i->second.empty())) stream << UnicodeT::COLON << i->second; stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; std::for_each(i->first.begin(), i->first.end(), Arabica::XML::attribute_escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } // for ... return index; } // prefix_mapper template void prefix_mapper_pop(std::basic_ostream& stream, const DOM::Node& node, int index, bool output) { std::vector >* prefix_stack = static_cast >*>(stream.pword(index)); if(output) check_and_output_node_name(stream, node, prefix_stack); prefix_stack->pop_back(); if(prefix_stack->empty()) { delete static_cast >*>(stream.pword(index)); stream.pword(index) = 0; } // if ... } // prefix_mapper_pop } // namespace StreamImpl template std::basic_ostream& operator<<(std::basic_ostream& stream, const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; switch(node.getNodeType()) { case DOM::Node::DOCUMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::SPACE << UnicodeT::LOWERCASE_V << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_R << UnicodeT::LOWERCASE_S << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_N << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << UnicodeT::NUMBER_1 << UnicodeT::FULL_STOP << UnicodeT::NUMBER_0 << UnicodeT::QUOTATION_MARK << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN << std::endl; case DOM::Node::DOCUMENT_FRAGMENT_NODE: StreamImpl::streamChildren(stream, node); break; case DOM::Node::ELEMENT_NODE: { stream << UnicodeT::LESS_THAN_SIGN; int index = StreamImpl::prefix_mapper(stream, node); if(node.hasChildNodes()) { stream << UnicodeT::GREATER_THAN_SIGN; StreamImpl::streamChildren(stream, node); stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH; StreamImpl::prefix_mapper_pop(stream, node, index, true); stream << UnicodeT::GREATER_THAN_SIGN; } else { StreamImpl::prefix_mapper_pop(stream, node, index, false); stream << UnicodeT::SLASH << UnicodeT::GREATER_THAN_SIGN; } } break; case DOM::Node::TEXT_NODE: { stringT value = node.getNodeValue(); std::for_each(value.begin(), value.end(), Arabica::XML::text_escaper(stream)); } break; case DOM::Node::ENTITY_REFERENCE_NODE: stream << UnicodeT::AMPERSAND << node.getNodeName() << UnicodeT::SEMI_COLON; break; case DOM::Node::CDATA_SECTION_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::LEFT_SQUARE_BRACKET << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::LEFT_SQUARE_BRACKET << node.getNodeValue() << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN; break; case DOM::Node::PROCESSING_INSTRUCTION_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << node.getNodeName() << UnicodeT::SPACE << node.getNodeValue() << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN; break; case DOM::Node::COMMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << node.getNodeValue() << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << UnicodeT::GREATER_THAN_SIGN; break; default: // keep GCC quiet break; } // switch return stream; } // operator<< } // namespace DOM } // namespace Arabica #endif