#ifndef ARABICA_DOM_UTILS_STREAM_H #define ARABICA_DOM_UTILS_STREAM_H /////////////////////////////////////////////////////////////////////// // // DOM/Utils/Stream.h // // Written by Jez Higgins // Copyright 2003-2005 Jez UK Ltd, http://www.jezuk.co.uk // // Provides streaming operator<< for DOM::Nodes. Fully parameterised so // will work with wide and narrow char types, so long as an operator<< // exists for the DOM::Node's string_type. // // e.g. // Arabica::SAX2DOM::Parser domParser; // SAX::wInputSource is; // ... load instance from somewhere ... // domParser.parse(is); // DOM::Document doc = domParser.getDocument(); // ... manipulate doc ... // std::wcout << doc; // /////////////////////////////////////////////////////////////////////// #include #include #include #include #include namespace Arabica { namespace DOM { namespace StreamImpl { template void prefix_mapper_callback(std::ios_base::event ev, std::ios_base& stream, int index) { if(ev == std::ios_base::erase_event) delete static_cast >*>(stream.pword(index)); } // boing_callback template std::pair is_uri_declared(std::vector >* prefix_stack, const stringT& namespaceURI) { stringT declared_prefix; for(typename std::vector >::reverse_iterator b = prefix_stack->rbegin(), e = prefix_stack->rend(); b != e; ++b) { typename std::map::const_iterator p = b->find(namespaceURI); if(p != b->end()) return std::make_pair(true, p->second); } // for ... return std::make_pair(false, stringT()); } // is_uri_declared template void check_and_output_node_name(std::basic_ostream& stream, const DOM::Node& node, std::vector >* prefix_stack, const bool is_attribute, const long index) { std::map& current = *(prefix_stack->rbegin()); stringT namespaceURI = node.getNamespaceURI(); if(!string_adaptorT::empty(namespaceURI)) { std::pair prefix = is_uri_declared(prefix_stack, namespaceURI); if(!prefix.first) { current[namespaceURI] = prefix.second = node.getPrefix(); if(is_attribute && string_adaptorT::empty(prefix.second)) { std::ostringstream os; os << 'a' << stream.iword(index)++; current[namespaceURI] = prefix.second = string_adaptorT::construct_from_utf8(os.str().c_str()); } // if ... } // if ... if(!string_adaptorT::empty(prefix.second)) stream << prefix.second << Arabica::text::Unicode::COLON; stream << node.getLocalName(); } else stream << node.getNodeName(); } // check_and_output_node_name template bool isXmlns(const stringT& str) { typedef Arabica::text::Unicode UnicodeT; if(string_adaptorT::length(str) != 5) return false; typename string_adaptorT::const_iterator ci = string_adaptorT::begin(str); if((*ci == UnicodeT::LOWERCASE_X) && (*(ci+1) == UnicodeT::LOWERCASE_M) && (*(ci+2) == UnicodeT::LOWERCASE_L) && (*(ci+3) == UnicodeT::LOWERCASE_N) && (*(ci+4) == UnicodeT::LOWERCASE_S)) return true; return false; } // isXmlns template bool compare_nodes_by_name(const DOM::Node& lhs, const DOM::Node& rhs) { const stringT& lhsURI = lhs.getNamespaceURI(); const stringT& rhsURI = rhs.getNamespaceURI(); if(lhsURI == rhsURI) return lhs.getLocalName() < rhs.getLocalName(); return lhsURI < rhsURI; } // class compare_nodes_by_name template int prefix_mapper(std::basic_ostream& stream, const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; static const int index = std::ios_base::xalloc(); std::vector >* prefix_stack; if(stream.pword(index) != 0) { prefix_stack = static_cast >*>(stream.pword(index)); prefix_stack->push_back(std::map()); } else { prefix_stack = new std::vector >; stream.pword(index) = prefix_stack; stream.iword(index) = 0; std::map prefixes; for(DOM::Node p = node.getParentNode(); p.getNodeType() == DOM::Node_base::ELEMENT_NODE; p = p.getParentNode()) { if(string_adaptorT::empty(p.getNamespaceURI())) continue; if(prefixes.find(p.getNamespaceURI()) == prefixes.end()) prefixes[p.getNamespaceURI()] = p.getPrefix(); } // for ... prefix_stack->push_back(prefixes); } // if ... std::map& current = *(prefix_stack->rbegin()); // is element namespace URI declared? check_and_output_node_name(stream, node, prefix_stack, false, index); const DOM::NamedNodeMap attrs = node.getAttributes(); std::vector > attrNodes; for(unsigned int a = 0; a < attrs.getLength(); ++a) attrNodes.push_back(attrs.item(a)); std::sort(attrNodes.begin(), attrNodes.end(), compare_nodes_by_name); for(unsigned int a = 0; a < attrNodes.size(); ++a) { //DOM::Node attr = attrs.getNamedItem(*a); const DOM::Node attr = attrNodes[a]; if(string_adaptorT::empty(attr.getNodeName()) || isXmlns(attr.getNodeName()) || isXmlns(attr.getPrefix())) continue; stream << UnicodeT::SPACE; check_and_output_node_name(stream, attr, prefix_stack, true, index); stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stringT value = attr.getNodeValue(); std::for_each(string_adaptorT::begin(value), string_adaptorT::end(value), Arabica::XML::attribute_escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } for(typename std::map::const_iterator i = current.begin(), e = current.end(); i != e; ++i) { stream << UnicodeT::SPACE << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_N << UnicodeT::LOWERCASE_S; if(!(string_adaptorT::empty(i->second))) stream << UnicodeT::COLON << i->second; stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; std::for_each(string_adaptorT::begin(i->first), string_adaptorT::end(i->first), Arabica::XML::attribute_escaper(stream)); stream << UnicodeT::QUOTATION_MARK; } // for ... return index; } // prefix_mapper template void prefix_mapper_pop(std::basic_ostream& stream, const DOM::Node& node, int index, bool output) { std::vector >* prefix_stack = static_cast >*>(stream.pword(index)); if(output) check_and_output_node_name(stream, node, prefix_stack, false, index); prefix_stack->pop_back(); if(prefix_stack->empty()) { delete static_cast >*>(stream.pword(index)); stream.pword(index) = 0; } // if ... } // prefix_mapper_pop ////////////////////////////////////// ////////////////////////////////////// template void streamChildren(std::basic_ostream& stream, const DOM::Node& node) { DOM::Node child = node.getFirstChild(); while(child != 0) { stream << child; child = child.getNextSibling(); } // while } // streamChildren template void streamElement(std::basic_ostream& stream, const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; stream << UnicodeT::LESS_THAN_SIGN; int index = StreamImpl::prefix_mapper(stream, node); if(node.hasChildNodes()) { stream << UnicodeT::GREATER_THAN_SIGN; StreamImpl::streamChildren(stream, node); stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH; StreamImpl::prefix_mapper_pop(stream, node, index, true); stream << UnicodeT::GREATER_THAN_SIGN; } else { StreamImpl::prefix_mapper_pop(stream, node, index, false); stream << UnicodeT::SLASH << UnicodeT::GREATER_THAN_SIGN; } } // streamElement template void startCDATA(std::basic_ostream& stream) { typedef Arabica::text::Unicode UnicodeT; stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::LEFT_SQUARE_BRACKET << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::LEFT_SQUARE_BRACKET; } // startCDATA template void endCDATA(std::basic_ostream& stream) { typedef Arabica::text::Unicode UnicodeT; stream << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN; } // endCDATA template void streamCDATA(std::basic_ostream& stream, const DOM::Node& node) { typedef typename string_adaptorT::size_type size_type; static stringT endMarker = string_adaptorT::construct_from_utf8("]]>"); StreamImpl::startCDATA(stream); const stringT value = node.getNodeValue(); size_type breakAt = string_adaptorT::find(value, endMarker); if(breakAt == string_adaptorT::npos()) stream << value; else { size_type start = 0; do { breakAt += 2; stream << string_adaptorT::substr(value, start, breakAt); StreamImpl::endCDATA(stream); start = breakAt; StreamImpl::startCDATA(stream); breakAt = string_adaptorT::find(value, endMarker, breakAt); } while(breakAt != string_adaptorT::npos()); stream << string_adaptorT::substr(value, start); } // if ... StreamImpl::endCDATA(stream); } // streamCDATA } // namespace StreamImpl template std::basic_ostream& operator<<(std::basic_ostream& stream, const DOM::Node& node) { typedef Arabica::text::Unicode UnicodeT; switch(node.getNodeType()) { case DOM::Node_base::DOCUMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::SPACE << UnicodeT::LOWERCASE_V << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_R << UnicodeT::LOWERCASE_S << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_N << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << UnicodeT::NUMBER_1 << UnicodeT::FULL_STOP << UnicodeT::NUMBER_0 << UnicodeT::QUOTATION_MARK << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN << std::endl; case DOM::Node_base::DOCUMENT_FRAGMENT_NODE: StreamImpl::streamChildren(stream, node); break; case DOM::Node_base::ELEMENT_NODE: StreamImpl::streamElement(stream, node); break; case DOM::Node_base::TEXT_NODE: { stringT value = node.getNodeValue(); std::for_each(string_adaptorT::begin(value), string_adaptorT::end(value), Arabica::XML::text_escaper(stream)); } break; case DOM::Node_base::ENTITY_REFERENCE_NODE: stream << UnicodeT::AMPERSAND << node.getNodeName() << UnicodeT::SEMI_COLON; break; case DOM::Node_base::CDATA_SECTION_NODE: StreamImpl::streamCDATA(stream, node); break; case DOM::Node_base::PROCESSING_INSTRUCTION_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << node.getNodeName() << UnicodeT::SPACE << node.getNodeValue() << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN; break; case DOM::Node_base::COMMENT_NODE: stream << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << node.getNodeValue() << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << UnicodeT::GREATER_THAN_SIGN; break; default: // keep GCC quiet break; } // switch return stream; } // operator<< } // namespace DOM } // namespace Arabica #endif