#ifndef ARABICA_XPATHIC_XPATH_OBJECT_H #define ARABICA_XPATHIC_XPATH_OBJECT_H #include #include #include #include #include #include #include #ifdef __BORLANDC__ #include #endif #include #include #include #include "xpath_axis_enumerator.hpp" namespace Arabica { namespace XPath { enum ValueType { ANY , BOOL, NUMBER, STRING, NODE_SET }; // ValueType /////////////////////////////////////////////////////// /////////////////////////////////////////////////////// namespace impl { template DOM::Node node_parent_or_owner(const DOM::Node& node) { if(node.getNodeType() == DOM::Node_base::ATTRIBUTE_NODE) return (static_cast >(node)).getOwnerElement(); return node.getParentNode(); } // node_parent_or_owner template unsigned int node_attribute_index(const DOM::Attr& attr) { DOM::NamedNodeMap attrs = attr.getOwnerElement().getAttributes(); unsigned int p = 0; for(unsigned int pe = attrs.getLength(); p != pe; ++p) if(attrs.item(p) == attr) break; return p+1; } // node_attribute_index template unsigned int node_child_position(const DOM::Node& node) { switch(node.getNodeType()) { case NAMESPACE_NODE_TYPE: return 0; case DOM::Node_base::ATTRIBUTE_NODE: return node_attribute_index(static_cast >(node)); default: { unsigned int pos = 0; DOM::Node n = node; do { n = n.getPreviousSibling(); pos += 1000; } while(n != 0); return pos; } // default } // switch ... } // node_child_position template DOM::Node ultimate_parent(const DOM::Node& origin) { DOM::Node n = origin; DOM::Node p = node_parent_or_owner(n); while(p != 0) { n = p; p = node_parent_or_owner(n); } // while ... return n; } // ultimate_parent template int resolve_different_subtrees(const DOM::Node& lhs, const DOM::Node& rhs) { // if we have something in the document, and a document fragment, // sort the doc ahead of the fragment DOM::Node lp = ultimate_parent(lhs); if(lp.getNodeType() == DOM::Node_base::DOCUMENT_NODE) return -1; DOM::Node rp = ultimate_parent(rhs); if(rp.getNodeType() == DOM::Node_base::DOCUMENT_NODE) return 1; // otherwise, sort the frags return (lp.underlying_impl() < lp.underlying_impl()) ? -1 : 1; } // resolve_different_subtrees template std::vector node_position(const DOM::Node& node) { std::vector pos; DOM::Node n = node; do { pos.push_back(node_child_position(n)); n = node_parent_or_owner(n); } while(n != 0); return pos; } // node_position template DOM::Node get_owner_document(const DOM::Node& node) { if(node.getNodeType() == DOM::Node_base::DOCUMENT_NODE) return node; return node.getOwnerDocument(); } // get_owner_document template int compareNodes(const DOM::Node& lhs, const DOM::Node& rhs) { if(lhs == rhs) return 0; // different documents if(get_owner_document(lhs) != get_owner_document(rhs)) return (get_owner_document(lhs).underlying_impl() < get_owner_document(rhs).underlying_impl()) ? 1 : -1; // ok, nodes belong to the same document, but do they belong to the document itself, or a document fragment, // or is it just floating free? if they both belong to a document fragment, is it the same fragment? if(ultimate_parent(lhs) != ultimate_parent(rhs)) return resolve_different_subtrees(lhs, rhs); std::vector pos1 = node_position(lhs); std::vector pos2 = node_position(rhs); std::vector::const_reverse_iterator l = pos1.rbegin(), le = pos1.rend(); std::vector::const_reverse_iterator r = pos2.rbegin(), re = pos2.rend(); while(l != le && r != re) { if(*l != *r) return *l - *r; ++l; ++r; } // while if(l != le) return 1; if(r != re) return -1; return 0; } // compareNodes template bool nodes_less_than(const DOM::Node& n1, const DOM::Node& n2) { return compareNodes(n1, n2) < 0; } // nodes_less_than } // namespace impl /////////////////////////////////////////////////////////// /////////////////////////////////////////////////////// template > class NodeSet { public: typedef typename std::vector >::const_iterator const_iterator; typedef typename std::vector >::iterator iterator; typedef typename std::vector >::value_type value_type; NodeSet() : nodes_(), forward_(true), sorted_(false) { } // NodeSet NodeSet(bool forward) : nodes_(), forward_(forward), sorted_(false) { } // NodeSet NodeSet(const NodeSet& rhs) : nodes_(rhs.nodes_), forward_(rhs.forward_), sorted_(rhs.sorted_) { } // NodeSet NodeSet& operator=(const NodeSet& rhs) { nodes_ = rhs.nodes_; forward_ = rhs.forward_; sorted_ = rhs.sorted_; return *this; } // operator= void swap(NodeSet& rhs) { nodes_.swap(rhs.nodes_); std::swap(forward_, rhs.forward_); std::swap(sorted_, rhs.sorted_); } // swap const_iterator begin() const { return nodes_.begin(); } const_iterator end() const { return nodes_.end(); } iterator begin() { return nodes_.begin(); } iterator end() { return nodes_.end(); } const DOM::Node& operator[](size_t i) const { return nodes_[i]; } size_t size() const { return nodes_.size(); } bool empty() const { return nodes_.empty(); } template void insert(iterator position, InputIterator first, InputIterator last) { sorted_ = false; nodes_.insert(position, first, last); } // insert void push_back(const DOM::Node& node) { nodes_.push_back(node); sorted_ = false; } // push_back bool forward() const { return sorted_ && forward_; } bool reverse() const { return sorted_ && !forward_; } void forward(bool forward) { if(forward_ == forward) return; forward_ = forward; sorted_ = false; } // forward void to_document_order() { sort(); if(!forward_) { std::reverse(nodes_.begin(), nodes_.end()); forward_ = true; } // if(!forward_) } // to_document_order void sort() { if(sorted_) return; if(forward_) std::sort(nodes_.begin(), nodes_.end(), impl::nodes_less_than); else std::sort(nodes_.rbegin(), nodes_.rend(), impl::nodes_less_than); nodes_.erase(std::unique(nodes_.begin(), nodes_.end()), nodes_.end()); sorted_ = true; } // sort const DOM::Node& top() { sort(); if(forward_) return (*this)[0]; return (*this)[nodes_.size()-1]; } // top() private: std::vector > nodes_; bool forward_; bool sorted_; }; // NodeSet template > class XPathValue_impl { protected: XPathValue_impl() { } public: virtual ~XPathValue_impl() { } virtual bool asBool() const = 0; virtual double asNumber() const = 0; virtual string_type asString() const = 0; virtual const NodeSet& asNodeSet() const = 0; virtual ValueType type() const = 0; private: XPathValue_impl(const XPathValue_impl&); bool operator==(const XPathValue_impl&) const; XPathValue_impl& operator=(const XPathValue_impl&); }; // class XPathValue_impl template class XPathValuePtr; template > class XPathValue { public: explicit XPathValue() : ptr_() { } explicit XPathValue(const XPathValue_impl* v) : ptr_(v) { } XPathValue(const XPathValue& rhs) : ptr_(rhs.ptr_) { } XPathValue& operator=(const XPathValue& rhs) { ptr_ = rhs.ptr_; return *this; } // operator= bool asBool() const { return ptr_->asBool(); } double asNumber() const { return ptr_->asNumber(); } string_type asString() const { return ptr_->asString(); } const NodeSet& asNodeSet() const { return ptr_->asNodeSet(); } ValueType type() const { return ptr_->type(); } operator bool() const { return ptr_.get(); } bool operator==(int dummy) const { return (dummy == 0) && (ptr_.get() == 0); } bool operator!=(int dummy) const { return !(operator==(dummy)); } private: bool operator==(const XPathValue&) const; typedef boost::shared_ptr > ValuePtr; ValuePtr ptr_; explicit XPathValue(ValuePtr ptr) : ptr_(ptr) { } friend class XPathValuePtr; }; // class XPathValue template > class XPathValuePtr { public: explicit XPathValuePtr() : ptr_() { } explicit XPathValuePtr(const XPathValue_impl* v) : ptr_(v) { } XPathValuePtr(const XPathValue& rhs) : ptr_(rhs.ptr_) { } XPathValuePtr(const XPathValuePtr& rhs) : ptr_(rhs.ptr_) { } XPathValuePtr& operator=(const XPathValue& rhs) { ptr_ = rhs.ptr_; return *this; } // operator= XPathValuePtr& operator=(const XPathValuePtr& rhs) { ptr_ = rhs.ptr_; return *this; } // operator= const XPathValue_impl* operator->() const { return ptr_.get(); } operator bool() const { return (ptr_.get() != 0); } operator XPathValue() const { return XPathValue(ptr_); } private: bool operator==(const XPathValuePtr&) const; typedef boost::shared_ptr > ValuePtr; ValuePtr ptr_; }; // class XPathValuePtr //////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////// const double NaN = std::sqrt(-2.0); const double Zero = 0.0; const double Negative_Zero = -Zero; const double Infinity = HUGE_VAL; const double Negative_Infinity = -Infinity; inline bool isNaN(double value) { return (value != value); } inline bool isInfinity(double value) { return (value == Infinity); } inline bool isNegativeInfinity(double value) { return (value == Negative_Infinity); } inline bool isInfinite(double value) { return isInfinity(value) || isNegativeInfinity(value); } namespace impl { inline double roundNumber(double value) { if(!(isNaN(value) || isInfinite(value) || (std::fabs(value) == 0))) if((value < 0.0) && (value > -0.5)) value = -0.0; else value = std::floor(value + 0.5); return value; } // roundNumber template double stringAsNumber(const string_type& str) { try { return boost::lexical_cast(Arabica::text::normalize_whitespace(str)); } // try catch(const boost::bad_lexical_cast&) { return NaN; } // catch } // stringAsNumber template bool nodeIsText(const DOM::Node& node) { return (node.getNodeType() == DOM::Node_base::TEXT_NODE) || (node.getNodeType() == DOM::Node_base::CDATA_SECTION_NODE); } // nodeIsText template string_type nodeStringValue(const DOM::Node& node) { switch(node.getNodeType()) { case DOM::Node_base::DOCUMENT_NODE: case DOM::Node_base::DOCUMENT_FRAGMENT_NODE: case DOM::Node_base::ELEMENT_NODE: { std::basic_ostringstream os; AxisEnumerator ae(node, DESCENDANT); while(*ae != 0) { if(nodeIsText(*ae)) os << ae->getNodeValue(); ++ae; } // while return string_adaptor::construct(os.str().c_str()); } // case case DOM::Node_base::ATTRIBUTE_NODE: case DOM::Node_base::PROCESSING_INSTRUCTION_NODE: case DOM::Node_base::COMMENT_NODE: case NAMESPACE_NODE_TYPE: return node.getNodeValue(); case DOM::Node_base::TEXT_NODE: case DOM::Node_base::CDATA_SECTION_NODE: { DOM::Node next = node.getNextSibling(); if((next == 0) || !nodeIsText(next)) return node.getNodeValue(); std::basic_ostringstream os; os << node.getNodeValue() << nodeStringValue(next); return string_adaptor::construct(os.str().c_str()); } // case default: throw std::runtime_error("Don't know how to calculate string-value of " + string_adaptor().asStdString(node.getNodeName())); } // switch } // nodeStringValue template double nodeNumberValue(const DOM::Node& node) { return stringAsNumber(nodeStringValue(node)); } // nodeNumberValue } // namespace impl //////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////// namespace impl { template struct value_of_node { RT operator()(const DOM::Node& node) { return nodeStringValue(node); } }; template struct value_of_node { double operator()(const DOM::Node& node) { return nodeNumberValue(node); } }; template class compareNodeWith { typedef typename Op::first_argument_type T; public: compareNodeWith(const T& value) : value_(value) { } compareNodeWith(const compareNodeWith& rhs) : value_(rhs.value_) { } bool operator()(const DOM::Node& node) { value_of_node nv; return Op()(nv(node), value_); } // operator() private: T value_; bool operator==(const compareNodeWith&); compareNodeWith& operator=(const compareNodeWith&); }; // class compareNodeWith template bool nodeSetsCompare(const XPathValue& lhs, const XPathValue& rhs) { const NodeSet& lns = lhs.asNodeSet(); const NodeSet& rns = rhs.asNodeSet(); if((lns.size() == 0) || (rns.size() == 0)) return false; std::set values; typename NodeSet::const_iterator l = lns.begin(); string_type lvalue = nodeStringValue(*l); predicate1 p1; for(typename NodeSet::const_iterator r = rns.begin(), rend = rns.end(); r != rend; ++r) { string_type rvalue = nodeStringValue(*r); if(p1(lvalue, rvalue)) return true; values.insert(rvalue); } // for ... ++l; predicate2 p2; for(typename NodeSet::const_iterator lend = lns.end(); l != lend; ++l) if(p2(values.find(nodeStringValue(*l)), values.end())) return true; return false; } // nodeSetsEqual template bool nodeSetsEqual(const XPathValue& lhs, const XPathValue& rhs) { return nodeSetsCompare, std::not_equal_to::const_iterator> >(lhs, rhs); } template bool nodeSetsNotEqual(const XPathValue& lhs, const XPathValue& rhs) { return nodeSetsCompare, std::equal_to::const_iterator> >(lhs, rhs); } template bool nodeSetAndValueEqual(const XPathValue& lhs, const XPathValue& rhs) { const NodeSet& lns = lhs.asNodeSet(); switch(rhs.type()) { case BOOL: { bool l = !lns.empty(); bool r = rhs.asBool(); return l == r; } // case BOOL case STRING: return std::find_if(lns.begin(), lns.end(), compareNodeWith, string_type, string_adaptor>(rhs.asString())) != lns.end(); case NUMBER: return std::find_if(lns.begin(), lns.end(), compareNodeWith, string_type, string_adaptor>(rhs.asNumber())) != lns.end(); default: throw std::runtime_error("Node set == not yet implemented for type " + boost::lexical_cast(rhs.type())); } // switch } // nodeSetAndValueEqual template bool nodeSetAndValueNotEqual(const XPathValue& lhs, const XPathValue& rhs) { const NodeSet& lns = lhs.asNodeSet(); switch(rhs.type()) { case BOOL: { bool l = !lns.empty(); bool r = rhs.asBool(); return l != r; } // case BOOL case STRING: return std::find_if(lns.begin(), lns.end(), compareNodeWith, string_type, string_adaptor>(rhs.asString())) != lns.end(); case NUMBER: return std::find_if(lns.begin(), lns.end(), compareNodeWith, string_type, string_adaptor>(rhs.asNumber())) != lns.end(); default: throw std::runtime_error("Node set == not yet implemented for type " + boost::lexical_cast(rhs.type())); } // switch } // nodeSetAndValueNotEqual template double minValue(const NodeSet& ns) { double v = nodeNumberValue(ns[0]); for(typename NodeSet::const_iterator i = ns.begin(), ie = ns.end(); i != ie; ++i) { double vt = nodeNumberValue(*i); if(isNaN(vt)) continue; if(!(vt > v)) // looks weird, but should account for infinity v = vt; } // for ... return v; } // minValue template double maxValue(const NodeSet& ns) { double v = nodeNumberValue(ns[0]); for(typename NodeSet::const_iterator i = ns.begin(), ie = ns.end(); i != ie; ++i) { double vt = nodeNumberValue(*i); if(isNaN(vt)) continue; if(!(vt < v)) v = vt; } // for ... return v; } // maxValue template bool compareNodeSets(const XPathValue& lhs, const XPathValue& rhs) { return Op()(minValue(lhs.asNodeSet()), maxValue(rhs.asNodeSet())); } // compareNodeSets template bool compareNodeSetWith(const XPathValue& lhs, const XPathValue& rhs) { const NodeSet& lns = lhs.asNodeSet(); return std::find_if(lns.begin(), lns.end(), compareNodeWith(rhs.asNumber())) != lns.end(); } // compareNodeSetAndValue template bool areEqual(const XPathValue& lhs, const XPathValue& rhs) { ValueType lt = lhs.type(); ValueType rt = rhs.type(); if((lt == NODE_SET) && (rt == NODE_SET)) return nodeSetsEqual(lhs, rhs); if(lt == NODE_SET) return nodeSetAndValueEqual(lhs, rhs); if(rt == NODE_SET) return nodeSetAndValueEqual(rhs, lhs); if((lt == BOOL) || (rt == BOOL)) return lhs.asBool() == rhs.asBool(); if((lt == NUMBER) || (rt == NUMBER)) return lhs.asNumber() == rhs.asNumber(); if((lt == STRING) || (rt == STRING)) return lhs.asString() == rhs.asString(); return false; } // areEqual template bool areNotEqual(const XPathValue& lhs, const XPathValue& rhs) { ValueType lt = lhs.type(); ValueType rt = rhs.type(); if((lt == NODE_SET) && (rt == NODE_SET)) return nodeSetsNotEqual(lhs, rhs); if(lt == NODE_SET) return nodeSetAndValueNotEqual(lhs, rhs); if(rt == NODE_SET) return nodeSetAndValueNotEqual(rhs, lhs); return !areEqual(lhs, rhs); } // areNotEqual template bool isLessThan(const XPathValue& lhs, const XPathValue& rhs) { ValueType lt = lhs.type(); ValueType rt = rhs.type(); if((lt == NODE_SET) && (rt == NODE_SET)) return compareNodeSets, string_type, string_adaptor>(lhs, rhs); if(lt == NODE_SET) return compareNodeSetWith, string_type, string_adaptor>(lhs, rhs); if(rt == NODE_SET) return compareNodeSetWith, string_type, string_adaptor>(rhs, lhs); return lhs.asNumber() < rhs.asNumber(); } // isLessThan template bool isLessThanEquals(const XPathValue& lhs, const XPathValue& rhs) { ValueType lt = lhs.type(); ValueType rt = rhs.type(); if((lt == NODE_SET) && (rt == NODE_SET)) return compareNodeSets, string_type, string_adaptor>(lhs, rhs); if(lt == NODE_SET) return compareNodeSetWith, string_type, string_adaptor>(lhs, rhs); if(rt == NODE_SET) return compareNodeSetWith, string_type, string_adaptor>(rhs, lhs); return lhs.asNumber() <= rhs.asNumber(); } // isLessThanEquals template bool isGreaterThan(const XPathValue& lhs, const XPathValue& rhs) { return isLessThan(rhs, lhs); } // isGreaterThan template bool isGreaterThanEquals(const XPathValue& lhs, const XPathValue& rhs) { return isLessThanEquals(rhs, lhs); } // isGreaterThanEquals } // namespace impl } // namespace XPath } // namespace Arabica #endif