arabica/XPath/impl/xpath_object.hpp

555 lines
16 KiB
C++
Raw Normal View History

2005-08-04 22:42:30 +02:00
#ifndef ARABICA_XPATHIC_XPATH_OBJECT_H
#define ARABICA_XPATHIC_XPATH_OBJECT_H
#include <string>
#include <vector>
#include <utility>
#include <DOM/Node.h>
2005-08-19 18:39:29 +02:00
#include <DOM/Attr.h>
2005-08-04 22:42:30 +02:00
#include <boost/shared_ptr.hpp>
2005-08-21 14:48:00 +02:00
#include <boost/lexical_cast.hpp>
2005-08-04 22:42:30 +02:00
#include <cmath>
2005-08-21 14:48:00 +02:00
#include <Utils/StringAdaptor.h>
#include "xpath_axis_enumerator.hpp"
2005-08-04 22:42:30 +02:00
namespace Arabica
{
namespace XPath
{
// Tags identifying the kind of value an XPath object holds; reported by
// XPathValue::type() and used by the comparison helpers in this header to
// choose a comparison strategy.
enum ValueType
{
ANY , // NOTE(review): not handled by any comparison in this header - presumably "no specific type"; confirm
BOOL, // boolean value
NUMBER, // numeric value (double)
STRING, // string value
NODE_SET // collection of DOM nodes (see NodeSet)
}; // ValueType
2005-08-19 18:39:29 +02:00
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
2005-08-22 19:59:18 +02:00
namespace impl
{
2005-08-19 18:39:29 +02:00
// Returns the node that sits directly above `node` in the tree.
// Attribute nodes yield the element that owns them (via getOwnerElement());
// every other node type yields getParentNode().
template<class string_type>
DOM::Node<string_type> node_parent_or_owner(const DOM::Node<string_type>& node)
{
  const bool is_attribute = (node.getNodeType() == DOM::Node_base::ATTRIBUTE_NODE);
  if(!is_attribute)
    return node.getParentNode();

  return (static_cast<DOM::Attr<string_type> >(node)).getOwnerElement();
} // node_parent_or_owner
2005-08-04 22:42:30 +02:00
2005-08-19 18:39:29 +02:00
// Returns the index of `attr` within the attribute map of its owner element.
// If no entry of the map compares equal to `attr`, the map's length is returned.
template<class string_type>
unsigned int node_attribute_index(const DOM::Attr<string_type>& attr)
{
  DOM::NamedNodeMap<string_type> siblings = attr.getOwnerElement().getAttributes();
  const unsigned int count = siblings.getLength();

  unsigned int index = 0;
  while((index != count) && !(siblings.item(index) == attr))
    ++index;
  return index;
} // node_attribute_index
// Returns a sort key describing where `node` sits beneath its parent/owner.
//  - Attribute nodes: their index in the owner's attribute map.
//  - Other nodes: 1000 for the first child, 2000 for the second, and so on
//    (1000 per node counted while walking back through previous siblings).
// NOTE(review): the stride of 1000 presumably keeps attribute indexes ahead of
// child positions when the keys are compared - confirm the intent.
template<class string_type>
unsigned int node_child_position(const DOM::Node<string_type>& node)
{
  if(node.getNodeType() == DOM::Node_base::ATTRIBUTE_NODE)
    return node_attribute_index(static_cast<DOM::Attr<string_type> >(node));

  // the node itself accounts for the first 1000
  unsigned int position = 1000;
  for(DOM::Node<string_type> sibling = node.getPreviousSibling();
      sibling != 0;
      sibling = sibling.getPreviousSibling())
    position += 1000;
  return position;
} // node_child_position
// Climbs from `origin` through node_parent_or_owner() until there is nothing
// further up, and returns the topmost node reached. A node without a
// parent/owner is returned unchanged.
template<class string_type>
DOM::Node<string_type> ultimate_parent(const DOM::Node<string_type>& origin)
{
  DOM::Node<string_type> top = origin;
  for(DOM::Node<string_type> above = node_parent_or_owner(top);
      above != 0;
      above = node_parent_or_owner(top))
    top = above;
  return top;
} // ultimate_parent
// Orders two nodes whose topmost ancestors differ (the caller, compareNodes,
// only gets here when ultimate_parent(lhs) != ultimate_parent(rhs)).
//
// Returns -1 if lhs sorts first, 1 if rhs sorts first.
//  - A node rooted in the document sorts ahead of one rooted elsewhere
//    (e.g. in a document fragment).
//  - Otherwise the two roots are ordered by the address of their underlying
//    implementation objects, which is arbitrary but consistent within a run.
template<class string_type>
int resolve_different_subtrees(const DOM::Node<string_type>& lhs, const DOM::Node<string_type>& rhs)
{
  // if we have something in the document, and a document fragment,
  // sort the doc ahead of the fragment
  DOM::Node<string_type> lp = ultimate_parent(lhs);
  if(lp.getNodeType() == DOM::Node_base::DOCUMENT_NODE)
    return -1;

  DOM::Node<string_type> rp = ultimate_parent(rhs);
  if(rp.getNodeType() == DOM::Node_base::DOCUMENT_NODE)
    return 1;

  // otherwise, sort the frags.
  // The left root must be compared against the right root: comparing a root
  // with itself always answers "not less", which makes a-vs-b and b-vs-a both
  // return 1 and so gives the sort an inconsistent ordering.
  return (lp.unlying_impl() < rp.unlying_impl()) ? -1 : 1;
} // resolve_different_subtrees
// Builds the list of node_child_position() keys from `node` up to its topmost
// ancestor. The first entry belongs to `node` itself and the last to the
// topmost ancestor, so callers wanting root-first order iterate in reverse.
template<class string_type>
std::vector<unsigned int> node_position(const DOM::Node<string_type>& node)
{
  std::vector<unsigned int> path(1, node_child_position(node));
  for(DOM::Node<string_type> above = node_parent_or_owner(node);
      above != 0;
      above = node_parent_or_owner(above))
    path.push_back(node_child_position(above));
  return path;
} // node_position
// Three-way comparison of two nodes for ordering purposes.
//
// Returns 0 when both handles refer to the same node, a negative value when
// lhs sorts before rhs, and a positive value when lhs sorts after rhs.
//  - Nodes with different owner documents are ordered by the address of the
//    owner documents' underlying implementation objects.
//  - Nodes with the same owner document but different topmost ancestors are
//    handed to resolve_different_subtrees().
//  - Otherwise the root-to-node position paths are compared entry by entry;
//    when one path is a prefix of the other, the shorter path (the ancestor)
//    sorts first.
template<class string_type>
int compareNodes(const DOM::Node<string_type>& lhs, const DOM::Node<string_type>& rhs)
{
  if(lhs == rhs)
    return 0;

  // different documents
  if(lhs.getOwnerDocument() != rhs.getOwnerDocument())
    return (lhs.getOwnerDocument().unlying_impl() < rhs.getOwnerDocument().unlying_impl()) ? -1 : 1;

  // ok, nodes belong to the same document, but do they belong to the document itself, or a document fragment,
  // or is it just floating free? if they both belong to a document fragment, is it the same fragment?
  if(ultimate_parent(lhs) != ultimate_parent(rhs))
    return resolve_different_subtrees(lhs, rhs);

  std::vector<unsigned int> pos1 = node_position(lhs);
  std::vector<unsigned int> pos2 = node_position(rhs);

  // node_position() lists the node first and the root last, so walk backwards
  // to compare from the root downwards
  std::vector<unsigned int>::const_reverse_iterator l = pos1.rbegin(), le = pos1.rend();
  std::vector<unsigned int>::const_reverse_iterator r = pos2.rbegin(), re = pos2.rend();

  while(l != le && r != re)
  {
    // compare explicitly rather than subtracting: the entries are unsigned, so
    // a difference would wrap around before being narrowed to int
    if(*l != *r)
      return (*l < *r) ? -1 : 1;

    ++l;
    ++r;
  } // while

  if(l != le)
    return 1;   // lhs is deeper, i.e. a descendant of rhs
  if(r != re)
    return -1;  // rhs is deeper, i.e. a descendant of lhs

  return 0;
} // compareNodes
// Ordering predicate built on compareNodes(): true when n1 sorts before n2.
// Used as the comparator for std::sort in NodeSet::to_document_order().
template<class string_type>
bool nodes_less_than(const DOM::Node<string_type>& n1, const DOM::Node<string_type>& n2)
{
  const int ordering = compareNodes(n1, n2);
  return ordering < 0;
} // nodes_less_than
2005-08-22 19:59:18 +02:00
} // namespace impl
2005-08-19 18:39:29 +02:00
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////////
template<class string_type>
class NodeSet : public std::vector<DOM::Node<string_type> >
2005-08-04 22:42:30 +02:00
{
private:
typedef std::vector<DOM::Node<string_type> > baseT;
2005-08-04 22:42:30 +02:00
public:
2005-08-05 23:02:24 +02:00
NodeSet() :
std::vector<DOM::Node<string_type> >(),
2005-08-05 23:02:24 +02:00
forward_(true),
sorted_(false)
{
} // NodeSet
NodeSet(bool forward) :
std::vector<DOM::Node<string_type> >(),
2005-08-05 23:02:24 +02:00
forward_(forward),
sorted_(true)
2005-08-05 23:02:24 +02:00
{
} // NodeSet
NodeSet(const NodeSet<string_type>& rhs) :
std::vector<DOM::Node<string_type> >(rhs),
2005-08-05 23:02:24 +02:00
forward_(rhs.forward_),
sorted_(rhs.sorted_)
{
} // NodeSet
NodeSet& operator=(const NodeSet<string_type>& rhs)
2005-08-04 22:42:30 +02:00
{
forward_ = rhs.forward_;
sorted_ = rhs.sorted_;
std::vector<DOM::Node<string_type> >::operator=(rhs);
2005-08-04 22:42:30 +02:00
return *this;
} // operator=
void swap(NodeSet& rhs)
{
std::vector<DOM::Node<string_type> >::swap(rhs);
2005-08-04 22:42:30 +02:00
std::swap(forward_, rhs.forward_);
std::swap(sorted_, rhs.sorted_);
} // swap
bool forward() const { return sorted_ && forward_; }
bool reverse() const { return sorted_ && !forward_; }
void forward(bool forward) { forward_ = forward; sorted_ = true; }
void to_document_order()
{
if(!sorted_)
{
2005-08-22 19:59:18 +02:00
std::sort(baseT::begin(), baseT::end(), impl::nodes_less_than<string_type>);
2005-08-04 22:42:30 +02:00
sorted_ = true;
forward_ = true;
} // if(!sorted)
if(!forward_)
{
std::reverse(baseT::begin(), baseT::end());
2005-08-04 22:42:30 +02:00
forward_ = true;
} // if(!forward_)
} // to_document_order
DOM::Node<string_type> top() const
2005-08-04 22:42:30 +02:00
{
if(forward_)
return (*this)[0];
return (*this)[baseT::size()-1];
2005-08-04 22:42:30 +02:00
} // top()
private:
bool forward_;
bool sorted_;
}; // NodeSet
// Abstract interface for a value produced by XPath evaluation.
// Concrete value classes implement type() to report which ValueType they
// hold, and the as*() accessors to present the value as each XPath type.
template<class string_type>
2005-08-04 22:42:30 +02:00
class XPathValue
{
protected:
// Only derived classes may construct.
XPathValue() { }
public:
// Virtual so that derived values can be destroyed through a base pointer.
virtual ~XPathValue() { }
// The value viewed as a boolean.
virtual bool asBool() const = 0;
// The value viewed as a number.
virtual double asNumber() const = 0;
// The value viewed as a string.
virtual string_type asString() const = 0;
// The value viewed as a node-set.
virtual const NodeSet<string_type>& asNodeSet() const = 0;
2005-08-04 22:42:30 +02:00
// The ValueType tag of the concrete value.
virtual ValueType type() const = 0;
private:
// Declared private; no definitions are visible in this header - presumably
// to forbid copying, assignment and direct == on values (confirm).
XPathValue(const XPathValue&);
bool operator==(const XPathValue&);
XPathValue& operator=(const XPathValue&);
}; // class XPathValue
// Reference-counted handle to an immutable XPathValue.
// The constructor is explicit, so a raw pointer is never adopted by accident;
// the pointer passed in is handed to the boost::shared_ptr base.
template<class string_type>
class XPathValuePtr : public boost::shared_ptr<const XPathValue<string_type> >
{
  typedef boost::shared_ptr<const XPathValue<string_type> > pointer_base;

public:
  explicit XPathValuePtr(const XPathValue<string_type>* v) :
    pointer_base(v)
  {
  } // XPathValuePtr
}; // class XPathValuePtr
2005-08-19 18:39:29 +02:00
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
2005-08-04 22:42:30 +02:00
// Special floating point values used by the XPath number handling.
// NaN is obtained by taking the square root of a negative number;
// the infinities are derived from HUGE_VAL.
const double NaN = std::sqrt(-2.0);
const double Zero = 0.0;
const double Negative_Zero = -Zero;
const double Infinity = HUGE_VAL;
const double Negative_Infinity = -Infinity;

// NaN is the only value that does not compare equal to itself.
inline bool isNaN(double value)
{
  return !(value == value);
} // isNaN

// True for positive infinity only.
inline bool isInfinity(double value)
{
  return (Infinity == value);
} // isInfinity

// True for negative infinity only.
inline bool isNegativeInfinity(double value)
{
  return (Negative_Infinity == value);
} // isNegativeInfinity

// True for either infinity.
inline bool isInfinite(double value)
{
  return isNegativeInfinity(value) || isInfinity(value);
} // isInfinite
2005-08-22 20:54:53 +02:00
namespace impl
{
2005-08-04 22:42:30 +02:00
// Rounds `value` to the nearest integer as the XPath round() function does.
//  - Exact halves round towards positive infinity (2.5 -> 3, -2.5 -> -2).
//  - Values in [-0.5, 0) give negative zero.
//  - NaN, both infinities and both zeros are returned unchanged.
inline double roundNumber(double value)
{
  // less than zero but no smaller than -0.5 rounds to negative zero
  // (the lower bound is inclusive)
  if((value < 0.0) && (value >= -0.5))
    return -0.0;

  // floor() hands NaN, the infinities and the zeros straight back, and for
  // those inputs the difference below is NaN or zero, so they drop through
  // to the final return untouched.
  const double floored = std::floor(value);

  // Decide on the fractional part instead of computing floor(value + 0.5):
  // that addition can itself round up (0.49999999999999994 + 0.5 == 1.0, and
  // odd integers at or above 2^52 get bumped to the next even value).
  if((value - floored) >= 0.5)
    return floored + 1.0;
  return floored;
} // roundNumber
template<class string_type>
double stringAsNumber(const string_type& str)
{
try {
return boost::lexical_cast<double>(str);
} // try
catch(const boost::bad_lexical_cast&) {
return NaN;
} // catch
} // stringAsNumber
2005-08-04 22:42:30 +02:00
2005-08-23 21:19:17 +02:00
// Computes the string-value of a node.
//  - Attribute, processing instruction, comment, text and CDATA nodes:
//    the node's own value.
//  - Document, document fragment and element nodes: the values of all text
//    and CDATA descendants, concatenated in the order the DESCENDANT axis
//    enumerator visits them.
//  - Any other node type: throws std::runtime_error.
template<class string_type, class string_adaptor>
string_type nodeStringValue(const DOM::Node<string_type>& node)
{
  switch(node.getNodeType())
  {
    case DOM::Node_base::ATTRIBUTE_NODE:
    case DOM::Node_base::PROCESSING_INSTRUCTION_NODE:
    case DOM::Node_base::COMMENT_NODE:
    case DOM::Node_base::TEXT_NODE:
    case DOM::Node_base::CDATA_SECTION_NODE:
      return node.getNodeValue();

    case DOM::Node_base::DOCUMENT_NODE:
    case DOM::Node_base::DOCUMENT_FRAGMENT_NODE:
    case DOM::Node_base::ELEMENT_NODE:
      {
        std::ostringstream text;
        for(AxisEnumerator<string_type, string_adaptor> descendant(node, DESCENDANT);
            *descendant != 0;
            ++descendant)
        {
          const bool holds_text =
              (descendant->getNodeType() == DOM::Node_base::TEXT_NODE) ||
              (descendant->getNodeType() == DOM::Node_base::CDATA_SECTION_NODE);
          if(holds_text)
            text << descendant->getNodeValue();
        } // for ...
        return string_adaptor().makeStringT(text.str().c_str());
      } // case

    default:
      break;
  } // switch

  // no string-value rule for this kind of node
  throw std::runtime_error("Don't know how to calculate string-value of " +
                           string_adaptor().asStdString(node.getNodeName()));
} // nodeStringValue
// Numeric value of a node: its string-value run through stringAsNumber(),
// so a node whose text is not a number yields NaN.
template<class string_type, class string_adaptor>
double nodeNumberValue(const DOM::Node<string_type>& node)
{
  const string_type text = nodeStringValue<string_type, string_adaptor>(node);
  return stringAsNumber(text);
} // nodeNumberValue
2005-08-22 20:54:53 +02:00
} // namespace impl
2005-08-19 18:39:29 +02:00
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
2005-08-22 12:35:43 +02:00
namespace impl {
2005-08-23 21:19:17 +02:00
// Functor extracting a node's value as type RT.
// The general form returns the node's string-value.
template<typename RT, typename string_type, typename string_adaptor>
struct value_of_node
{
  RT operator()(const DOM::Node<string_type>& node)
  {
    return nodeStringValue<string_type, string_adaptor>(node);
  } // operator()
}; // struct value_of_node

// When a double is requested, the node's numeric value is returned instead.
template<typename string_type, typename string_adaptor>
struct value_of_node<double, string_type, string_adaptor>
{
  double operator()(const DOM::Node<string_type>& node)
  {
    return nodeNumberValue<string_type, string_adaptor>(node);
  } // operator()
}; // struct value_of_node<double, ...>
2005-08-23 21:19:17 +02:00
template<class Op, class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
class compareNodeWith
{
typedef typename Op::first_argument_type T;
2005-08-22 12:15:49 +02:00
public:
compareNodeWith(const T& value) : value_(value) { }
compareNodeWith(const compareNodeWith& rhs) : value_(rhs.value_) { }
bool operator()(const DOM::Node<string_type>& node)
{
2005-08-23 21:19:17 +02:00
value_of_node<T, string_type, string_adaptor> nv;
2005-08-22 12:15:49 +02:00
return Op()(nv(node), value_);
} // operator()
private:
2005-08-19 18:39:29 +02:00
T value_;
bool operator==(const compareNodeWith&);
compareNodeWith& operator=(const compareNodeWith&);
}; // class compareNodeWith
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool nodeSetsEqual(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
const NodeSet<string_type>& lns = lhs->asNodeSet();
const NodeSet<string_type>& rns = rhs->asNodeSet();
if((lns.size() == 0) || (rns.size() == 0))
return false;
std::set<string_type> values;
2005-08-21 14:48:00 +02:00
typename NodeSet<string_type>::const_iterator l = lns.begin();
2005-08-23 21:19:17 +02:00
string_type lvalue = nodeStringValue<string_type, string_adaptor>(*l);
2005-08-19 18:39:29 +02:00
2005-08-21 14:48:00 +02:00
for(typename NodeSet<string_type>::const_iterator r = rns.begin(), rend = rns.end(); r != rend; ++r)
2005-08-19 18:39:29 +02:00
{
2005-08-23 21:19:17 +02:00
string_type rvalue = nodeStringValue<string_type, string_adaptor>(*r);
2005-08-19 18:39:29 +02:00
if(lvalue == rvalue)
return true;
values.insert(rvalue);
} // for ...
++l;
2005-08-21 14:48:00 +02:00
for(typename NodeSet<string_type>::const_iterator lend = lns.end(); l != lend; ++l)
2005-08-23 21:19:17 +02:00
if(values.find(nodeStringValue<string_type, string_adaptor>(*l)) != values.end())
2005-08-19 18:39:29 +02:00
return true;
return false;
} // nodeSetsEqual
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool nodeSetAndValueEqual(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
const NodeSet<string_type>& lns = lhs->asNodeSet();
switch(rhs->type())
{
case BOOL:
{
bool l = !lns.empty();
bool r = rhs->asBool();
return l == r;
} // case BOOL
case STRING:
return std::find_if(lns.begin(),
lns.end(),
2005-08-23 21:19:17 +02:00
compareNodeWith<std::equal_to<string_type>, string_type, string_adaptor>(rhs->asString())) != lns.end();
2005-08-19 18:39:29 +02:00
case NUMBER:
return std::find_if(lns.begin(),
lns.end(),
2005-08-23 21:19:17 +02:00
compareNodeWith<std::equal_to<double>, string_type, string_adaptor>(rhs->asNumber())) != lns.end();
2005-08-19 18:39:29 +02:00
default:
throw std::runtime_error("Node set == not yet implemented for type " + boost::lexical_cast<std::string>(rhs->type()));
} // switch
} // nodeSetAndValueEqual
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
double minValue(const NodeSet<string_type>& ns)
{
2005-08-23 21:19:17 +02:00
double v = nodeNumberValue<string_type, string_adaptor>(ns[0]);
2005-08-21 14:48:00 +02:00
for(typename NodeSet<string_type>::const_iterator i = ns.begin(), ie = ns.end(); i != ie; ++i)
2005-08-19 18:39:29 +02:00
{
2005-08-23 21:19:17 +02:00
double vt = nodeNumberValue<string_type, string_adaptor>(*i);
2005-08-19 18:39:29 +02:00
if(isNaN(vt))
continue;
if(!(vt > v)) // looks weird, but should account for infinity
v = vt;
} // for ...
return v;
} // minValue
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
double maxValue(const NodeSet<string_type>& ns)
{
2005-08-23 21:19:17 +02:00
double v = nodeNumberValue<string_type, string_adaptor>(ns[0]);
2005-08-21 14:48:00 +02:00
for(typename NodeSet<string_type>::const_iterator i = ns.begin(), ie = ns.end(); i != ie; ++i)
2005-08-19 18:39:29 +02:00
{
2005-08-23 21:19:17 +02:00
double vt = nodeNumberValue<string_type, string_adaptor>(*i);
2005-08-19 18:39:29 +02:00
if(isNaN(vt))
continue;
if(!(vt < v))
v = vt;
} // for ...
return v;
} // maxValue
2005-08-23 21:19:17 +02:00
template<class Op, class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool compareNodeSets(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
2005-08-23 21:19:17 +02:00
return Op()(minValue<string_type, string_adaptor>(lhs->asNodeSet()), maxValue<string_type, string_adaptor>(rhs->asNodeSet()));
2005-08-19 18:39:29 +02:00
} // compareNodeSets
2005-08-23 21:19:17 +02:00
template<class Op, class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool compareNodeSetWith(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
const NodeSet<string_type>& lns = lhs->asNodeSet();
return std::find_if(lns.begin(),
lns.end(),
2005-08-23 21:19:17 +02:00
compareNodeWith<Op, string_type, string_adaptor>(rhs->asNumber())) != lns.end();
2005-08-19 18:39:29 +02:00
} // compareNodeSetAndValue
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool areEqual(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
ValueType lt = lhs->type();
ValueType rt = rhs->type();
if((lt == NODE_SET) && (rt == NODE_SET))
2005-08-23 21:19:17 +02:00
return nodeSetsEqual<string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(lt == NODE_SET)
2005-08-23 21:19:17 +02:00
return nodeSetAndValueEqual<string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(rt == NODE_SET)
2005-08-23 21:19:17 +02:00
return nodeSetAndValueEqual<string_type, string_adaptor>(rhs, lhs);
2005-08-19 18:39:29 +02:00
if((lt == BOOL) || (rt == BOOL))
return lhs->asBool() == rhs->asBool();
if((lt == NUMBER) || (rt == NUMBER))
return lhs->asNumber() == rhs->asNumber();
if((lt == STRING) || (rt == STRING))
return lhs->asString() == rhs->asString();
return false;
} // areEquals
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool isLessThan(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
ValueType lt = lhs->type();
ValueType rt = rhs->type();
if((lt == NODE_SET) && (rt == NODE_SET))
2005-08-23 21:19:17 +02:00
return compareNodeSets<std::less<double>, string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(lt == NODE_SET)
2005-08-23 21:19:17 +02:00
return compareNodeSetWith<std::less<double>, string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(rt == NODE_SET)
2005-08-23 21:19:17 +02:00
return compareNodeSetWith<std::greater<double>, string_type, string_adaptor>(rhs, lhs);
2005-08-19 18:39:29 +02:00
return lhs->asNumber() < rhs->asNumber();
} // isLessThan
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool isLessThanEquals(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
ValueType lt = lhs->type();
ValueType rt = rhs->type();
if((lt == NODE_SET) && (rt == NODE_SET))
2005-08-23 21:19:17 +02:00
return compareNodeSets<std::less_equal<double>, string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(lt == NODE_SET)
2005-08-23 21:19:17 +02:00
return compareNodeSetWith<std::less_equal<double>, string_type, string_adaptor>(lhs, rhs);
2005-08-19 18:39:29 +02:00
if(rt == NODE_SET)
2005-08-23 21:19:17 +02:00
return compareNodeSetWith<std::greater_equal<double>, string_type, string_adaptor>(rhs, lhs);
2005-08-19 18:39:29 +02:00
return lhs->asNumber() <= rhs->asNumber();
} // isLessThanEquals
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool isGreaterThan(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
2005-08-23 21:19:17 +02:00
return isLessThan<string_type, string_adaptor>(rhs, lhs);
2005-08-19 18:39:29 +02:00
} // isGreaterThan
2005-08-23 21:19:17 +02:00
template<class string_type, class string_adaptor>
2005-08-19 18:39:29 +02:00
bool isGreaterThanEquals(const XPathValuePtr<string_type>& lhs, const XPathValuePtr<string_type>& rhs)
{
2005-08-23 21:19:17 +02:00
return isLessThanEquals<string_type, string_adaptor>(rhs, lhs);
2005-08-19 18:39:29 +02:00
} // isGreaterThanEquals
2005-08-04 22:42:30 +02:00
} // namespace impl
2005-08-04 22:42:30 +02:00
} // namespace XPath
} // namespace Arabica
#endif