#ifndef ARABICA_XPATHIC_XPATH_PARSER_HPP #define ARABICA_XPATHIC_XPATH_PARSER_HPP #include #include #include #include #include #include "xpath_object.hpp" #include "xpath_expression.hpp" #include "xpath_ast.hpp" #include "xpath_grammar.hpp" #include "xpath_namespace_context.hpp" #include "xpath_function_resolver.hpp" #include "xpath_variable_resolver.hpp" #include "xpath_resolver_holder.hpp" namespace Arabica { namespace XPath { class SyntaxException : public std::runtime_error { public: SyntaxException(const std::string& thing) : std::runtime_error("Bad XPath: " + thing) { } }; // class SyntaxException class RuntimeException : public std::runtime_error { public: RuntimeException(const std::string& thing) : std::runtime_error("Cannot evaluate XPath: " + thing) { } }; // class RuntimeException class UnsupportedException : public std::runtime_error { public: UnsupportedException(const std::string& thing) : std::runtime_error("Sorry, haven't implemented '" + thing + "' yet") { } }; // class UnsupportedException namespace impl { template class CompilationContext; template class StepExpression; template class StepList : public std::vector*> { }; } // namespace impl template > class XPath { public: XPath() { resetNamespaceContext(); resetVariableResolver(); resetFunctionResolver(); } // XPath ~XPath() { } // ~XPath XPathExpressionPtr compile(const string_type& xpath) const { return do_compile(xpath, &XPath::parse_xpath); } // compile XPathExpressionPtr compile_expr(const string_type& xpath) const { return do_compile(xpath, &XPath::parse_xpath_expr); } // compile XPathValuePtr evaluate(const string_type& xpath, const DOM::Node& context) const { ExecutionContext executionContext; executionContext.setVariableResolver(getVariableResolver()); return compile(xpath)->evaluate(context, executionContext); } // evaluate XPathValuePtr evaluate_expr(const string_type& xpath, const DOM::Node& context) const { ExecutionContext executionContext; executionContext.setVariableResolver(getVariableResolver()); return compile_expr(xpath)->evaluate(context, executionContext); } // evaluate_expr void setNamespaceContext(const NamespaceContext& namespaceContext) { namespaceContext_.set(namespaceContext); } void setNamespaceContext(NamespaceContextPtr namespaceContext) { namespaceContext_.set(namespaceContext); } const NamespaceContext& getNamespaceContext() const { return namespaceContext_.get(); } void resetNamespaceContext() { namespaceContext_.set(NamespaceContextPtr(new NullNamespaceContext())); } void setVariableResolver(const VariableResolver& variableResolver) { variableResolver_.set(variableResolver); } void setVariableResolver(VariableResolverPtr variableResolver) { variableResolver_.set(variableResolver); } const VariableResolver& getVariableResolver() const { return variableResolver_.get(); } void resetVariableResolver() { variableResolver_.set(VariableResolverPtr(new NullVariableResolver())); } void setFunctionResolver(const FunctionResolver& functionResolver) { functionResolver_.set(functionResolver); } void setFunctionResolver(FunctionResolverPtr functionResolver) { functionResolver_.set(functionResolver); } const FunctionResolver& getFunctionResolver() const { return functionResolver_.get(); } void resetFunctionResolver() { functionResolver_.set(FunctionResolverPtr(new NullFunctionResolver())); } private: XPathExpressionPtr do_compile(const string_type& xpath, typename impl::types::tree_info_t(XPath::*fn)(const string_type& str) const) const { typename impl::types::tree_info_t ast; try { ast = (this->*fn)(xpath); if(!ast.full) throw SyntaxException(string_adaptor().asStdString(xpath)); impl::CompilationContext context(*this, getNamespaceContext(), getFunctionResolver()); return XPathExpressionPtr(compile_expression(ast.trees.begin(), context)); } // try catch(std::exception& ex) { throw ex; } // catch catch(...) { throw SyntaxException(string_adaptor().asStdString(xpath)); } // catch } // do_compile typename impl::types::tree_info_t parse_xpath(const string_type& str) const { typename impl::types::str_iter_t first = string_adaptor::begin(str), last = string_adaptor::end(str); return ast_parse(first, last, xpathg_); } // parse_xpath typename impl::types::tree_info_t parse_xpath_expr(const string_type& str) const { typename impl::types::str_iter_t first = string_adaptor::begin(str), last = string_adaptor::end(str); return ast_parse(first, last, xpathge_); } // parse_xpath impl::xpath_grammar xpathg_; impl::xpath_grammar_expr xpathge_; impl::ResolverHolder > namespaceContext_; impl::ResolverHolder > variableResolver_; impl::ResolverHolder > functionResolver_; ///////////////////////////////////////////////////////////////////////////////// public: static XPathExpression* compile_expression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { long id = impl::getNodeId(i); if(XPath::factory().find(id) == XPath::factory().end()) { //XPath::dump(i, 0); throw UnsupportedException(string_adaptor().asStdString(XPath::names()[id])); } return XPath::factory()[id](i, context); } // compile_expression private: static XPathExpression* createAbsoluteLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createRelativeLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createSingleStepRelativeLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createFunction(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createBinaryExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createLiteral(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createNumber(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createVariable(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createSingleStepAbsoluteLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createUnaryExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static XPathExpression* createUnaryNegativeExpr(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static impl::StepList createStepList(typename impl::types::node_iter_t const& from, typename impl::types::node_iter_t const& to, impl::CompilationContext& context); typedef XPathExpression* (*compileFn)(typename impl::types::node_iter_t const& i, impl::CompilationContext& context); static std::map& factory() { static std::map f = init_createFunctions(); return f; } // factory static std::map& names() { static std::map n = init_debugNames(); return n; } // names static const std::map init_createFunctions() { std::map factory; factory[impl::AbsoluteLocationPath_id] = createAbsoluteLocationPath; factory[impl::RelativeLocationPath_id] = createRelativeLocationPath; factory[impl::AbbreviatedAbsoluteLocationPath_id] = createAbsoluteLocationPath; factory[impl::Step_id] = createRelativeLocationPath; factory[impl::PathExpr_id] = createRelativeLocationPath; factory[impl::FilterExpr_id] = createRelativeLocationPath; factory[impl::PrimaryExpr_id] = createExpression; factory[impl::FunctionCall_id] = createFunction; factory[impl::AdditiveExpr_id] = createBinaryExpression; factory[impl::MultiplicativeExpr_id] = createBinaryExpression; factory[impl::EqualityExpr_id] = createBinaryExpression; factory[impl::RelationalExpr_id] = createBinaryExpression; factory[impl::OrExpr_id] = createBinaryExpression; factory[impl::AndExpr_id] = createBinaryExpression; factory[impl::UnionExpr_id] = createBinaryExpression; factory[impl::Literal_id] = createLiteral; factory[impl::Number_id] = createNumber; factory[impl::Digits_id] = createNumber; factory[impl::VariableReference_id] = createVariable; factory[impl::NodeTest_id] = createSingleStepRelativeLocationPath; factory[impl::QName_id] = createSingleStepRelativeLocationPath; factory[impl::NCName_id] = createSingleStepRelativeLocationPath; factory[impl::AnyName_id] = createSingleStepRelativeLocationPath; factory[impl::Text_id] = createSingleStepRelativeLocationPath; factory[impl::Comment_id] = createSingleStepRelativeLocationPath; factory[impl::ProcessingInstruction_id] = createSingleStepRelativeLocationPath; factory[impl::Slash_id] = createSingleStepAbsoluteLocationPath; factory[impl::SelfSelect_id] = createSingleStepRelativeLocationPath; factory[impl::ParentSelect_id] = createSingleStepRelativeLocationPath; factory[impl::UnaryExpr_id] = createUnaryExpression; factory[impl::UnaryMinusOperator_id] = createUnaryNegativeExpr; return factory; } // init_createFunctions static const std::map init_debugNames() { std::map names; typedef string_adaptor SA; names[impl::LocationPath_id] = SA::construct_from_utf8("LocationPath"); names[impl::AbsoluteLocationPath_id] = SA::construct_from_utf8("AbsoluteLocationPath"); names[impl::RelativeLocationPath_id] = SA::construct_from_utf8("RelativeLocationPath"); names[impl::Step_id] = SA::construct_from_utf8("Step"); names[impl::AxisSpecifier_id] = SA::construct_from_utf8("AxisSpecifier"); names[impl::NodeTest_id] = SA::construct_from_utf8("NodeTest"); names[impl::Predicate_id] = SA::construct_from_utf8("Predicate"); names[impl::PredicateExpr_id] = SA::construct_from_utf8("PredicateExpr"); names[impl::AbbreviatedAbsoluteLocationPath_id] = SA::construct_from_utf8("AbbreviatedAbsoluteLocationPath"); names[impl::AbbreviatedStep_id] = SA::construct_from_utf8("AbbreviatedStep"); names[impl::AbbreviatedAxisSpecifier_id] = SA::construct_from_utf8("AbbreviatedAxisSpecifier"); names[impl::Expr_id] = SA::construct_from_utf8("Expr"); names[impl::PrimaryExpr_id] = SA::construct_from_utf8("PrimaryExpr"); names[impl::FunctionCall_id] = SA::construct_from_utf8("FunctionCall"); names[impl::Argument_id] = SA::construct_from_utf8("Argument"); names[impl::UnionExpr_id] = SA::construct_from_utf8("UnionExpr"); names[impl::PathExpr_id] = SA::construct_from_utf8("PathExpr"); names[impl::FilterExpr_id] = SA::construct_from_utf8("FilterExpr"); names[impl::OrExpr_id] = SA::construct_from_utf8("OrExpr"); names[impl::AndExpr_id] = SA::construct_from_utf8("AndExpr"); names[impl::EqualityExpr_id] = SA::construct_from_utf8("EqualityExpr"); names[impl::RelationalExpr_id] = SA::construct_from_utf8("RelationalExpr"); names[impl::AdditiveExpr_id] = SA::construct_from_utf8("AdditiveExpr"); names[impl::MultiplicativeExpr_id] = SA::construct_from_utf8("MultiplicativeExpr"); names[impl::UnaryExpr_id] = SA::construct_from_utf8("UnaryExpr"); names[impl::Literal_id] = SA::construct_from_utf8("Literal"); names[impl::Number_id] = SA::construct_from_utf8("Number"); names[impl::Digits_id] = SA::construct_from_utf8("Digits"); names[impl::MultiplyOperator_id] = SA::construct_from_utf8("MultiplyOperator"); names[impl::FunctionName_id] = SA::construct_from_utf8("FunctionName"); names[impl::VariableReference_id] = SA::construct_from_utf8("VariableReference"); names[impl::NameTest_id] = SA::construct_from_utf8("NameTest"); names[impl::S_id] = SA::construct_from_utf8("S"); names[impl::NodeType_id] = SA::construct_from_utf8("NodeType"); names[impl::AxisName_id] = SA::construct_from_utf8("AxisName"); names[impl::QName_id] = SA::construct_from_utf8("QName"); names[impl::Prefix_id] = SA::construct_from_utf8("Prefix"); names[impl::LocalPart_id] = SA::construct_from_utf8("LocalPart"); names[impl::NCName_id] = SA::construct_from_utf8("NCName"); names[impl::NCNameChar_id] = SA::construct_from_utf8("NCNameChar"); names[impl::Slash_id] = SA::construct_from_utf8("/"); names[impl::SlashSlash_id] = SA::construct_from_utf8("//"); names[impl::AncestorOrSelf_id] = SA::construct_from_utf8("ancestor-or-self::"); names[impl::Ancestor_id] = SA::construct_from_utf8("ancestor::"); names[impl::Attribute_id] = SA::construct_from_utf8("attribute::"); names[impl::Child_id] = SA::construct_from_utf8("child::"); names[impl::DescendantOrSelf_id] = SA::construct_from_utf8("descendant-or-self::"); names[impl::Descendant_id] = SA::construct_from_utf8("descendant::"); names[impl::FollowingSibling_id] = SA::construct_from_utf8("following-sibling::"); names[impl::Following_id] = SA::construct_from_utf8("following::"); names[impl::Namespace_id] = SA::construct_from_utf8("namespace::"); names[impl::Parent_id] = SA::construct_from_utf8("parent::"); names[impl::PrecedingSibling_id] = SA::construct_from_utf8("preceding-sibling::"); names[impl::Preceding_id] = SA::construct_from_utf8("preceding::"); names[impl::Self_id] = SA::construct_from_utf8("self::"); names[impl::Comment_id] = SA::construct_from_utf8("comment()"); names[impl::Text_id] = SA::construct_from_utf8("text()"); names[impl::ProcessingInstruction_id] = SA::construct_from_utf8("processing-instruction()"); names[impl::Node_id] = SA::construct_from_utf8("node()"); names[impl::AnyName_id] = SA::construct_from_utf8("AnyName"); names[impl::SelfSelect_id] = SA::construct_from_utf8("SelfSelect"); names[impl::ParentSelect_id] = SA::construct_from_utf8("ParentSelect"); names[impl::LeftSquare_id] = SA::construct_from_utf8("["); names[impl::RightSquare_id] = SA::construct_from_utf8("]"); names[impl::LeftBracket_id] = SA::construct_from_utf8("("); names[impl::RightBracket_id] = SA::construct_from_utf8(")"); names[impl::PlusOperator_id] = SA::construct_from_utf8("+"); names[impl::MinusOperator_id] = SA::construct_from_utf8("-"); names[impl::ModOperator_id] = SA::construct_from_utf8("mod"); names[impl::DivOperator_id] = SA::construct_from_utf8("div"); names[impl::EqualsOperator_id] = SA::construct_from_utf8("="); names[impl::NotEqualsOperator_id] = SA::construct_from_utf8("!="); names[impl::LessThanOperator_id] = SA::construct_from_utf8("<"); names[impl::LessThanEqualsOperator_id] = SA::construct_from_utf8("<="); names[impl::GreaterThanOperator_id] = SA::construct_from_utf8(">"); names[impl::GreaterThanEqualsOperator_id] = SA::construct_from_utf8(">="); names[impl::OrOperator_id] = SA::construct_from_utf8("or"); names[impl::AndOperator_id] = SA::construct_from_utf8("and"); names[impl::UnionOperator_id] = SA::construct_from_utf8("union"); names[impl::UnaryMinusOperator_id] = SA::construct_from_utf8("minus"); return names; } // init_debugNames /* static void dump(typename impl::types::node_iter_t const& i, int depth) { long id = static_cast(i->value.id().to_long()); for(int d = 0; d < depth; ++d) std::cerr << ' '; std::cerr << names()[id] << " - " << std::string(i->value.begin(), i->value.end()) << std::endl; for(typename impl::types::node_iter_t c = i->children.begin(); c != i->children.end(); ++c) dump(c, depth+2); } // dump */ XPath(const XPath&); XPath& operator=(const XPath&); bool operator==(const XPath&) const; }; // class XPath } // namespace XPath } // namespace Arabica #include #include "xpath_value.hpp" #include "xpath_arithmetic.hpp" #include "xpath_relational.hpp" #include "xpath_logical.hpp" #include "xpath_step.hpp" #include "xpath_compile_context.hpp" #include "xpath_variable.hpp" #include "xpath_function_holder.hpp" #include "xpath_union.hpp" namespace Arabica { namespace XPath { template XPathExpression* XPath::createAbsoluteLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { return new impl::AbsoluteLocationPath(createStepList(i->children.begin(), i->children.end(), context)); } // createAbsoluteLocationPath template XPathExpression* XPath::createRelativeLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { return new impl::RelativeLocationPath(createStepList(i->children.begin(), i->children.end(), context)); } // createRelativeLocationPath template XPathExpression* XPath::createSingleStepRelativeLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { typename impl::types::node_iter_t n = i; return new impl::RelativeLocationPath(impl::StepFactory::createStep(n, context)); } // createSingleStepRelativeLocationPath template XPathExpression* XPath::createExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { typename impl::types::node_iter_t c = i->children.begin(); impl::skipWhitespace(c); return XPath::compile_expression(c, context); } // createExpression template XPathExpression* XPath::createFunction(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { typename impl::types::node_iter_t c = i->children.begin(); string_type name = string_adaptor::construct(c->value.begin(), c->value.end()); ++c; impl::skipWhitespace(c); assert(impl::getNodeId(c) == impl::LeftBracket_id); ++c; impl::skipWhitespace(c); std::vector > args; while(impl::getNodeId(c) != impl::RightBracket_id) { XPathExpressionPtr arg(XPath::compile_expression(c++, context)); args.push_back(arg); impl::skipWhitespace(c); } // while ... // maybe trailing whitespace ... return impl::FunctionHolder::createFunction(name, args, context); } // createFunction template XPathExpression* XPath::createBinaryExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { typename impl::types::node_iter_t c = i->children.begin(); XPathExpression* p1 = XPath::compile_expression(c, context); ++c; do { long op = impl::getNodeId(c); ++c; XPathExpression* p2 = XPath::compile_expression(c, context); switch(op) { case impl::PlusOperator_id: p1 = new impl::PlusOperator(p1, p2); break; case impl::MinusOperator_id: p1 = new impl::MinusOperator(p1, p2); break; case impl::MultiplyOperator_id: p1 = new impl::MultiplyOperator(p1, p2); break; case impl::DivOperator_id: p1 = new impl::DivideOperator(p1, p2); break; case impl::ModOperator_id: p1 = new impl::ModOperator(p1, p2); break; case impl::EqualsOperator_id: p1 = new impl::EqualsOperator(p1, p2); break; case impl::NotEqualsOperator_id: p1 = new impl::NotEqualsOperator(p1, p2); break; case impl::LessThanOperator_id: p1 = new impl::LessThanOperator(p1, p2); break; case impl::LessThanEqualsOperator_id: p1 = new impl::LessThanEqualsOperator(p1, p2); break; case impl::GreaterThanOperator_id: p1 = new impl::GreaterThanOperator(p1, p2); break; case impl::GreaterThanEqualsOperator_id: p1 = new impl::GreaterThanEqualsOperator(p1, p2); break; case impl::OrOperator_id: p1 = new impl::OrOperator(p1, p2); break; case impl::AndOperator_id: p1 = new impl::AndOperator(p1, p2); break; case impl::UnionOperator_id: p1 = new impl::UnionExpression(p1, p2); break; default: throw UnsupportedException(boost::lexical_cast(op)); } // switch } while(++c != i->children.end()); return p1; } // createBinaryExpression template XPathExpression* XPath::createLiteral(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { string_type str = string_adaptor::construct(i->value.begin(), i->value.end()); return new StringValue(str); } // createLiteral template XPathExpression* XPath::createNumber(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { string_type str = string_adaptor::construct(i->value.begin(), i->value.end()); return new NumericValue(boost::lexical_cast(str)); } // createNumber template XPathExpression* XPath::createVariable(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { string_type str = string_adaptor::construct(i->value.begin()+1, i->value.end()); return new Variable(str); } // createVariable template XPathExpression* XPath::createSingleStepAbsoluteLocationPath(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { typename impl::types::node_iter_t n = i; return new impl::AbsoluteLocationPath(impl::StepFactory::createStep(n, context)); } // createSingleStepAbsoluteLocationPath template XPathExpression* XPath::createUnaryExpression(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { return XPath::compile_expression(i->children.begin(), context); } // createUnaryExpression template XPathExpression* XPath::createUnaryNegativeExpr(typename impl::types::node_iter_t const& i, impl::CompilationContext& context) { return new impl::UnaryNegative(XPath::compile_expression(i+1, context)); } // createUnaryNegativeExpr template impl::StepList XPath::createStepList(typename impl::types::node_iter_t const& from, typename impl::types::node_iter_t const& to, impl::CompilationContext& context) { impl::StepList steps; typename impl::types::node_iter_t c = from; typename impl::types::node_iter_t end = to; while(c != end) switch(impl::getNodeId(c)) { case impl::S_id: case impl::Slash_id: ++c; // just drop it break; case impl::RelativeLocationPath_id: // might get here when handling an absolute path end = c->children.end(); c = c->children.begin(); break; case impl::Step_id: { typename impl::types::node_iter_t step = c->children.begin(); steps.push_back(impl::StepFactory::createStep(step, c->children.end(), context)); ++c; } break; default: steps.push_back(impl::StepFactory::createStep(c, end, context)); } // switch(impl::getNodeId(c)) return steps; } // createStepList } // namespace XPath } // namespace Arabica #endif