#ifndef ARABICA_XPATHIC_XPATH_PARSER_HPP #define ARABICA_XPATHIC_XPATH_PARSER_HPP #include #include #include #include #include #include "xpath_object.hpp" #include "xpath_expression.hpp" #include "xpath_ast.hpp" #include "xpath_grammar.hpp" #include "xpath_namespace_context.hpp" #include "xpath_function_resolver.hpp" #include "xpath_variable_resolver.hpp" #include "xpath_resolver_holder.hpp" namespace Arabica { namespace XPath { class SyntaxException : public std::runtime_error { public: SyntaxException(const std::string& thing) : std::runtime_error("Bad XPath: " + thing) { } }; // class SyntaxException class RuntimeException : public std::runtime_error { public: RuntimeException(const std::string& thing) : std::runtime_error("Cannot evaluate XPath: " + thing) { } }; // class RuntimeException class UnsupportedException : public std::runtime_error { public: UnsupportedException(const std::string& thing) : std::runtime_error("Sorry, haven't implemented '" + thing + "' yet") { } }; // class UnsupportedException template class CompilationContext; class StepExpression; typedef std::vector StepList; template > class XPath { public: XPath() { resetNamespaceContext(); resetVariableResolver(); resetFunctionResolver(); } // XPath ~XPath() { } // ~XPath XPathExpressionPtr compile(const string_type& xpath) const { return do_compile(xpath, &XPath::parse_xpath); } // compile XPathExpressionPtr compile_expr(const string_type& xpath) const { return do_compile(xpath, &XPath::parse_xpath_expr); } // compile XPathValuePtr evaluate(const string_type& xpath, const DOM::Node& context) const { ExecutionContext executionContext; executionContext.setVariableResolver(getVariableResolver()); return compile(xpath)->evaluate(context, executionContext); } // evaluate XPathValuePtr evaluate_expr(const string_type& xpath, const DOM::Node& context) const { ExecutionContext executionContext; executionContext.setVariableResolver(getVariableResolver()); return compile_expr(xpath)->evaluate(context, executionContext); } // evaluate_expr void setNamespaceContext(const NamespaceContext& namespaceContext) { namespaceContext_.set(namespaceContext); } void setNamespaceContext(NamespaceContextPtr namespaceContext) { namespaceContext_.set(namespaceContext); } const NamespaceContext& getNamespaceContext() const { return namespaceContext_.get(); } void resetNamespaceContext() { namespaceContext_.set(NamespaceContextPtr(new NullNamespaceContext())); } void setVariableResolver(const VariableResolver& variableResolver) { variableResolver_.set(variableResolver); } void setVariableResolver(VariableResolverPtr variableResolver) { variableResolver_.set(variableResolver); } const VariableResolver& getVariableResolver() const { return variableResolver_.get(); } void resetVariableResolver() { variableResolver_.set(VariableResolverPtr(new NullVariableResolver())); } void setFunctionResolver(const FunctionResolver& functionResolver) { functionResolver_.set(functionResolver); } void setFunctionResolver(FunctionResolverPtr functionResolver) { functionResolver_.set(functionResolver); } const FunctionResolver& getFunctionResolver() const { return functionResolver_.get(); } void resetFunctionResolver() { functionResolver_.set(FunctionResolverPtr(new NullFunctionResolver())); } private: XPathExpressionPtr do_compile(const string_type& xpath, tree_info_t(XPath::*fn)(const string_type& str) const) const { tree_info_t ast; try { ast = (this->*fn)(xpath); if(!ast.full) throw SyntaxException(xpath); CompilationContext context(*this, getNamespaceContext(), getFunctionResolver()); return XPathExpressionPtr(compile_expression(ast.trees.begin(), context)); } // try catch(std::exception& ex) { throw ex; } // catch catch(...) { throw SyntaxException(xpath); } // catch } // do_compile tree_info_t parse_xpath(const string_type& str) const { str_iter_t first = str.begin(), last = str.end(); return ast_parse(first, last, xpathg_); } // parse_xpath tree_info_t parse_xpath_expr(const string_type& str) const { str_iter_t first = str.begin(), last = str.end(); return ast_parse(first, last, xpathge_); } // parse_xpath xpath_grammar xpathg_; xpath_grammar_expr xpathge_; ResolverHolder > namespaceContext_; ResolverHolder > variableResolver_; ResolverHolder functionResolver_; ///////////////////////////////////////////////////////////////////////////////// public: static XPathExpression* compile_expression(node_iter_t const& i, CompilationContext& context) { long id = getNodeId(i); if(XPath::factory().find(id) == XPath::factory().end()) { XPath::dump(i, 0); throw UnsupportedException(XPath::names()[id]); } return XPath::factory()[id](i, context); } // compile_expression private: static XPathExpression* createAbsoluteLocationPath(node_iter_t const& i, CompilationContext& context); static XPathExpression* createRelativeLocationPath(node_iter_t const& i, CompilationContext& context); static XPathExpression* createSingleStepRelativeLocationPath(node_iter_t const& i, CompilationContext& context); static XPathExpression* createExpression(node_iter_t const& i, CompilationContext& context); static XPathExpression* createFunction(node_iter_t const& i, CompilationContext& context); static XPathExpression* createBinaryExpression(node_iter_t const& i, CompilationContext& context); static XPathExpression* createLiteral(node_iter_t const& i, CompilationContext& context); static XPathExpression* createNumber(node_iter_t const& i, CompilationContext& context); static XPathExpression* createVariable(node_iter_t const& i, CompilationContext& context); static XPathExpression* createSingleStepAbsoluteLocationPath(node_iter_t const& i, CompilationContext& context); static XPathExpression* createUnaryExpression(node_iter_t const& i, CompilationContext& context); static XPathExpression* createUnaryNegativeExpr(node_iter_t const& i, CompilationContext& context); static StepList createStepList(node_iter_t const& from, node_iter_t const& to, CompilationContext& context); typedef XPathExpression* (*compileFn)(node_iter_t const& i, CompilationContext& context); static std::map& factory() { static std::map f = init_createFunctions(); return f; } // factory static std::map& names() { static std::map n = init_debugNames(); return n; } // names static const std::map init_createFunctions() { std::map factory; factory[impl::AbsoluteLocationPath_id] = createAbsoluteLocationPath; factory[impl::RelativeLocationPath_id] = createRelativeLocationPath; factory[impl::AbbreviatedAbsoluteLocationPath_id] = createAbsoluteLocationPath; factory[impl::Step_id] = createRelativeLocationPath; factory[impl::PathExpr_id] = createRelativeLocationPath; factory[impl::FilterExpr_id] = createRelativeLocationPath; factory[impl::PrimaryExpr_id] = createExpression; factory[impl::FunctionCall_id] = createFunction; factory[impl::AdditiveExpr_id] = createBinaryExpression; factory[impl::MultiplicativeExpr_id] = createBinaryExpression; factory[impl::EqualityExpr_id] = createBinaryExpression; factory[impl::RelationalExpr_id] = createBinaryExpression; factory[impl::OrExpr_id] = createBinaryExpression; factory[impl::AndExpr_id] = createBinaryExpression; factory[impl::UnionExpr_id] = createBinaryExpression; factory[impl::Literal_id] = createLiteral; factory[impl::Number_id] = createNumber; factory[impl::Digits_id] = createNumber; factory[impl::VariableReference_id] = createVariable; factory[impl::NodeTest_id] = createSingleStepRelativeLocationPath; factory[impl::QName_id] = createSingleStepRelativeLocationPath; factory[impl::NCName_id] = createSingleStepRelativeLocationPath; factory[impl::AnyName_id] = createSingleStepRelativeLocationPath; factory[impl::Text_id] = createSingleStepRelativeLocationPath; factory[impl::Comment_id] = createSingleStepRelativeLocationPath; factory[impl::ProcessingInstruction_id] = createSingleStepRelativeLocationPath; factory[impl::Slash_id] = createSingleStepAbsoluteLocationPath; factory[impl::SelfSelect_id] = createSingleStepRelativeLocationPath; factory[impl::ParentSelect_id] = createSingleStepRelativeLocationPath; factory[impl::UnaryExpr_id] = createUnaryExpression; factory[impl::UnaryMinusOperator_id] = createUnaryNegativeExpr; return factory; } // init_createFunctions static const std::map init_debugNames() { std::map names; names[impl::LocationPath_id] = "LocationPath"; names[impl::AbsoluteLocationPath_id] = "AbsoluteLocationPath"; names[impl::RelativeLocationPath_id] = "RelativeLocationPath"; names[impl::Step_id] = "Step"; names[impl::AxisSpecifier_id] = "AxisSpecifier"; names[impl::NodeTest_id] = "NodeTest"; names[impl::Predicate_id] = "Predicate"; names[impl::PredicateExpr_id] = "PredicateExpr"; names[impl::AbbreviatedAbsoluteLocationPath_id] = "AbbreviatedAbsoluteLocationPath"; names[impl::AbbreviatedStep_id] = "AbbreviatedStep"; names[impl::AbbreviatedAxisSpecifier_id] = "AbbreviatedAxisSpecifier"; names[impl::Expr_id] = "Expr"; names[impl::PrimaryExpr_id] = "PrimaryExpr"; names[impl::FunctionCall_id] = "FunctionCall"; names[impl::Argument_id] = "Argument"; names[impl::UnionExpr_id] = "UnionExpr"; names[impl::PathExpr_id] = "PathExpr"; names[impl::FilterExpr_id] = "FilterExpr"; names[impl::OrExpr_id] = "OrExpr"; names[impl::AndExpr_id] = "AndExpr"; names[impl::EqualityExpr_id] = "EqualityExpr"; names[impl::RelationalExpr_id] = "RelationalExpr"; names[impl::AdditiveExpr_id] = "AdditiveExpr"; names[impl::MultiplicativeExpr_id] = "MultiplicativeExpr"; names[impl::UnaryExpr_id] = "UnaryExpr"; names[impl::Literal_id] = "Literal"; names[impl::Number_id] = "Number"; names[impl::Digits_id] = "Digits"; names[impl::MultiplyOperator_id] = "MultiplyOperator"; names[impl::FunctionName_id] = "FunctionName"; names[impl::VariableReference_id] = "VariableReference"; names[impl::NameTest_id] = "NameTest"; names[impl::S_id] = "S"; names[impl::NodeType_id] = "NodeType"; names[impl::AxisName_id] = "AxisName"; names[impl::QName_id] = "QName"; names[impl::Prefix_id] = "Prefix"; names[impl::LocalPart_id] = "LocalPart"; names[impl::NCName_id] = "NCName"; names[impl::NCNameChar_id] = "NCNameChar"; names[impl::Slash_id] = "/"; names[impl::SlashSlash_id] = "//"; names[impl::AncestorOrSelf_id] = "ancestor-or-self::"; names[impl::Ancestor_id] = "ancestor::"; names[impl::Attribute_id] = "attribute::"; names[impl::Child_id] = "child::"; names[impl::DescendantOrSelf_id] = "descendant-or-self::"; names[impl::Descendant_id] = "descendant::"; names[impl::FollowingSibling_id] = "following-sibling::"; names[impl::Following_id] = "following::"; names[impl::Namespace_id] = "namespace::"; names[impl::Parent_id] = "parent::"; names[impl::PrecedingSibling_id] = "preceding-sibling::"; names[impl::Preceding_id] = "preceding::"; names[impl::Self_id] = "self::"; names[impl::Comment_id] = "comment()"; names[impl::Text_id] = "text()"; names[impl::ProcessingInstruction_id] = "processing-instruction()"; names[impl::Node_id] = "node()"; names[impl::AnyName_id] = "AnyName"; names[impl::SelfSelect_id] = "SelfSelect"; names[impl::ParentSelect_id] = "ParentSelect"; names[impl::LeftSquare_id] = "["; names[impl::RightSquare_id] = "]"; names[impl::LeftBracket_id] = "("; names[impl::RightBracket_id] = ")"; names[impl::PlusOperator_id] = "+"; names[impl::MinusOperator_id] = "-"; names[impl::ModOperator_id] = "mod"; names[impl::DivOperator_id] = "div"; names[impl::EqualsOperator_id] = "="; names[impl::NotEqualsOperator_id] = "!="; names[impl::LessThanOperator_id] = "<"; names[impl::LessThanEqualsOperator_id] = "<="; names[impl::GreaterThanOperator_id] = ">"; names[impl::GreaterThanEqualsOperator_id] = ">="; names[impl::OrOperator_id] = "or"; names[impl::AndOperator_id] = "and"; names[impl::UnionOperator_id] = "union"; names[impl::UnaryMinusOperator_id] = "minus"; return names; } // init_debugNames static void dump(node_iter_t const& i, int depth) { long id = static_cast(i->value.id().to_long()); for(int d = 0; d < depth; ++d) std::cerr << ' '; std::cerr << names()[id] << " - " << std::string(i->value.begin(), i->value.end()) << std::endl; for(node_iter_t c = i->children.begin(); c != i->children.end(); ++c) dump(c, depth+2); } // dump XPath(const XPath&); XPath& operator=(const XPath&); bool operator==(const XPath&) const; }; // class XPath } // namespace XPath } // namespace Arabica #include #include "xpath_value.hpp" #include "xpath_arithmetic.hpp" #include "xpath_relational.hpp" #include "xpath_logical.hpp" #include "xpath_step.hpp" #include "xpath_compile_context.hpp" #include "xpath_variable.hpp" #include "xpath_function_holder.hpp" #include "xpath_union.hpp" namespace Arabica { namespace XPath { template XPathExpression* XPath::createAbsoluteLocationPath(node_iter_t const& i, CompilationContext& context) { return new AbsoluteLocationPath(createStepList(i->children.begin(), i->children.end(), context)); } // createAbsoluteLocationPath template XPathExpression* XPath::createRelativeLocationPath(node_iter_t const& i, CompilationContext& context) { return new RelativeLocationPath(createStepList(i->children.begin(), i->children.end(), context)); } // createRelativeLocationPath template XPathExpression* XPath::createSingleStepRelativeLocationPath(node_iter_t const& i, CompilationContext& context) { node_iter_t n = i; return new RelativeLocationPath(StepFactory::createStep(n, context)); } // createSingleStepRelativeLocationPath template XPathExpression* XPath::createExpression(node_iter_t const& i, CompilationContext& context) { node_iter_t c = i->children.begin(); skipWhitespace(c); return XPath::compile_expression(c, context); } // createExpression template XPathExpression* XPath::createFunction(node_iter_t const& i, CompilationContext& context) { node_iter_t c = i->children.begin(); string_type name(c->value.begin(), c->value.end()); ++c; skipWhitespace(c); assert(getNodeId(c) == impl::LeftBracket_id); ++c; skipWhitespace(c); std::vector > args; while(getNodeId(c) != impl::RightBracket_id) { XPathExpressionPtr arg(XPath::compile_expression(c++, context)); args.push_back(arg); skipWhitespace(c); } // while ... // maybe trailing whitespace ... return FunctionHolder::createFunction(name, args, context); } // createFunction template XPathExpression* XPath::createBinaryExpression(node_iter_t const& i, CompilationContext& context) { node_iter_t c = i->children.begin(); XPathExpression* p1 = XPath::compile_expression(c, context); ++c; do { long op = getNodeId(c); ++c; XPathExpression* p2 = XPath::compile_expression(c, context); switch(op) { case impl::PlusOperator_id: p1 = new PlusOperator(p1, p2); break; case impl::MinusOperator_id: p1 = new MinusOperator(p1, p2); break; case impl::MultiplyOperator_id: p1 = new MultiplyOperator(p1, p2); break; case impl::DivOperator_id: p1 = new DivideOperator(p1, p2); break; case impl::ModOperator_id: p1 = new ModOperator(p1, p2); break; case impl::EqualsOperator_id: p1 = new EqualsOperator(p1, p2); break; case impl::NotEqualsOperator_id: p1 = new NotEqualsOperator(p1, p2); break; case impl::LessThanOperator_id: p1 = new LessThanOperator(p1, p2); break; case impl::LessThanEqualsOperator_id: p1 = new LessThanEqualsOperator(p1, p2); break; case impl::GreaterThanOperator_id: p1 = new GreaterThanOperator(p1, p2); break; case impl::GreaterThanEqualsOperator_id: p1 = new GreaterThanEqualsOperator(p1, p2); break; case impl::OrOperator_id: p1 = new OrOperator(p1, p2); break; case impl::AndOperator_id: p1 = new AndOperator(p1, p2); break; case impl::UnionOperator_id: p1 = new UnionExpression(p1, p2); break; default: throw UnsupportedException(boost::lexical_cast(op)); } // switch } while(++c != i->children.end()); return p1; } // createBinaryExpression template XPathExpression* XPath::createLiteral(node_iter_t const& i, CompilationContext& context) { string_type str(i->value.begin(), i->value.end()); return new StringValue(str); } // createLiteral template XPathExpression* XPath::createNumber(node_iter_t const& i, CompilationContext& context) { return new NumericValue(boost::lexical_cast(string_type(i->value.begin(), i->value.end()))); } // createNumber template XPathExpression* XPath::createVariable(node_iter_t const& i, CompilationContext& context) { return new Variable(string_type(i->value.begin()+1, i->value.end())); // skip $ } // createVariable template XPathExpression* XPath::createSingleStepAbsoluteLocationPath(node_iter_t const& i, CompilationContext& context) { node_iter_t n = i; return new AbsoluteLocationPath(StepFactory::createStep(n, context)); } // createSingleStepAbsoluteLocationPath template XPathExpression* XPath::createUnaryExpression(node_iter_t const& i, CompilationContext& context) { return XPath::compile_expression(i->children.begin(), context); } // createUnaryExpression template XPathExpression* XPath::createUnaryNegativeExpr(node_iter_t const& i, CompilationContext& context) { return new UnaryNegative(XPath::compile_expression(i+1, context)); } // createUnaryNegativeExpr template StepList XPath::createStepList(node_iter_t const& from, node_iter_t const& to, CompilationContext& context) { StepList steps; node_iter_t c = from; node_iter_t end = to; while(c != end) switch(getNodeId(c)) { case impl::S_id: case impl::Slash_id: ++c; // just drop it break; case impl::RelativeLocationPath_id: // might get here when handling an absolute path end = c->children.end(); c = c->children.begin(); break; case impl::Step_id: { node_iter_t step = c->children.begin(); steps.push_back(StepFactory::createStep(step, c->children.end(), context)); ++c; } break; default: steps.push_back(StepFactory::createStep(c, end, context)); } // switch(getNodeId(c)) return steps; } // createStepList } // namespace XPath } // namespace Arabica #endif