// ---------------------------------------------------------------------------
// XPath grammars for Arabica, expressed as boost::spirit parser rules.
// Everything lives in namespace Arabica::XPath::impl.
//
// NOTE(review): in this copy of the file the physical line breaks have been
// lost, so each `//` comment below runs into the code that used to follow it,
// and every angle-bracketed piece of text is absent (`template struct ...`,
// `boost::spirit::rule > Name;`, `boost::spirit::grammar {`, and three bare
// `#include` directives).  The template parameter lists, the rule tag
// arguments and the include targets must be restored from the upstream file
// before this header can compile -- they cannot be recovered from what is
// shown here.
//
// Layout
//   * BOOST_SPIRIT_THREADSAFE is defined (unless already defined) ahead of the
//     #include directives.
//   * xpath_grammar_definition
//       Its constructor assigns every rule; the rule objects themselves are
//       declared as data members after the constructor.  The bracketed numbers
//       in the constructor's comments ([1], [2], ...) label the productions
//       being encoded (presumably the XPath 1.0 production numbers -- confirm).
//       Productions marked "not actually used" / "eliminated" have no rule.
//   * xpath_grammar                 -- start() returns LocationPath.
//   * xpath_grammar_expr            -- start() returns Expr.
//   * xpath_grammar_match           -- adds Pattern, LocationPathPattern,
//       IdKeyPattern, RelativePathPattern, StepPattern,
//       ChildOrAttributeAxisSpecifier and NodeMatchPattern; start() returns
//       Pattern.
//   * xpath_grammar_attribute_value -- adds AttributeValueTemplate,
//       DoubleLeftCurly, DoubleRightCurly, LeftCurly, RightCurly, EmbeddedExpr
//       and AttrLiteral; start() returns AttributeValueTemplate.
//     Each of the four grammar structs has a nested `definition` that derives
//     from xpath_grammar_definition, so all of them share the same base rules
//     and differ only in the extra rules they add and in their start rule.
//
// Things worth knowing when reading the rules
//   * S is `*space_p`, i.e. optional whitespace; it also matches nothing.
//   * AbbreviatedAxisSpecifier is `!ch_p('@')` (an optional '@'), so it too
//     can match nothing.
//   * Wherever one token is a prefix of another, the longer one is listed
//     first in the alternation: AncestorOrSelf before Ancestor,
//     DescendantOrSelf before Descendant, FollowingSibling before Following,
//     PrecedingSibling before Preceding, SlashSlash before Slash, ParentSelect
//     ("..") before SelfSelect ("."), and "<=" / ">=" before "<" / ">".
//     Keep that order when editing (Spirit's `|` is expected to take the first
//     alternative that matches -- see the Spirit documentation).
//   * FunctionName is `QName - NodeType`: a QName that is not one of
//     comment / text / processing-instruction / node.
//   * Number accepts `.` followed by Digits, or Digits optionally followed by
//     `.` and further digits.
//   * Literal accepts a double-quoted or a single-quoted string; the quote
//     characters are wrapped in discard_node_d and the content in token_node_d.
//   * The S, QName, Prefix, LocalPart, NCName and NCNameChar rules sit under
//     the original author's remark "These aren't correct to spec yet".
//   * In xpath_grammar_match, StepPattern tries NodeMatchPattern (the literal
//     text "node()") before falling back to the base NodeTest.
//   * In xpath_grammar_attribute_value, an AttributeValueTemplate is one or
//     more of: "{{", "}}", an EmbeddedExpr (LeftCurly, base Expr, RightCurly),
//     or an AttrLiteral (a run of one or more characters other than '{' and
//     '}').
// ---------------------------------------------------------------------------
#ifndef ARABICA_XPATHIC_XPATH_GRAMMER_HPP #define ARABICA_XPATHIC_XPATH_GRAMMER_HPP #ifndef BOOST_SPIRIT_THREADSAFE #define BOOST_SPIRIT_THREADSAFE #endif #include #include #include #include "xpath_ast_ids.hpp" namespace Arabica { namespace XPath { namespace impl { template struct xpath_grammar_definition { xpath_grammar_definition() { using namespace boost::spirit; // [1] LocationPath = AbsoluteLocationPath | RelativeLocationPath; // [2] AbsoluteLocationPath = AbbreviatedAbsoluteLocationPath | (Slash >> !RelativeLocationPath); // [3] RelativeLocationPath = Step >> *((SlashSlash | discard_node_d[Slash]) >> Step); // [4], [5] Step = AxisSpecifier >> NodeTest >> *Predicate | AbbreviatedStep; AxisSpecifier = S >> ( AxisName >> S >> "::" | AbbreviatedAxisSpecifier ) >> S; // [6] AxisName = AncestorOrSelf | Ancestor | Attribute | Child | DescendantOrSelf | Descendant | FollowingSibling | Following | Namespace | Parent | PrecedingSibling | Preceding | Self; // [7] NodeTest = S >>(ProcessingInstruction >> S >> discard_node_d[LeftBracket] >> S >> Literal >> S >> discard_node_d[RightBracket] | NodeType >> S >> discard_node_d[LeftBracket] >> S >> discard_node_d[RightBracket] | NameTest ) >> S; // [8], [9] Predicate = S >> LeftSquare >> PredicateExpr >> RightSquare >> S; PredicateExpr = Expr; // [10] AbbreviatedAbsoluteLocationPath = SlashSlash >> RelativeLocationPath; // [11] AbbreviatedRelativeLocationPath eliminated // [12], [13] AbbreviatedStep = ParentSelect | SelfSelect; AbbreviatedAxisSpecifier = !ch_p('@'); // [14], [15] Expr = OrExpr; PrimaryExpr = discard_node_d[S] >> (VariableReference | Number | FunctionCall | discard_node_d[LeftBracket] >> S >> Expr >> S >> discard_node_d[RightBracket] | Literal) >> discard_node_d[S]; // [16], [17] FunctionCall = FunctionName >> S >> LeftBracket >> !(Argument >> *(discard_node_d[ch_p(',')] >> S >> Argument)) >> S >> RightBracket >> S; Argument = Expr; // [18], [19], [20] // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 
UnionExpr = PathExpr >> *(UnionOperator >> PathExpr); /* LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | FilterExpr '//' RelativeLocationPath */ PathExpr = discard_node_d[S] >> (FilterExpr >> !((SlashSlash | Slash) >> RelativeLocationPath) | LocationPath) >> discard_node_d[S]; // FilterExpr ::= PrimaryExpr | FilterExpr Predicate FilterExpr = PrimaryExpr >> *Predicate; // [21], [22], [23], [24] OrExpr = AndExpr >> *(OrOperator >> AndExpr); AndExpr = EqualityExpr >> *(AndOperator >> EqualityExpr); EqualityExpr = RelationalExpr >> *((EqualsOperator | NotEqualsOperator) >> RelationalExpr); RelationalExpr = AdditiveExpr >> *((LessThanEqualsOperator | GreaterThanEqualsOperator | LessThanOperator |GreaterThanOperator) >> AdditiveExpr); // [25], [26], [27] AdditiveExpr = MultiplicativeExpr >> *((PlusOperator | MinusOperator) >> MultiplicativeExpr); MultiplicativeExpr = UnaryExpr >> *(token_node_d[(MultiplyOperator | DivOperator | ModOperator)] >> UnaryExpr); UnaryExpr = discard_node_d[S] >> *(UnaryMinusOperator) >> UnionExpr; // [28] ExprToken not actually used //[29], [30], [31], Literal = discard_node_d[ch_p('\"')] >> token_node_d[*~ch_p('\"')] >> discard_node_d[ch_p('\"')] | discard_node_d[ch_p('\'')] >> token_node_d[*~ch_p('\'')] >> discard_node_d[ch_p('\'')]; Number = token_node_d[ch_p('.') >> Digits | Digits >> !('.' >> *Digits)]; Digits = token_node_d[+digit_p]; // [32] Operator not actually used // [33] OperatorName not actually used // [34], [35], [36], [37], [38], [39] MultiplyOperator = ch_p('*'); FunctionName = QName - NodeType; VariableReference = ch_p('$') >> QName; NameTest = AnyName | NCName >> discard_node_d[ch_p(':')] >> AnyName | QName; NodeType = Comment | Text | ProcessingInstruction | Node; // These aren't correct to spec yet :) S = *space_p; QName = !(Prefix >> discard_node_d[ch_p(':')]) >> LocalPart; Prefix = NCName; LocalPart = NCName; NCName = token_node_d[(alpha_p | '_') >> *NCNameChar]; NCNameChar = alpha_p | digit_p | '.' 
| '-' | '_'; // things not defined in the spec, but which are just kind of handy :) Slash = ch_p('/'); SlashSlash = str_p("//"); AncestorOrSelf = str_p("ancestor-or-self"); Ancestor = str_p("ancestor"); Attribute = str_p("attribute"); Child = str_p("child"); DescendantOrSelf = str_p("descendant-or-self"); Descendant = str_p("descendant"); FollowingSibling = str_p("following-sibling"); Following = str_p("following"); Namespace = str_p("namespace"); Parent = str_p("parent"); PrecedingSibling = str_p("preceding-sibling"); Preceding = str_p("preceding"); Self = str_p("self"); Comment = str_p("comment"); Text = str_p("text"); ProcessingInstruction = str_p("processing-instruction"); Node = str_p("node"); AnyName = ch_p('*'); SelfSelect = ch_p('.'); ParentSelect = str_p(".."); LeftSquare = ch_p('['); RightSquare = ch_p(']'); LeftBracket = ch_p('('); RightBracket = ch_p(')'); PlusOperator = ch_p('+'); MinusOperator = ch_p('-'); ModOperator = str_p("mod"); DivOperator = str_p("div"); EqualsOperator = ch_p('='); NotEqualsOperator = str_p("!="); LessThanOperator = ch_p('<'); LessThanEqualsOperator = str_p("<="); GreaterThanOperator = ch_p('>'); GreaterThanEqualsOperator = str_p(">="); OrOperator = str_p("or"); AndOperator = str_p("and"); UnionOperator = ch_p('|'); UnaryMinusOperator = ch_p('-'); } // xpath_grammar_definition boost::spirit::rule > QName; boost::spirit::rule > Prefix; boost::spirit::rule > LocalPart; boost::spirit::rule > NCName; boost::spirit::rule > NCNameChar; boost::spirit::rule > AxisName; boost::spirit::rule > NodeType; boost::spirit::rule > LocationPath; boost::spirit::rule > AbsoluteLocationPath; boost::spirit::rule > RelativeLocationPath; boost::spirit::rule > Step; boost::spirit::rule > AxisSpecifier; boost::spirit::rule > NodeTest; boost::spirit::rule > Predicate; boost::spirit::rule > PredicateExpr; boost::spirit::rule > AbbreviatedAbsoluteLocationPath; boost::spirit::rule > AbbreviatedStep; boost::spirit::rule > AbbreviatedAxisSpecifier; 
boost::spirit::rule > Expr; boost::spirit::rule > PrimaryExpr; boost::spirit::rule > FunctionCall; boost::spirit::rule > Argument; boost::spirit::rule > UnionExpr; boost::spirit::rule > PathExpr; boost::spirit::rule > FilterExpr; boost::spirit::rule > OrExpr; boost::spirit::rule > AndExpr; boost::spirit::rule > EqualityExpr; boost::spirit::rule > RelationalExpr; boost::spirit::rule > AdditiveExpr; boost::spirit::rule > MultiplicativeExpr; boost::spirit::rule > UnaryExpr; boost::spirit::rule > Literal; boost::spirit::rule > Number; boost::spirit::rule > Digits; boost::spirit::rule > MultiplyOperator; boost::spirit::rule > FunctionName; boost::spirit::rule > VariableReference; boost::spirit::rule > NameTest; boost::spirit::rule > S; // ExprWhitespace // bonus bits boost::spirit::rule > Slash; boost::spirit::rule > SlashSlash; boost::spirit::rule > AncestorOrSelf; boost::spirit::rule > Ancestor; boost::spirit::rule > Attribute; boost::spirit::rule > Child; boost::spirit::rule > DescendantOrSelf; boost::spirit::rule > Descendant; boost::spirit::rule > FollowingSibling; boost::spirit::rule > Following; boost::spirit::rule > Namespace; boost::spirit::rule > Parent; boost::spirit::rule > PrecedingSibling; boost::spirit::rule > Preceding; boost::spirit::rule > Self; boost::spirit::rule > Comment; boost::spirit::rule > Text; boost::spirit::rule > ProcessingInstruction; boost::spirit::rule > Node; boost::spirit::rule > AnyName; boost::spirit::rule > SelfSelect; boost::spirit::rule > ParentSelect; boost::spirit::rule > LeftSquare; boost::spirit::rule > RightSquare; boost::spirit::rule > LeftBracket; boost::spirit::rule > RightBracket; boost::spirit::rule > PlusOperator; boost::spirit::rule > MinusOperator; boost::spirit::rule > ModOperator; boost::spirit::rule > DivOperator; boost::spirit::rule > EqualsOperator; boost::spirit::rule > NotEqualsOperator; boost::spirit::rule > LessThanOperator; boost::spirit::rule > LessThanEqualsOperator; boost::spirit::rule > 
GreaterThanOperator; boost::spirit::rule > GreaterThanEqualsOperator; boost::spirit::rule > OrOperator; boost::spirit::rule > AndOperator; boost::spirit::rule > UnionOperator; boost::spirit::rule > UnaryMinusOperator; }; // xpath_grammar_definition struct xpath_grammar : public boost::spirit::grammar { template struct definition : public xpath_grammar_definition { definition(xpath_grammar const& /* self */) { } // definition boost::spirit::rule > const& start() const { return xpath_grammar_definition::LocationPath; } // start }; // definition }; // xpath_grammar struct xpath_grammar_expr : public boost::spirit::grammar { template struct definition : public xpath_grammar_definition { definition(xpath_grammar_expr const& /* self */) { } // definition boost::spirit::rule > const& start() const { return xpath_grammar_definition::Expr; } // start }; // definition }; // xpath_grammar_expr struct xpath_grammar_match : public boost::spirit::grammar { template struct definition : public xpath_grammar_definition { definition(xpath_grammar_match const& /* self */) { using namespace boost::spirit; typedef xpath_grammar_definition base; // [1] Pattern ::= LocationPathPattern | Pattern '|' LocationPathPattern Pattern = discard_node_d[base::S] >> LocationPathPattern >> discard_node_d[base::S] >> *(base::UnionOperator >> discard_node_d[base::S] >> LocationPathPattern >> discard_node_d[base::S]); // [2] LocationPathPattern ::= '/' RelativePathPattern? // | IdKeyPattern (('/' | '//') RelativePathPattern)? // | '//'? 
RelativePathPattern LocationPathPattern = !base::SlashSlash >> RelativePathPattern | base::Slash >> !RelativePathPattern | IdKeyPattern >> !((base::SlashSlash | base::Slash) >> RelativePathPattern); // [3] IdKeyPattern ::= 'id' '(' Literal ')' | 'key' '(' Literal ',' Literal ')' IdKeyPattern = str_p("id") >> base::LeftBracket >> base::Literal >> base::RightBracket | str_p("key") >> base::LeftBracket >> base::Literal >> ',' >> base::Literal >> base::RightBracket; // [4] RelativePathPattern ::= StepPattern // | RelativePathPattern '/' StepPattern // | RelativePathPattern '//' StepPattern RelativePathPattern = StepPattern >> *((base::SlashSlash | base::Slash) >> StepPattern); // [5] StepPattern ::= ChildOrAttributeAxisSpecifier NodeTest Predicate* StepPattern = ChildOrAttributeAxisSpecifier >> (NodeMatchPattern|base::NodeTest) >> *base::Predicate; // [6] ChildOrAttributeAxisSpecifier ::= AbbreviatedAxisSpecifier | ('child' | 'attribute') '::' ChildOrAttributeAxisSpecifier = ((base::Child | base::Attribute) >> discard_node_d[str_p("::")]) | base::AbbreviatedAxisSpecifier; NodeMatchPattern = str_p("node()"); } // definition boost::spirit::rule > const& start() const { return Pattern; } // start boost::spirit::rule > Pattern; boost::spirit::rule > LocationPathPattern; boost::spirit::rule > IdKeyPattern; boost::spirit::rule > RelativePathPattern; boost::spirit::rule > StepPattern; boost::spirit::rule > ChildOrAttributeAxisSpecifier; boost::spirit::rule > NodeMatchPattern; }; // definition }; // xpath_grammar_match struct xpath_grammar_attribute_value : public boost::spirit::grammar { template struct definition : public xpath_grammar_definition { definition(xpath_grammar_attribute_value const& /* self */) { using namespace boost::spirit; typedef xpath_grammar_definition base; AttributeValueTemplate = (DoubleLeftCurly | DoubleRightCurly | EmbeddedExpr | AttrLiteral) >> *(DoubleLeftCurly | DoubleRightCurly | EmbeddedExpr | AttrLiteral); DoubleLeftCurly = str_p("{{"); 
DoubleRightCurly = str_p("}}"); LeftCurly = ch_p('{'); RightCurly = ch_p('}'); EmbeddedExpr = LeftCurly >> base::Expr >> RightCurly; chset<> brackets("{}"); AttrLiteral = token_node_d[~brackets >> *~brackets]; } // definition boost::spirit::rule > const& start() const { return AttributeValueTemplate; } // start boost::spirit::rule > AttributeValueTemplate; boost::spirit::rule > DoubleLeftCurly; boost::spirit::rule > DoubleRightCurly; boost::spirit::rule > LeftCurly; boost::spirit::rule > RightCurly; boost::spirit::rule > EmbeddedExpr; boost::spirit::rule > AttrLiteral; }; // definition }; // xpath_grammar_attribute_value } // namespace impl } // namespace XPath } // namespace Arabica #endif