%{
/*****************************************************************************
 * Eliot
 * Copyright (C) 2005-2007 Antoine Fraboulet
 * Authors: Antoine Fraboulet
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *****************************************************************************/

/*
 * NOTE(review): the names of the four system headers were lost in the copy
 * this file was restored from.  The ones below cover everything the code in
 * this file calls (printf, memcpy, memset, strncpy, NULL) -- confirm against
 * the upstream file.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dic.h"
#include "regexp.h"
#include "libdic_a-ery.h"
#include "libdic_a-erl.h"

/* ************************************************** */
/* ************************************************** */
/* ************************************************** */

/**
 * function prototype for parser generated by bison
 */
int  regexpparse(yyscan_t scanner, NODE** root,
                 struct search_RegE_list_t *list,
                 struct regexp_error_report_t *err);

/**
 * function prototype for error reporting
 */
void regexperror(YYLTYPE *llocp, yyscan_t scanner, NODE** root,
                 struct search_RegE_list_t *list,
                 struct regexp_error_report_t *err,
                 char const *msg);

/**
 * Claim the first unused slot of `list` located after RE_LIST_USER_END and
 * fill it with the letter set of a bracket expression.
 *
 * @param list    table of letter lists shared with the caller of the parser
 * @param letters DIC_LETTERS flags, non-zero meaning "letter is listed"
 * @param negate  non-zero for a [^...] expression: the stored flags are the
 *                complement of `letters`
 * @return the index of the slot that was filled, or -1 when every slot is
 *         already in use (in which case `list` is left untouched)
 *
 * Index 0 of the stored set is always cleared, in both the plain and the
 * negated case.
 */
static int regexp_alloc_letter_class(struct search_RegE_list_t *list,
                                     const char *letters, int negate)
{
  int i, j;

  for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
    {
      if (list->valid[i] != 0)
        continue;

      list->valid[i]      = 1;
      list->symbl[i]      = RE_ALL_MATCH + i;
      list->letters[i][0] = 0;
      for (j = 1; j < DIC_LETTERS; j++)
        {
          int listed = letters[j] ? 1 : 0;
          list->letters[i][j] = negate ? !listed : listed;
        }
#ifdef DEBUG_RE_PARSE
      printf("list %d symbl x%02x : ",i,list->symbl[i]);
      for (j = 0; j < DIC_LETTERS; j++)
        if (list->letters[i][j])
          printf("%c",j+'a'-1);
      printf("\n");
#endif
      return i;
    }

  /* no free slot: the caller must not index the table with the loop bound */
  return -1;
}

/* ************************************************** */
/* ************************************************** */
/* ************************************************** */
%}

/*
 * Semantic values:
 *   c         - letter code carried by LEX_CHAR
 *   NODE_TYPE - sub-tree built for var / expr / exprdisnode
 *   letters   - per-letter flags accumulated inside [...]
 */
%union {
  char c;
  NODE *NODE_TYPE;
  char letters[DIC_LETTERS];
};

%defines
%name-prefix="regexp"
%pure-parser
%locations
%parse-param {yyscan_t yyscanner}
%parse-param {NODE **root}
%parse-param {struct search_RegE_list_t *list}
%parse-param {struct regexp_error_report_t *err}
%lex-param   {yyscan_t yyscanner}

/*
 * The <tag> annotations below tie each symbol to the %union member its
 * actions read or write ($1 of LEX_CHAR is used as a char, var/expr/
 * exprdisnode receive regexp_createNODE() results, vardis/exprdis are
 * handled with memset/memcpy over DIC_LETTERS bytes).
 */
%token <c> LEX_CHAR
%token LEX_ALL
%token LEX_VOWL
%token LEX_CONS
%token LEX_USER1
%token LEX_USER2

%token LEX_L_SQBRACKET LEX_R_SQBRACKET
%token LEX_L_BRACKET LEX_R_BRACKET
%token LEX_HAT

%token LEX_QMARK
%token LEX_PLUS
%token LEX_STAR
%token LEX_SHARP

%type <NODE_TYPE> var
%type <NODE_TYPE> expr
%type <letters>   vardis
%type <letters>   exprdis
%type <NODE_TYPE> exprdisnode
%start start
%%

/* Whole input: "( expr ) #".  The tree handed back through *root is
 * AND(expr, RE_FINAL_TOK leaf). */
start: LEX_L_BRACKET expr LEX_R_BRACKET LEX_SHARP
       {
         NODE* sharp = regexp_createNODE(NODE_VAR,RE_FINAL_TOK,NULL,NULL);
         *root = regexp_createNODE(NODE_AND,'\0',$2,sharp);
         YYACCEPT;
       }
     ;

/* NOTE(review): "expr expr" does not fix the associativity of
 * concatenation, so bison presumably reports conflicts here and falls
 * back on its default resolution -- left untouched on purpose. */
expr : var
       {
         $$=$1;
       }
     | expr expr
       {
         $$=regexp_createNODE(NODE_AND,'\0',$1,$2);
       }
     | var LEX_QMARK
       {
         /* x? is built as OR(x, RE_EPSILON leaf) */
         NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
         $$=regexp_createNODE(NODE_OR,'\0',$1,epsilon);
       }
     | var LEX_PLUS
       {
         $$=regexp_createNODE(NODE_PLUS,'\0',$1,NULL);
       }
     | var LEX_STAR
       {
         $$=regexp_createNODE(NODE_STAR,'\0',$1,NULL);
       }
/* () */
     | LEX_L_BRACKET expr LEX_R_BRACKET
       {
         $$=$2;
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_QMARK
       {
         NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
         $$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_PLUS
       {
         $$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_STAR
       {
         $$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
/* [] */
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET
       {
         $$=$2;
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_QMARK
       {
         NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
         $$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_PLUS
       {
         $$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_STAR
       {
         $$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
     ;

/* A single symbol: each alternative becomes one NODE_VAR leaf. */
var : LEX_CHAR
       {
#ifdef DEBUG_RE_PARSE
         printf("var : lecture %c\n",$1 + 'a' -1);
#endif
         $$=regexp_createNODE(NODE_VAR,$1,NULL,NULL);
       }
     | LEX_ALL
       {
         $$=regexp_createNODE(NODE_VAR,RE_ALL_MATCH,NULL,NULL);
       }
     | LEX_VOWL
       {
         $$=regexp_createNODE(NODE_VAR,RE_VOWL_MATCH,NULL,NULL);
       }
     | LEX_CONS
       {
         $$=regexp_createNODE(NODE_VAR,RE_CONS_MATCH,NULL,NULL);
       }
     | LEX_USER1
       {
         $$=regexp_createNODE(NODE_VAR,RE_USR1_MATCH,NULL,NULL);
       }
     | LEX_USER2
       {
         $$=regexp_createNODE(NODE_VAR,RE_USR2_MATCH,NULL,NULL);
       }
     ;

/* Content of [...] or [^...]: the letter set is stored in a free slot of
 * `list` and the resulting leaf carries that slot's symbol.  When the
 * table is full the parse is aborted with an error report instead of
 * reading list->symbl[] past its last slot. */
exprdisnode : exprdis
       {
         int slot;
#ifdef DEBUG_RE_PARSE
         printf("exprdisnode : exprdis : ");
#endif
         slot = regexp_alloc_letter_class(list, $1, 0);
         if (slot < 0)
           {
             regexperror(&@1, yyscanner, root, list, err,
                         "too many letter lists in regular expression");
             YYABORT;
           }
         $$=regexp_createNODE(NODE_VAR,list->symbl[slot],NULL,NULL);
       }
     | LEX_HAT exprdis
       {
         int slot;
#ifdef DEBUG_RE_PARSE
         printf("exprdisnode : HAT exprdis : ");
#endif
         slot = regexp_alloc_letter_class(list, $2, 1);
         if (slot < 0)
           {
             regexperror(&@2, yyscanner, root, list, err,
                         "too many letter lists in regular expression");
             YYABORT;
           }
         $$=regexp_createNODE(NODE_VAR,list->symbl[slot],NULL,NULL);
       }
     ;

/* One or more letters inside brackets, merged flag by flag. */
exprdis: vardis
       {
         memcpy($$,$1,sizeof(char)*DIC_LETTERS);
       }
     | vardis exprdis
       {
         int i;
         for(i=0; i < DIC_LETTERS; i++)
           $$[i] = $1[i] | $2[i];
       }
     ;

/* One letter inside brackets: a flag array with only that letter set. */
vardis: LEX_CHAR
       {
         int c = $1;
         /* $1 is a plain char: refuse anything that cannot index $$ */
         if (c < 0 || c >= DIC_LETTERS)
           {
             regexperror(&@1, yyscanner, root, list, err,
                         "letter out of range in letter list");
             YYABORT;
           }
         memset($$,0,sizeof(char)*DIC_LETTERS);
#ifdef DEBUG_RE_PARSE
         printf("vardis : lecture %c\n",c + 'a' -1);
#endif
         $$[c] = 1;
       }
     ;

%%

#define UNUSED __attribute__((unused))

/**
 * Error callback used by the generated parser (and by the actions above).
 *
 * Stores the first and last column of the offending location in
 * err->pos1 / err->pos2 and copies `msg` into err->msg, truncating it if
 * needed.  The scanner, root and list arguments are not used.
 */
void regexperror(YYLTYPE *llocp, yyscan_t UNUSED yyscanner, NODE UNUSED **root,
                 struct search_RegE_list_t UNUSED *list,
                 struct regexp_error_report_t *err, char const *msg)
{
  err->pos1 = llocp->first_column;
  err->pos2 = llocp->last_column;
  /* strncpy() does not terminate the destination when msg fills it
   * completely, so keep one byte back and terminate explicitly.
   * (assumes err->msg is a char array, as the sizeof already implied) */
  strncpy(err->msg,msg,sizeof(err->msg) - 1);
  err->msg[sizeof(err->msg) - 1] = '\0';
}

/*
 * shut down the compiler
 */
//int yy_init_globals (yyscan_t yyscanner);