2005-04-09 21:16:09 +02:00
|
|
|
%{
|
2005-05-06 01:45:04 +02:00
|
|
|
/* Eliot */
|
|
|
|
/* Copyright (C) 2005 Antoine Fraboulet */
|
|
|
|
/* */
|
|
|
|
/* This file is part of Eliot. */
|
|
|
|
/* */
|
|
|
|
/* Eliot is free software; you can redistribute it and/or modify */
|
|
|
|
/* it under the terms of the GNU General Public License as published by */
|
|
|
|
/* the Free Software Foundation; either version 2 of the License, or */
|
|
|
|
/* (at your option) any later version. */
|
|
|
|
/* */
|
|
|
|
/* Eliot is distributed in the hope that it will be useful, */
|
|
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
|
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
|
|
|
/* GNU General Public License for more details. */
|
|
|
|
/* */
|
|
|
|
/* You should have received a copy of the GNU General Public License */
|
|
|
|
/* along with this program; if not, write to the Free Software */
|
2005-10-23 16:53:42 +02:00
|
|
|
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
2005-05-06 01:45:04 +02:00
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
#include <stdio.h>
|
2005-11-05 18:56:22 +01:00
|
|
|
#include <malloc.h>
|
|
|
|
#include <stdlib.h>
|
2005-04-27 19:35:03 +02:00
|
|
|
#include <string.h>
|
2005-04-19 18:26:50 +02:00
|
|
|
|
|
|
|
#include "dic.h"
|
2005-11-05 18:56:22 +01:00
|
|
|
#include "dic_internals.h"
|
2005-04-19 18:26:50 +02:00
|
|
|
|
2005-04-27 19:35:03 +02:00
|
|
|
#include "dic.h"
|
2005-04-09 21:16:09 +02:00
|
|
|
#include "regexp.h"
|
2005-04-27 19:35:03 +02:00
|
|
|
#include "dic_search.h"
|
2005-04-19 18:26:50 +02:00
|
|
|
#include "libdic_a-er.h"
|
2005-04-16 22:55:51 +02:00
|
|
|
#include "scanner.h"
|
2005-04-09 21:16:09 +02:00
|
|
|
|
2005-04-27 19:35:03 +02:00
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
/**
 * Prototype of the parser entry point generated by bison.
 *
 * The arguments mirror the %parse-param declarations of this grammar:
 * the scanner handle, the location that receives the root of the syntax
 * tree, the letter-class list filled in while reducing bracket
 * expressions, and the error report filled in by regexperror().
 */
int regexpparse(yyscan_t scanner, NODE** root,
                struct search_RegE_list_t *list,
                struct regexp_error_report_t *err);
|
|
|
|
/**
 * Prototype of the error-reporting callback.
 *
 * It receives the location of the error, the same extra arguments as the
 * parser itself, and the message to record in err.
 */
void regexperror(YYLTYPE *llocp, yyscan_t scanner, NODE** root,
                 struct search_RegE_list_t *list,
                 struct regexp_error_report_t *err,
                 char const *msg);
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
%}
|
2005-04-09 21:16:09 +02:00
|
|
|
/* Semantic values carried by tokens and non-terminals. */
%union {
  char  c;                     /* LEX_CHAR: the letter code given by the scanner  */
  NODE *NODE_TYPE;             /* var, expr, exprdisnode: a syntax tree node      */
  char  letters[DIC_LETTERS];  /* vardis, exprdis: one flag per letter code       */
};
|
|
|
|
|
2005-04-18 19:40:36 +02:00
|
|
|
/* Generator options: emit a header, prefix symbols with "regexp",
   build a pure parser and track token locations. */
%defines
%name-prefix="regexp"
%pure-parser
%locations

/* Extra arguments threaded through the parser, the lexer call and the
   error callback. */
%parse-param {yyscan_t yyscanner}
%parse-param {NODE **root}
%parse-param {struct search_RegE_list_t *list}
%parse-param {struct regexp_error_report_t *err}
%lex-param   {yyscan_t yyscanner}

/* Atoms: a literal letter and the predefined / user-defined classes. */
%token <c> LEX_CHAR
%token LEX_ALL LEX_VOWL LEX_CONS LEX_USER1 LEX_USER2

/* Grouping and class delimiters. */
%token LEX_L_SQBRACKET LEX_R_SQBRACKET
%token LEX_L_BRACKET LEX_R_BRACKET
%token LEX_HAT

/* Postfix operators and the end marker. */
%token LEX_QMARK LEX_PLUS LEX_STAR LEX_SHARP

/* Semantic types of the non-terminals. */
%type <NODE_TYPE> var expr
%type <letters> vardis exprdis
%type <NODE_TYPE> exprdisnode

%start start
|
2005-04-09 21:16:09 +02:00
|
|
|
%%
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
/* Whole pattern: a bracketed expression followed by the sharp token. */
start: LEX_L_BRACKET expr LEX_R_BRACKET LEX_SHARP
       {
         /* The tree handed back through root is (expr AND final), where
            final is an RE_FINAL_TOK leaf. Parsing stops right here. */
         *root = regexp_createNODE(NODE_AND,'\0',$2,
                                   regexp_createNODE(NODE_VAR,RE_FINAL_TOK,NULL,NULL));
         YYACCEPT;
       }
     ;
|
|
|
|
|
2005-04-27 19:35:03 +02:00
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
/*
 * Expressions. Each of the three atom shapes (plain var, parenthesised
 * expression, bracketed letter class) may be followed by '?', '+' or '*':
 *   x?  is built as (x OR epsilon)
 *   x+  is built as a NODE_PLUS over x
 *   x*  is built as a NODE_STAR over x
 * Two adjacent expressions are concatenated with a NODE_AND.
 */
expr : var
       {
         $$ = $1;
       }
     | expr expr
       {
         $$ = regexp_createNODE(NODE_AND,'\0',$1,$2);
       }
     | var LEX_QMARK
       {
         $$ = regexp_createNODE(NODE_OR,'\0',$1,
                                regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL));
       }
     | var LEX_PLUS
       {
         $$ = regexp_createNODE(NODE_PLUS,'\0',$1,NULL);
       }
     | var LEX_STAR
       {
         $$ = regexp_createNODE(NODE_STAR,'\0',$1,NULL);
       }
     /* parenthesised sub-expression: ( ) */
     | LEX_L_BRACKET expr LEX_R_BRACKET
       {
         $$ = $2;
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_QMARK
       {
         $$ = regexp_createNODE(NODE_OR,'\0',$2,
                                regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL));
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_PLUS
       {
         $$ = regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_STAR
       {
         $$ = regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
     /* letter class: [ ] */
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET
       {
         $$ = $2;
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_QMARK
       {
         $$ = regexp_createNODE(NODE_OR,'\0',$2,
                                regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL));
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_PLUS
       {
         $$ = regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_STAR
       {
         $$ = regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
     ;
|
2005-04-09 21:16:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
/*
 * Atoms: every alternative yields a NODE_VAR leaf. A literal letter carries
 * the code delivered by the scanner; the other tokens map to the matching
 * RE_*_MATCH symbol.
 */
var : LEX_CHAR
       {
#ifdef DEBUG_RE_PARSE
         /* the trace prints the code shifted by 'a' - 1 */
         printf("var : lecture %c\n",$1 + 'a' -1);
#endif
         $$ = regexp_createNODE(NODE_VAR,$1,NULL,NULL);
       }
     | LEX_ALL
       {
         $$ = regexp_createNODE(NODE_VAR,RE_ALL_MATCH,NULL,NULL);
       }
     | LEX_VOWL
       {
         $$ = regexp_createNODE(NODE_VAR,RE_VOWL_MATCH,NULL,NULL);
       }
     | LEX_CONS
       {
         $$ = regexp_createNODE(NODE_VAR,RE_CONS_MATCH,NULL,NULL);
       }
     | LEX_USER1
       {
         $$ = regexp_createNODE(NODE_VAR,RE_USR1_MATCH,NULL,NULL);
       }
     | LEX_USER2
       {
         $$ = regexp_createNODE(NODE_VAR,RE_USR2_MATCH,NULL,NULL);
       }
     ;
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
2005-04-27 19:35:03 +02:00
|
|
|
/*
 * Letter class found between square brackets.
 *
 * The class is stored in the first unused slot of `list` located after
 * RE_LIST_USER_END: the slot is marked valid, given the symbol
 * RE_ALL_MATCH + slot, and its per-letter flags are set from the parsed
 * class (inverted for the LEX_HAT form). Flag 0 is always cleared. The
 * resulting node is a NODE_VAR leaf carrying the slot's symbol.
 *
 * When every slot is already in use the parse is aborted with an error
 * instead of reading list->symbl[] one past the last slot examined
 * (assumes the list arrays hold DIC_SEARCH_REGE_LIST entries, as the loop
 * bound implies).
 */
exprdisnode : exprdis
       {
         int i,j;
#ifdef DEBUG_RE_PARSE
         printf("exprdisnode : exprdis : ");
#endif
         for(i=RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
           {
             if (list->valid[i] == 0)
               {
                 list->valid[i] = 1;
                 list->symbl[i] = RE_ALL_MATCH + i;
                 list->letters[i][0] = 0;
                 for(j=1; j < DIC_LETTERS; j++)
                   {
                     list->letters[i][j] = $1[j] ? 1 : 0;
                   }
#ifdef DEBUG_RE_PARSE
                 printf("list %d symbl x%02x : ",i,list->symbl[i]);
                 for(j=0; j < DIC_LETTERS; j++)
                   {
                     if (list->letters[i][j])
                       printf("%c",j+'a'-1);
                   }
                 printf("\n");
#endif
                 break;
               }
           }
         if (i >= DIC_SEARCH_REGE_LIST)
           {
             /* no free slot: i is out of range, so do not index with it */
             regexperror(&@$, yyscanner, root, list, err,
                         "too many letter classes");
             YYABORT;
           }
         $$=regexp_createNODE(NODE_VAR,list->symbl[i],NULL,NULL);
       }
     | LEX_HAT exprdis
       {
         int i,j;
#ifdef DEBUG_RE_PARSE
         printf("exprdisnode : HAT exprdis : ");
#endif
         for(i=RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
           {
             if (list->valid[i] == 0)
               {
                 list->valid[i] = 1;
                 list->symbl[i] = RE_ALL_MATCH + i;
                 list->letters[i][0] = 0;
                 /* negated class: keep the letters that were NOT listed */
                 for(j=1; j < DIC_LETTERS; j++)
                   {
                     list->letters[i][j] = $2[j] ? 0 : 1;
                   }
#ifdef DEBUG_RE_PARSE
                 printf("list %d symbl x%02x : ",i,list->symbl[i]);
                 for(j=0; j < DIC_LETTERS; j++)
                   {
                     if (list->letters[i][j])
                       printf("%c",j+'a'-1);
                   }
                 printf("\n");
#endif
                 break;
               }
           }
         if (i >= DIC_SEARCH_REGE_LIST)
           {
             /* no free slot: i is out of range, so do not index with it */
             regexperror(&@$, yyscanner, root, list, err,
                         "too many letter classes");
             YYABORT;
           }
         $$=regexp_createNODE(NODE_VAR,list->symbl[i],NULL,NULL);
       }
     ;
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Contents of a letter class: one or more letters. The value is an array
 * of DIC_LETTERS flags; a sequence of letters is the element-wise OR of
 * the flags of its members.
 */
exprdis: vardis
       {
         /* a single letter: take over its flag array unchanged */
         memcpy($$,$1,DIC_LETTERS * sizeof(char));
       }
     | vardis exprdis
       {
         int k = 0;
         while (k < DIC_LETTERS)
           {
             $$[k] = $1[k] | $2[k];
             k++;
           }
       }
     ;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * One letter inside a letter class: an all-zero flag array with only the
 * entry of that letter set.
 */
vardis: LEX_CHAR
       {
         int letter = $1;
         memset($$,0,DIC_LETTERS * sizeof(char));
#ifdef DEBUG_RE_PARSE
         printf("vardis : lecture %c\n",letter + 'a' -1);
#endif
         /* NOTE(review): the index is not range-checked here; this assumes
            the scanner only delivers codes in [0, DIC_LETTERS) -- confirm */
         $$[letter] = 1;
       }
     ;
|
2005-04-27 19:35:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
%%
|
2005-04-16 22:55:51 +02:00
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
/**
 * Error callback of the parser: stores the column range of the error
 * location and the message in the error report.
 *
 * @param llocp     location of the error (first/last column are recorded)
 * @param yyscanner scanner handle (unused here)
 * @param root      syntax tree result slot (unused here)
 * @param list      letter-class list (unused here)
 * @param err       report receiving pos1, pos2 and msg
 * @param msg       message text; truncated if it does not fit in err->msg
 */
void regexperror(YYLTYPE *llocp, yyscan_t yyscanner, NODE** root,
                 struct search_RegE_list_t *list,
                 struct regexp_error_report_t *err, char const *msg)
{
  err->pos1 = llocp->first_column;
  err->pos2 = llocp->last_column;
  /* strncpy() does not terminate the destination when the source fills it,
     so copy one byte less and terminate explicitly (assumes err->msg is a
     fixed-size char array, as the original sizeof already implied). */
  strncpy(err->msg,msg,sizeof(err->msg) - 1);
  err->msg[sizeof(err->msg) - 1] = '\0';
}
|
2005-05-06 01:45:04 +02:00
|
|
|
|
|
|
|
/*
 * Present only to keep the compiler quiet (presumably about a static
 * yy_init_globals declaration coming in through scanner.h -- TODO confirm).
 *
 * Do not call this function: its body calls itself unconditionally, which
 * merely serves as a reference to the symbol.
 */
static int yy_init_globals (yyscan_t yyscanner )
{
  (void) yy_init_globals(yyscanner);
  return 0;
}
|