eliot/dic/ery.ypp
Olivier Teulière e7a8d01a8d Merged the "cppdic" branch back into HEAD.
There are too many changes to list properly; here is an overview of the main changes:
 - the dictionary is now in C++
 - the dictionary has a new format, where it is possible to specify the letters,
   their points, their frequency, ... It is backwards compatible.
 - Eliot now supports non-ASCII characters everywhere
 - i18n of the compdic, listdic, regexpmain binaries
 - i18n of the wxWidgets interface (now in english by default)
2008-01-08 13:52:32 +00:00

295 lines
7 KiB
Text

%{
/*****************************************************************************
* Eliot
* Copyright (C) 2005-2007 Antoine Fraboulet
* Authors: Antoine Fraboulet
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <malloc.h>
#include "dic.h"
#include "regexp.h"
#include "libdic_a-ery.h"
#include "libdic_a-erl.h"
/* ************************************************** */
/* ************************************************** */
/* ************************************************** */
/**
* Prototype of the parser that bison generates from this grammar.
*
* scanner: lexer state handed to the generated parser (declared below as
*          the first %parse-param and as the %lex-param).
* root:    output; the start rule stores the root of the syntax tree in *root
*          just before accepting.
* list:    table of letter sets; each bracket expression claims a free slot
*          in it while parsing (see the exprdisnode rule).
* err:     error report; filled in by regexperror().
*
* Return value follows the bison yyparse() convention (see the Bison manual).
*/
int regexpparse(yyscan_t scanner, NODE** root,
struct search_RegE_list_t *list,
struct regexp_error_report_t *err);
/**
* Prototype of the error-reporting callback.
*
* llocp: location of the error; its first_column / last_column are copied
*        into err.
* scanner, root, list: same objects as given to regexpparse(); not used by
*        the implementation at the end of this file.
* err:   receives the error columns and the message text.
* msg:   message describing the error.
*/
void regexperror(YYLTYPE *llocp, yyscan_t scanner, NODE** root,
struct search_RegE_list_t *list,
struct regexp_error_report_t *err,
char const *msg);
/* ************************************************** */
/* ************************************************** */
/* ************************************************** */
%}
/* Semantic value types carried by tokens and non-terminals. */
%union {
/* Letter code attached to a LEX_CHAR token. */
char c;
/* Syntax-tree node; type of var, expr and exprdisnode. */
NODE *NODE_TYPE;
/* Letter-set table indexed by letter code (non-zero = letter is in the
   set); type of vardis and exprdis. */
char letters[DIC_LETTERS];
};
/* Also generate a header file with the token and type definitions. */
%defines
/* Rename the generated yy* symbols so that the entry points are
   regexpparse() and regexperror(). */
%name-prefix="regexp"
/* Reentrant parser with location tracking; locations are what
   regexperror() receives through its YYLTYPE argument. */
%pure-parser
%locations
/* Extra arguments of regexpparse(); they are visible inside the actions
   under these names. */
%parse-param {yyscan_t yyscanner}
%parse-param {NODE **root}
%parse-param {struct search_RegE_list_t *list}
%parse-param {struct regexp_error_report_t *err}
/* Extra argument forwarded to the lexer. */
%lex-param {yyscan_t yyscanner}
/* A single letter; its semantic value is the letter code. */
%token <c> LEX_CHAR
/* Predefined letter sets, turned into RE_*_MATCH nodes by the var rule. */
%token LEX_ALL
%token LEX_VOWL
%token LEX_CONS
%token LEX_USER1
%token LEX_USER2
/* Delimiters of a bracket expression (letter set) and of a group. */
%token LEX_L_SQBRACKET LEX_R_SQBRACKET
%token LEX_L_BRACKET LEX_R_BRACKET
/* Prefix that complements the letter set of a bracket expression. */
%token LEX_HAT
/* Postfix operators: optional (OR with epsilon), NODE_PLUS, NODE_STAR. */
%token LEX_QMARK
%token LEX_PLUS
%token LEX_STAR
/* Terminator required after the closing bracket by the start rule. */
%token LEX_SHARP
/* Non-terminals producing syntax-tree nodes. */
%type <NODE_TYPE> var
%type <NODE_TYPE> expr
/* Non-terminals producing letter-set tables. */
%type <letters> vardis
%type <letters> exprdis
/* Bracket expression converted into a syntax-tree node. */
%type <NODE_TYPE> exprdisnode
%start start
%%
start: LEX_L_BRACKET expr LEX_R_BRACKET LEX_SHARP
    {
        /*
         * Whole input: a bracketed expression followed by LEX_SHARP.
         * The resulting tree is the expression AND-ed with a RE_FINAL_TOK
         * leaf; it is handed back through *root and parsing stops.
         */
        NODE *end_marker = regexp_createNODE(NODE_VAR, RE_FINAL_TOK, NULL, NULL);
        NODE *tree = regexp_createNODE(NODE_AND, '\0', $2, end_marker);

        *root = tree;
        YYACCEPT;
    }
    ;
/*
* expr: builds the syntax tree of an expression.
* Each alternative yields a NODE*: concatenation gives NODE_AND, an optional
* item gives NODE_OR with a RE_EPSILON leaf, and the two repetition
* operators give NODE_PLUS / NODE_STAR with a single child.
* NOTE(review): the "expr expr" alternative is ambiguous by itself, so how
* three or more consecutive items are grouped depends on bison's conflict
* resolution - keep the order of the alternatives unchanged.
*/
expr : var
{
/* A single letter or predefined set. */
$$=$1;
}
| expr expr
{
/* Concatenation of two sub-expressions. */
$$=regexp_createNODE(NODE_AND,'\0',$1,$2);
}
| var LEX_QMARK
{
/* Optional item: the item OR the empty word. */
NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
$$=regexp_createNODE(NODE_OR,'\0',$1,epsilon);
}
| var LEX_PLUS
{
$$=regexp_createNODE(NODE_PLUS,'\0',$1,NULL);
}
| var LEX_STAR
{
$$=regexp_createNODE(NODE_STAR,'\0',$1,NULL);
}
/* Group "( expr )", alone or followed by a postfix operator. */
| LEX_L_BRACKET expr LEX_R_BRACKET
{
$$=$2;
}
| LEX_L_BRACKET expr LEX_R_BRACKET LEX_QMARK
{
NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
$$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
}
| LEX_L_BRACKET expr LEX_R_BRACKET LEX_PLUS
{
$$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
}
| LEX_L_BRACKET expr LEX_R_BRACKET LEX_STAR
{
$$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
}
/* Bracket expression "[ letters ]", alone or followed by a postfix
   operator; the node itself is built by exprdisnode. */
| LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET
{
$$=$2;
}
| LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_QMARK
{
NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
$$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
}
| LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_PLUS
{
$$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
}
| LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_STAR
{
$$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
}
;
/*
 * var: a leaf of the syntax tree. Every alternative creates a NODE_VAR
 * node without children; only the symbol stored in the node differs.
 */
var : LEX_CHAR
    {
        /* Plain letter: the node carries the letter code from the lexer. */
#ifdef DEBUG_RE_PARSE
        printf("var : lecture %c\n", $1 + 'a' - 1);
#endif
        $$ = regexp_createNODE(NODE_VAR, $1, NULL, NULL);
    }
    | LEX_ALL
    {
        $$ = regexp_createNODE(NODE_VAR, RE_ALL_MATCH, NULL, NULL);
    }
    | LEX_VOWL
    {
        $$ = regexp_createNODE(NODE_VAR, RE_VOWL_MATCH, NULL, NULL);
    }
    | LEX_CONS
    {
        $$ = regexp_createNODE(NODE_VAR, RE_CONS_MATCH, NULL, NULL);
    }
    | LEX_USER1
    {
        $$ = regexp_createNODE(NODE_VAR, RE_USR1_MATCH, NULL, NULL);
    }
    | LEX_USER2
    {
        $$ = regexp_createNODE(NODE_VAR, RE_USR2_MATCH, NULL, NULL);
    }
    ;
/*
 * exprdisnode: turns the letter set of a bracket expression into a tree
 * node. The set is stored in the first free slot of `list` located after
 * RE_LIST_USER_END, and the node refers to it through the slot's symbol.
 * The LEX_HAT form stores the complement of the set instead.
 * When every slot is already in use, an error is reported through
 * regexperror() and the parse fails, instead of reading list->symbl[]
 * past its last slot as the previous code did.
 */
exprdisnode : exprdis
    {
        int i, j;
#ifdef DEBUG_RE_PARSE
        printf("exprdisnode : exprdis : ");
#endif
        /* Look for the first slot that is not in use yet. */
        for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
        {
            if (list->valid[i] == 0)
                break;
        }
        if (i >= DIC_SEARCH_REGE_LIST)
        {
            /* YYERROR does not call the error function by itself. */
            regexperror(&@1, yyscanner, root, list, err,
                        "too many character classes");
            YYERROR;
        }
        list->valid[i] = 1;
        list->symbl[i] = RE_ALL_MATCH + i;
        /* Index 0 is never part of a set. */
        list->letters[i][0] = 0;
        for (j = 1; j < DIC_LETTERS; j++)
            list->letters[i][j] = $1[j] ? 1 : 0;
#ifdef DEBUG_RE_PARSE
        printf("list %d symbl x%02x : ", i, list->symbl[i]);
        for (j = 0; j < DIC_LETTERS; j++)
            if (list->letters[i][j])
                printf("%c", j + 'a' - 1);
        printf("\n");
#endif
        $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
    }
    | LEX_HAT exprdis
    {
        int i, j;
#ifdef DEBUG_RE_PARSE
        printf("exprdisnode : HAT exprdis : ");
#endif
        /* Look for the first slot that is not in use yet. */
        for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
        {
            if (list->valid[i] == 0)
                break;
        }
        if (i >= DIC_SEARCH_REGE_LIST)
        {
            /* YYERROR does not call the error function by itself. */
            regexperror(&@2, yyscanner, root, list, err,
                        "too many character classes");
            YYERROR;
        }
        list->valid[i] = 1;
        list->symbl[i] = RE_ALL_MATCH + i;
        /* Index 0 is never part of a set, even a complemented one. */
        list->letters[i][0] = 0;
        /* Complement: keep exactly the letters that were NOT listed. */
        for (j = 1; j < DIC_LETTERS; j++)
            list->letters[i][j] = $2[j] ? 0 : 1;
#ifdef DEBUG_RE_PARSE
        printf("list %d symbl x%02x : ", i, list->symbl[i]);
        for (j = 0; j < DIC_LETTERS; j++)
            if (list->letters[i][j])
                printf("%c", j + 'a' - 1);
        printf("\n");
#endif
        $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
    }
    ;
/*
 * exprdis: letter set listed inside a bracket expression, accumulated as
 * a table of DIC_LETTERS entries (non-zero = letter present).
 */
exprdis: vardis
    {
        /* One letter only: the set is that letter's table, copied as is. */
        int k;
        for (k = 0; k < DIC_LETTERS; k++)
        {
            $$[k] = $1[k];
        }
    }
    | vardis exprdis
    {
        /* Union of the leading letter and the remaining letters. */
        int k = 0;
        while (k < DIC_LETTERS)
        {
            $$[k] = $1[k] | $2[k];
            k++;
        }
    }
    ;
/*
 * vardis: one letter inside a bracket expression, returned as a table
 * that is zero everywhere except at the letter's own index.
 */
vardis: LEX_CHAR
    {
        int letter = $1;
#ifdef DEBUG_RE_PARSE
        printf("vardis : lecture %c\n", letter + 'a' - 1);
#endif
        /* Start from an empty set, then mark the single letter. */
        memset($$, 0, DIC_LETTERS * sizeof(char));
        $$[letter] = 1;
    }
    ;
%%
#define UNUSED __attribute__((unused))
/**
 * Error-reporting function of the parser: records where the error is and
 * what it is in the caller-supplied report.
 *
 * llocp:     location of the error; its first and last columns are copied
 *            into err->pos1 and err->pos2
 * yyscanner: unused
 * root:      unused
 * list:      unused
 * err:       report to fill in
 * msg:       message text; truncated if it does not fit in err->msg
 */
void regexperror(YYLTYPE *llocp, yyscan_t UNUSED yyscanner, NODE UNUSED **root,
                 struct search_RegE_list_t UNUSED *list,
                 struct regexp_error_report_t *err, char const *msg)
{
    err->pos1 = llocp->first_column;
    err->pos2 = llocp->last_column;
    /* strncpy() leaves the destination unterminated when the source fills
       it completely, so copy one byte less and terminate explicitly.
       (sizeof assumes err->msg is an array member, as the original code
       already did.) */
    strncpy(err->msg, msg, sizeof(err->msg) - 1);
    err->msg[sizeof(err->msg) - 1] = '\0';
}
/*
* shut down the compiler
*/
//int yy_init_globals (yyscan_t yyscanner);