mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2025-01-18 10:26:15 +01:00
e7a8d01a8d
There are too many changes to list properly, here is an overview of the main changes: - the dictionary is now in C++ - the dictionary has a new format, where it is possible to specify the letters, their points, their frequency, ... It is backwards compatible. - Eliot now supports non-ASCII characters everywhere - i18n of the compdic, listdic, regexpmain binaries - i18n of the wxWidgets interface (now in English by default)
295 lines
7 KiB
Text
295 lines
7 KiB
Text
%{
|
|
/*****************************************************************************
|
|
* Eliot
|
|
* Copyright (C) 2005-2007 Antoine Fraboulet
|
|
* Authors: Antoine Fraboulet
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*****************************************************************************/
|
|
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <malloc.h>
|
|
|
|
#include "dic.h"
|
|
#include "regexp.h"
|
|
#include "libdic_a-ery.h"
|
|
#include "libdic_a-erl.h"
|
|
|
|
/* ************************************************** */
|
|
/* ************************************************** */
|
|
/* ************************************************** */
|
|
|
|
/**
 * Prototype of the parser entry point generated by bison.
 * The arguments correspond, in order, to the %parse-param declarations
 * of this grammar.
 */
int regexpparse(yyscan_t scanner, NODE** root,
                struct search_RegE_list_t *list,
                struct regexp_error_report_t *err);

/**
 * Prototype of the error reporting function (defined in the epilogue of
 * this file). It receives the location of the error, the same arguments
 * as regexpparse(), and the message to report.
 */
void regexperror(YYLTYPE *llocp, yyscan_t scanner, NODE** root,
                 struct search_RegE_list_t *list,
                 struct regexp_error_report_t *err,
                 char const *msg);
|
|
|
|
/* ************************************************** */
|
|
/* ************************************************** */
|
|
/* ************************************************** */
|
|
|
|
%}
|
|
%union {
    /* Semantic value of a LEX_CHAR token. */
    char c;

    /* Syntax tree node, used by the var, expr and exprdisnode rules. */
    NODE *NODE_TYPE;

    /*
     * Set of letters, used by the vardis and exprdis rules:
     * entry [k] is non-zero when the letter with code k is in the set.
     */
    char letters[DIC_LETTERS];
};
|
|
|
|
/* Generate a header file, and prefix the generated symbols with "regexp". */
%defines
%name-prefix="regexp"

/* Reentrant parser with location tracking. */
%pure-parser
%locations

/* Extra arguments of regexpparse() and regexperror(), in this order. */
%parse-param {yyscan_t yyscanner}
%parse-param {NODE **root}
%parse-param {struct search_RegE_list_t *list}
%parse-param {struct regexp_error_report_t *err}

/* Extra argument handed to the lexer. */
%lex-param {yyscan_t yyscanner}

/* A single letter; its value is carried in the <c> member. */
%token <c> LEX_CHAR

/* Predefined letter sets (see the var rule). */
%token LEX_ALL LEX_VOWL LEX_CONS LEX_USER1 LEX_USER2

/* Grouping and letter class delimiters, and letter class negation. */
%token LEX_L_SQBRACKET LEX_R_SQBRACKET
%token LEX_L_BRACKET LEX_R_BRACKET
%token LEX_HAT

/* Postfix operators, and the token closing the whole pattern. */
%token LEX_QMARK LEX_PLUS LEX_STAR LEX_SHARP

/* Semantic value types of the non-terminals. */
%type <NODE_TYPE> var expr exprdisnode
%type <letters> vardis exprdis

%start start
|
|
%%
|
|
|
|
/*
 * start: a whole pattern, i.e. a bracketed expression followed by LEX_SHARP.
 */
start: LEX_L_BRACKET expr LEX_R_BRACKET LEX_SHARP
    {
        /*
         * Append a RE_FINAL_TOK leaf to the parsed expression and hand
         * the resulting tree back through the root parse parameter.
         */
        *root = regexp_createNODE(NODE_AND, '\0', $2,
                    regexp_createNODE(NODE_VAR, RE_FINAL_TOK, NULL, NULL));
        YYACCEPT;
    }
    ;
|
|
|
|
|
|
/*
 * expr: a concatenation of atoms. An atom is a var, a bracketed
 * sub-expression or a square-bracketed letter class, optionally followed
 * by one of the postfix tokens LEX_QMARK, LEX_PLUS or LEX_STAR.
 *
 *   atom            -> the atom's own node
 *   atom LEX_QMARK  -> NODE_OR(atom, RE_EPSILON leaf)
 *   atom LEX_PLUS   -> NODE_PLUS(atom)
 *   atom LEX_STAR   -> NODE_STAR(atom)
 */
expr : var
    {
        $$ = $1;
    }
    /* concatenation */
    | expr expr
    {
        $$ = regexp_createNODE(NODE_AND, '\0', $1, $2);
    }
    | var LEX_QMARK
    {
        $$ = regexp_createNODE(NODE_OR, '\0', $1,
                 regexp_createNODE(NODE_VAR, RE_EPSILON, NULL, NULL));
    }
    | var LEX_PLUS
    {
        $$ = regexp_createNODE(NODE_PLUS, '\0', $1, NULL);
    }
    | var LEX_STAR
    {
        $$ = regexp_createNODE(NODE_STAR, '\0', $1, NULL);
    }
    /* () */
    | LEX_L_BRACKET expr LEX_R_BRACKET
    {
        $$ = $2;
    }
    | LEX_L_BRACKET expr LEX_R_BRACKET LEX_QMARK
    {
        $$ = regexp_createNODE(NODE_OR, '\0', $2,
                 regexp_createNODE(NODE_VAR, RE_EPSILON, NULL, NULL));
    }
    | LEX_L_BRACKET expr LEX_R_BRACKET LEX_PLUS
    {
        $$ = regexp_createNODE(NODE_PLUS, '\0', $2, NULL);
    }
    | LEX_L_BRACKET expr LEX_R_BRACKET LEX_STAR
    {
        $$ = regexp_createNODE(NODE_STAR, '\0', $2, NULL);
    }
    /* [] */
    | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET
    {
        $$ = $2;
    }
    | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_QMARK
    {
        $$ = regexp_createNODE(NODE_OR, '\0', $2,
                 regexp_createNODE(NODE_VAR, RE_EPSILON, NULL, NULL));
    }
    | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_PLUS
    {
        $$ = regexp_createNODE(NODE_PLUS, '\0', $2, NULL);
    }
    | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_STAR
    {
        $$ = regexp_createNODE(NODE_STAR, '\0', $2, NULL);
    }
    ;
|
|
|
|
|
|
|
|
/*
 * var: a leaf of the syntax tree, either a single letter or one of the
 * predefined letter sets. Each alternative builds a NODE_VAR node
 * carrying the matching symbol.
 */
var : LEX_CHAR
    {
#ifdef DEBUG_RE_PARSE
        printf("var : lecture %c\n",$1 + 'a' -1);
#endif
        /* The symbol of a plain letter is the token value itself. */
        $$ = regexp_createNODE(NODE_VAR, $1, NULL, NULL);
    }
    | LEX_ALL
    {
        $$ = regexp_createNODE(NODE_VAR, RE_ALL_MATCH, NULL, NULL);
    }
    | LEX_VOWL
    {
        $$ = regexp_createNODE(NODE_VAR, RE_VOWL_MATCH, NULL, NULL);
    }
    | LEX_CONS
    {
        $$ = regexp_createNODE(NODE_VAR, RE_CONS_MATCH, NULL, NULL);
    }
    | LEX_USER1
    {
        $$ = regexp_createNODE(NODE_VAR, RE_USR1_MATCH, NULL, NULL);
    }
    | LEX_USER2
    {
        $$ = regexp_createNODE(NODE_VAR, RE_USR2_MATCH, NULL, NULL);
    }
    ;
|
|
|
|
|
|
/*
 * exprdisnode: the content of a square-bracketed letter class, optionally
 * negated by a leading LEX_HAT.
 *
 * The letter set is stored in the first slot of `list` above
 * RE_LIST_USER_END whose valid flag is 0; the slot is marked valid, gets
 * the symbol RE_ALL_MATCH + slot index, and a NODE_VAR node carrying that
 * symbol is returned. Entry 0 of the stored set is always cleared.
 *
 * If every slot is already in use, the error is reported through
 * regexperror() and the parse is aborted (the previous code read
 * list->symbl[] one element past the end in that case).
 */
exprdisnode : exprdis
    {
        int i, j;
#ifdef DEBUG_RE_PARSE
        printf("exprdisnode : exprdis : ");
#endif
        /* Look for the first unused slot. */
        for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
        {
            if (list->valid[i] == 0)
                break;
        }
        if (i >= DIC_SEARCH_REGE_LIST)
        {
            regexperror(&@$, yyscanner, root, list, err,
                        "too many letter classes in the regular expression");
            YYABORT;
        }

        list->valid[i] = 1;
        list->symbl[i] = RE_ALL_MATCH + i;
        list->letters[i][0] = 0;
        /* Normalize the flags of the parsed set to 0/1. */
        for (j = 1; j < DIC_LETTERS; j++)
            list->letters[i][j] = $1[j] ? 1 : 0;
#ifdef DEBUG_RE_PARSE
        printf("list %d symbl x%02x : ",i,list->symbl[i]);
        for (j = 0; j < DIC_LETTERS; j++)
            if (list->letters[i][j])
                printf("%c",j+'a'-1);
        printf("\n");
#endif
        $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
    }
    | LEX_HAT exprdis
    {
        int i, j;
#ifdef DEBUG_RE_PARSE
        printf("exprdisnode : HAT exprdis : ");
#endif
        /* Look for the first unused slot. */
        for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
        {
            if (list->valid[i] == 0)
                break;
        }
        if (i >= DIC_SEARCH_REGE_LIST)
        {
            regexperror(&@$, yyscanner, root, list, err,
                        "too many letter classes in the regular expression");
            YYABORT;
        }

        list->valid[i] = 1;
        list->symbl[i] = RE_ALL_MATCH + i;
        list->letters[i][0] = 0;
        /* Negated class: store the complement of the parsed set. */
        for (j = 1; j < DIC_LETTERS; j++)
            list->letters[i][j] = $2[j] ? 0 : 1;
#ifdef DEBUG_RE_PARSE
        printf("list %d symbl x%02x : ",i,list->symbl[i]);
        for (j = 0; j < DIC_LETTERS; j++)
            if (list->letters[i][j])
                printf("%c",j+'a'-1);
        printf("\n");
#endif
        $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
    }
    ;
|
|
|
|
|
|
/*
 * exprdis: one or more letters inside a letter class, accumulated as a
 * set of DIC_LETTERS flags.
 */
exprdis: vardis
    {
        /* A single letter: the set is taken over unchanged. */
        memcpy($$, $1, DIC_LETTERS * sizeof(char));
    }
    | vardis exprdis
    {
        /* Merge the leading letter with the set built from the rest. */
        int k = 0;
        while (k < DIC_LETTERS)
        {
            $$[k] = $1[k] | $2[k];
            k++;
        }
    }
    ;
|
|
|
|
|
|
|
|
/*
 * vardis: a single letter inside a letter class. Produces a set of
 * DIC_LETTERS flags in which only the entry of that letter is set.
 *
 * The token value is used as an index into the set, so it is checked
 * against the array bounds first: a value outside [0, DIC_LETTERS) is
 * reported through regexperror() and aborts the parse instead of writing
 * outside the semantic value.
 */
vardis: LEX_CHAR
    {
        int c = $1;
        memset($$, 0, sizeof(char) * DIC_LETTERS);
#ifdef DEBUG_RE_PARSE
        printf("vardis : lecture %c\n",c + 'a' -1);
#endif
        if (c < 0 || c >= DIC_LETTERS)
        {
            regexperror(&@1, yyscanner, root, list, err,
                        "invalid letter in letter class");
            YYABORT;
        }
        $$[c] = 1;
    }
    ;
|
|
|
|
|
|
%%
|
|
|
|
/* Marks a parameter as intentionally unused; expands to nothing on
 * compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif

/**
 * Error reporting function called when the parse fails.
 *
 * Records the column range of the error location and a copy of the
 * message in the error report.
 *
 * @param llocp location of the error; only first_column and last_column
 *              are read
 * @param err   error report to fill in (pos1, pos2 and msg are written)
 * @param msg   message describing the error
 *
 * The yyscanner, root and list parameters are not used.
 */
void regexperror(YYLTYPE *llocp, yyscan_t UNUSED yyscanner, NODE UNUSED **root,
                 struct search_RegE_list_t UNUSED *list,
                 struct regexp_error_report_t *err, char const *msg)
{
    err->pos1 = llocp->first_column;
    err->pos2 = llocp->last_column;
    /*
     * strncpy() leaves the destination unterminated when msg is at least
     * as long as the buffer, so copy one byte less and terminate
     * explicitly.
     * NOTE(review): like the original sizeof usage, this assumes err->msg
     * is a char array -- confirm against regexp.h.
     */
    strncpy(err->msg, msg, sizeof(err->msg) - 1);
    err->msg[sizeof(err->msg) - 1] = '\0';
}
|
|
|
|
/*
|
|
* shut down the compiler
|
|
*/
|
|
//int yy_init_globals (yyscan_t yyscanner);
|