mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2025-01-17 06:11:49 +01:00
be0dea2a08
- regexp is now working (still needs some testing) - regexp syntax error recovery is not implemented
598 lines
16 KiB
C
598 lines
16 KiB
C
/* Eliot */
|
|
/* Copyright (C) 1999 antoine.fraboulet */
|
|
/* antoine.fraboulet@free.fr */
|
|
/* */
|
|
/* This program is free software; you can redistribute it and/or modify */
|
|
/* it under the terms of the GNU General Public License as published by */
|
|
/* the Free Software Foundation; either version 2 of the License, or */
|
|
/* (at your option) any later version. */
|
|
/* */
|
|
/* This program is distributed in the hope that it will be useful, */
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
|
/* GNU General Public License for more details. */
|
|
/* */
|
|
/* You should have received a copy of the GNU General Public License */
|
|
/* along with this program; if not, write to the Free Software */
|
|
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
/*
|
|
* $Id: dic_search.c,v 1.7 2005/04/19 16:26:51 afrab Exp $
|
|
*/
|
|
|
|
/**
|
|
* \file dic_search.c
|
|
* \brief Dictionary lookup functions
|
|
* \author Antoine Fraboulet
|
|
* \date 2002
|
|
*/
|
|
|
|
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dic_internals.h"
#include "dic.h"
#include "dic_search.h"

#include "regexp.h"
#include "libdic_a-er.h" /* generated by bison */
#include "scanner.h" /* generated by flex */
#include "automaton.h"
|
|
|
|
/* regexp parser entry point, called from Dic_search_RegE */
extern int yyparse(yyscan_t scanner, NODE **root);
|
|
|
|
|
|
/**
|
|
* Dic_seel_edgeptr
|
|
* walk the dictionary until the end of the word
|
|
* @param dic : dictionnary
|
|
* @param s : current pointer to letters
|
|
* @param eptr : current edge in the dawg
|
|
*/
|
|
static Dawg_edge*
|
|
Dic_seek_edgeptr(const Dictionary dic, const char* s, Dawg_edge *eptr)
|
|
{
|
|
if (*s)
|
|
{
|
|
Dawg_edge *p = dic->dawg + eptr->ptr;
|
|
do {
|
|
if (p->chr == (unsigned)(*s & DIC_CHAR_MASK))
|
|
return Dic_seek_edgeptr (dic,s + 1, p);
|
|
} while (!(*p++).last);
|
|
return dic->dawg;
|
|
}
|
|
else
|
|
return eptr;
|
|
}
|
|
|
|
|
|
/**
|
|
* Dic_search_word : direct application of Dic_seek_edgeptr
|
|
* @param dic : dictionary
|
|
* @param word : word to lookup
|
|
* @result 0 not a valid word, 1 ok
|
|
*/
|
|
|
|
int
|
|
Dic_search_word(const Dictionary dic, const char* word)
|
|
{
|
|
Dawg_edge *e;
|
|
e = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
|
|
return e->term;
|
|
}
|
|
|
|
|
|
/**
|
|
* global variables for Dic_search_word_by_len :
|
|
*
|
|
* a pointer to the structure is passed as a parameter
|
|
* so that all the search_* variables appear to the functions
|
|
* as global but the code remains re-entrant.
|
|
* Should be better to change the algorithm ...
|
|
*/
|
|
|
|
struct params_7plus1_t {
|
|
Dictionary search_dic;
|
|
int search_len;
|
|
int search_wordlistlen;
|
|
int search_wordlistlenmax;
|
|
char search_wordtst[DIC_WORD_MAX];
|
|
char search_letters[DIC_LETTERS];
|
|
char (*search_wordlist)[RES_7PL1_MAX][DIC_WORD_MAX];
|
|
};
|
|
|
|
static void
|
|
Dic_search_word_by_len(struct params_7plus1_t *params, int i, Dawg_edge *edgeptr)
|
|
{
|
|
/* depth first search in the dictionary */
|
|
do {
|
|
/* we use a static array and not a real list so we have to stop if
|
|
* the array is full */
|
|
if (params->search_wordlistlen >= params->search_wordlistlenmax)
|
|
break;
|
|
|
|
/* the test is false only when reach the end-node */
|
|
if (edgeptr->chr)
|
|
{
|
|
|
|
/* is the letter available in search_letters */
|
|
if (params->search_letters[edgeptr->chr])
|
|
{
|
|
params->search_wordtst[i] = edgeptr->chr + 'A' - 1;
|
|
params->search_letters[edgeptr->chr] --;
|
|
if (i == params->search_len)
|
|
{
|
|
if ((edgeptr->term)
|
|
/* && (params->search_wordlistlen < params->search_wordlistlenmax) */)
|
|
strcpy((*params->search_wordlist)[params->search_wordlistlen++],params->search_wordtst);
|
|
}
|
|
else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */
|
|
{
|
|
Dic_search_word_by_len(params,i + 1, params->search_dic->dawg + edgeptr->ptr);
|
|
}
|
|
params->search_letters[edgeptr->chr] ++;
|
|
params->search_wordtst[i] = '\0';
|
|
}
|
|
|
|
/* the letter is of course available if we have a joker available */
|
|
if (params->search_letters[0])
|
|
{
|
|
params->search_wordtst[i] = edgeptr->chr + 'a' - 1;
|
|
params->search_letters[0] --;
|
|
if (i == params->search_len)
|
|
{
|
|
if ((edgeptr->term)
|
|
/* && (params->search_wordlistlen < params->search_wordlistlenmax) */)
|
|
strcpy((*(params->search_wordlist))[params->search_wordlistlen++],params->search_wordtst);
|
|
}
|
|
else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */
|
|
{
|
|
Dic_search_word_by_len(params,i + 1,params->search_dic->dawg + edgeptr->ptr);
|
|
}
|
|
params->search_letters[0] ++;
|
|
params->search_wordtst[i] = '\0';
|
|
}
|
|
}
|
|
} while (! (*edgeptr++).last);
|
|
}
|
|
|
|
void
|
|
Dic_search_7pl1(const Dictionary dic, const char* rack,
|
|
char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
|
|
int joker)
|
|
{
|
|
int i,j,wordlen;
|
|
const char* r = rack;
|
|
struct params_7plus1_t params;
|
|
Dawg_edge *root_edge;
|
|
|
|
for(i=0; i < DIC_LETTERS; i++)
|
|
for(j=0; j < RES_7PL1_MAX; j++)
|
|
buff[i][j][0] = '\0';
|
|
|
|
for(i=0; i<DIC_LETTERS; i++)
|
|
params.search_letters[i] = 0;
|
|
|
|
if (dic == NULL || rack == NULL)
|
|
return;
|
|
|
|
/*
|
|
* the letters are verified and changed to the dic internal
|
|
* representation (*r & DIC_CHAR_MASK)
|
|
*/
|
|
for(wordlen=0; wordlen < DIC_WORD_MAX && *r; r++)
|
|
{
|
|
if (isalpha(*r))
|
|
{
|
|
params.search_letters[(int)*r & DIC_CHAR_MASK]++;
|
|
wordlen++;
|
|
}
|
|
else if (*r == '?')
|
|
{
|
|
if (joker)
|
|
{
|
|
params.search_letters[0]++;
|
|
wordlen++;
|
|
}
|
|
else
|
|
{
|
|
strncpy(buff[0][0],"** joker **",DIC_WORD_MAX);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (wordlen < 1)
|
|
return;
|
|
|
|
root_edge = dic->dawg + (dic->dawg[dic->root].ptr);
|
|
|
|
params.search_dic = dic;
|
|
params.search_wordlistlenmax = RES_7PL1_MAX;
|
|
|
|
/* search for all the words that can be done with the letters */
|
|
params.search_len = wordlen - 1;
|
|
params.search_wordtst[wordlen]='\0';
|
|
params.search_wordlist = & buff[0];
|
|
params.search_wordlistlen = 0;
|
|
Dic_search_word_by_len(¶ms,0,root_edge);
|
|
|
|
/* search for all the words that can be done with the letters +1 */
|
|
params.search_len = wordlen;
|
|
params.search_wordtst[wordlen + 1]='\0';
|
|
for(i='a'; i <= 'z'; i++)
|
|
{
|
|
params.search_letters[i & DIC_CHAR_MASK]++;
|
|
|
|
params.search_wordlist = & buff[i & DIC_CHAR_MASK];
|
|
params.search_wordlistlen = 0;
|
|
Dic_search_word_by_len(¶ms,0,root_edge);
|
|
|
|
params.search_letters[i & DIC_CHAR_MASK]--;
|
|
}
|
|
}
|
|
|
|
/****************************************/
|
|
/****************************************/
|
|
|
|
void
|
|
Dic_search_Racc(const Dictionary dic, const char* word,
|
|
char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
|
|
{
|
|
/* search_racc will try to add a letter in front and at the end of a word */
|
|
|
|
int i,wordlistlen;
|
|
Dawg_edge *edge;
|
|
char wordtst[DIC_WORD_MAX];
|
|
|
|
for(i=0; i < RES_RACC_MAX; i++)
|
|
wordlist[i][0] = 0;
|
|
|
|
if (dic == NULL || wordlist == NULL)
|
|
return;
|
|
|
|
/* let's try for the front */
|
|
wordlistlen = 0;
|
|
strcpy(wordtst+1,word);
|
|
for(i='a'; i <= 'z'; i++)
|
|
{
|
|
wordtst[0] = i;
|
|
if (Dic_search_word(dic,wordtst) && wordlistlen < RES_RACC_MAX)
|
|
strcpy(wordlist[wordlistlen++],wordtst);
|
|
}
|
|
|
|
/* add a letter at the end */
|
|
for(i=0; word[i]; i++)
|
|
wordtst[i] = word[i];
|
|
|
|
wordtst[i ] = '\0';
|
|
wordtst[i+1] = '\0';
|
|
|
|
edge = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
|
|
|
|
/* points to what the next letter can be */
|
|
edge = dic->dawg + edge->ptr;
|
|
|
|
if (edge != dic->dawg)
|
|
{
|
|
do {
|
|
if (edge->term && wordlistlen < RES_RACC_MAX)
|
|
{
|
|
wordtst[i] = edge->chr + 'a' - 1;
|
|
strcpy(wordlist[wordlistlen++],wordtst);
|
|
}
|
|
} while (!(*edge++).last);
|
|
}
|
|
}
|
|
|
|
/****************************************/
|
|
/****************************************/
|
|
|
|
|
|
void
|
|
Dic_search_Benj(const Dictionary dic, const char* word,
|
|
char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
|
|
{
|
|
int i,wordlistlen;
|
|
char wordtst[DIC_WORD_MAX];
|
|
Dawg_edge *edge0,*edge1,*edge2,*edgetst;
|
|
|
|
for(i=0; i < RES_BENJ_MAX; i++)
|
|
wordlist[i][0] = 0;
|
|
|
|
if (dic == NULL || word == NULL)
|
|
return;
|
|
|
|
wordlistlen = 0;
|
|
|
|
strcpy(wordtst+3,word);
|
|
edge0 = dic->dawg + (dic->dawg[dic->root].ptr);
|
|
do {
|
|
wordtst[0] = edge0->chr + 'a' - 1;
|
|
edge1 = dic->dawg + edge0->ptr;
|
|
do {
|
|
wordtst[1] = edge1->chr + 'a' - 1;
|
|
edge2 = dic->dawg + edge1->ptr;
|
|
do {
|
|
wordtst[2] = edge2->chr + 'a' - 1;
|
|
edgetst = Dic_seek_edgeptr(dic,word,edge2);
|
|
if (edgetst->term && wordlistlen < RES_BENJ_MAX)
|
|
strcpy(wordlist[wordlistlen++],wordtst);
|
|
} while (!(*edge2++).last);
|
|
} while (!(*edge1++).last);
|
|
} while (!(*edge0++).last);
|
|
}
|
|
|
|
|
|
/****************************************/
|
|
/****************************************/
|
|
|
|
struct params_cross_t {
|
|
Dictionary dic;
|
|
int wordlen;
|
|
int wordlistlen;
|
|
int wordlistlenmax;
|
|
char mask[DIC_WORD_MAX];
|
|
};
|
|
|
|
|
|
void
|
|
Dic_search_cross_rec(struct params_cross_t *params,
|
|
char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
|
|
Dawg_edge *edgeptr)
|
|
{
|
|
Dawg_edge *current = params->dic->dawg + edgeptr->ptr;
|
|
|
|
if (params->mask[params->wordlen] == '\0' && edgeptr->term)
|
|
{
|
|
if (params->wordlistlen < params->wordlistlenmax)
|
|
strcpy(wordlist[params->wordlistlen++],params->mask);
|
|
}
|
|
else if (params->mask[params->wordlen] == '.')
|
|
{
|
|
do
|
|
{
|
|
params->mask[params->wordlen] = current->chr + 'a' - 1;
|
|
params->wordlen ++;
|
|
Dic_search_cross_rec(params,wordlist,current);
|
|
params->wordlen --;
|
|
params->mask[params->wordlen] = '.';
|
|
}
|
|
while (!(*current++).last);
|
|
}
|
|
else
|
|
{
|
|
do
|
|
{
|
|
if (current->chr == (unsigned int)(params->mask[params->wordlen] & DIC_CHAR_MASK))
|
|
{
|
|
params->wordlen ++;
|
|
Dic_search_cross_rec(params,wordlist,current);
|
|
params->wordlen --;
|
|
break;
|
|
}
|
|
}
|
|
while (!(*current++).last);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void
|
|
Dic_search_Cros(const Dictionary dic, const char* mask,
|
|
char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
|
|
{
|
|
int i;
|
|
struct params_cross_t params;
|
|
|
|
for(i=0; i < RES_CROS_MAX; i++)
|
|
wordlist[i][0] = 0;
|
|
|
|
if (dic == NULL || mask == NULL)
|
|
return;
|
|
|
|
for(i=0; i < DIC_WORD_MAX && mask[i]; i++)
|
|
{
|
|
if (isalpha(mask[i]))
|
|
params.mask[i] = (mask[i] & DIC_CHAR_MASK) + 'A' - 1;
|
|
else
|
|
params.mask[i] = '.';
|
|
}
|
|
params.mask[i] = '\0';
|
|
|
|
params.dic = dic;
|
|
params.wordlen = 0;
|
|
params.wordlistlen = 0;
|
|
params.wordlistlenmax = RES_CROS_MAX;
|
|
Dic_search_cross_rec(¶ms, wordlist, dic->dawg + dic->root);
|
|
}
|
|
|
|
/****************************************/
|
|
/****************************************/
|
|
|
|
struct params_regexp_t {
|
|
Dictionary dic;
|
|
automaton automaton;
|
|
struct search_RegE_list_t *charlist;
|
|
char word[DIC_WORD_MAX];
|
|
int wordlen;
|
|
int wordlistlen;
|
|
int wordlistlenmax;
|
|
};
|
|
|
|
void
|
|
Dic_search_regexp_rec(struct params_regexp_t *params,
|
|
int state,
|
|
Dawg_edge *edgeptr,
|
|
char wordlist[RES_REGE_MAX][DIC_WORD_MAX])
|
|
{
|
|
int i;
|
|
int next_state;
|
|
int special_char;
|
|
Dawg_edge *current;
|
|
|
|
/* if we have a valid word we store it */
|
|
if (params->automaton->accept[state] && edgeptr->term)
|
|
{
|
|
if (params->wordlistlen < params->wordlistlenmax)
|
|
{
|
|
#ifdef DEBUG_RE
|
|
fprintf(stdout,"adding word -%s-\n",params->word);
|
|
#endif
|
|
strcpy(wordlist[params->wordlistlen++],params->word);
|
|
}
|
|
}
|
|
|
|
/* we now drive the search by exploring the dictionary */
|
|
current = params->dic->dawg + edgeptr->ptr;
|
|
do {
|
|
/* the current letter is current->chr */
|
|
next_state = params->automaton->Dtrans[state][current->chr];
|
|
/* 1 : the letter appears in the automaton as is */
|
|
if (params->automaton->marque[next_state])
|
|
{
|
|
#ifdef DEBUG_RE
|
|
fprintf(stderr,"adding letter %c to partial word -%s-\n",current->chr + 'a' - 1,params->word);
|
|
#endif
|
|
params->word[params->wordlen] = current->chr + 'a' - 1;
|
|
params->wordlen ++;
|
|
Dic_search_regexp_rec(params,next_state,current,wordlist);
|
|
params->wordlen --;
|
|
params->word[params->wordlen] = '\0';
|
|
}
|
|
|
|
/* 2 : we search in user defined list */
|
|
for(i=0; i < DIC_SEARCH_REGE_LIST; i++)
|
|
{
|
|
special_char = params->charlist->symbl[i];
|
|
next_state = params->automaton->Dtrans[state][special_char];
|
|
if (params->charlist->valid[i] && // list must be scanned
|
|
params->charlist->letters[i][current->chr] && // current->chr is in the list
|
|
params->automaton->marque[next_state]) // next state is valid
|
|
{
|
|
#ifdef DEBUG_RE
|
|
fprintf(stderr,"** special char ");
|
|
regexp_print_letter(stderr,special_char);
|
|
fprintf(stderr," resolves to ");
|
|
regexp_print_letter(stderr,current->chr);
|
|
fprintf(stderr,"\n");
|
|
#endif
|
|
params->word[params->wordlen] = current->chr + 'a' - 1;
|
|
params->wordlen ++;
|
|
Dic_search_regexp_rec(params,next_state,current,wordlist);
|
|
params->wordlen --;
|
|
params->word[params->wordlen] = '\0';
|
|
}
|
|
#ifdef DEBUG_RE
|
|
else
|
|
{
|
|
if (params->automaton->marque[next_state] && ! params->charlist->valid[i])
|
|
{
|
|
fprintf(stderr,"special char ");
|
|
regexp_print_letter(stderr,special_char);
|
|
fprintf(stderr," appears in automaton but the list is invalid\n");
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
} while (!(*current++).last);
|
|
}
|
|
|
|
/*
 * yyerror : error callback with the parser's (scanner, root, message)
 * signature.  Only the message is used; it is printed on stdout.
 */
void yyerror (yyscan_t yyscanner, NODE** root, char const *msg)
{
    (void) yyscanner;
    (void) root;

    printf("\n erreur ! (%s)\n", msg);
}
|
|
|
|
void
|
|
Dic_search_RegE(const Dictionary dic, const char* re,
|
|
char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
|
|
struct search_RegE_list_t *list)
|
|
{
|
|
int i,p,n,value;
|
|
int ptl[REGEXP_MAX+1];
|
|
int PS [REGEXP_MAX+1];
|
|
NODE* root;
|
|
yyscan_t scanner;
|
|
YY_BUFFER_STATE buf;
|
|
automaton a;
|
|
char stringbuf[250];
|
|
struct params_regexp_t params;
|
|
|
|
/* init */
|
|
for(i=0; i < RES_REGE_MAX; i++)
|
|
wordlist[i][0] = 0;
|
|
|
|
if (dic == NULL || re == NULL)
|
|
return;
|
|
|
|
/* (expr)# */
|
|
sprintf(stringbuf,"(%s)#",re);
|
|
for(i=0; i < REGEXP_MAX; i++)
|
|
{
|
|
PS[i] = 0;
|
|
ptl[i] = 0;
|
|
}
|
|
|
|
yylex_init( &scanner );
|
|
buf = yy_scan_string( stringbuf, scanner );
|
|
root = NULL;
|
|
value = yyparse( scanner , &root);
|
|
yy_delete_buffer(buf,scanner);
|
|
yylex_destroy( scanner );
|
|
|
|
if (value)
|
|
{
|
|
regexp_delete_tree(root);
|
|
return ;
|
|
}
|
|
|
|
n = 1;
|
|
p = 1;
|
|
regexp_parcours(root, &p, &n, ptl);
|
|
PS [0] = p - 1;
|
|
ptl[0] = p - 1;
|
|
|
|
regexp_possuivante(root,PS);
|
|
|
|
if ((a = automaton_build(root->PP,ptl,PS)) == NULL)
|
|
{
|
|
regexp_delete_tree(root);
|
|
return;
|
|
}
|
|
|
|
#ifdef DEBUG_RE2
|
|
automaton_dump(a,"auto");
|
|
#endif
|
|
|
|
#ifdef DEBUG_RE
|
|
fprintf(stderr,"recherche de l'expression %s\n",stringbuf);
|
|
fprintf(stderr,"lettres (%s) :",(list->valid[0])?"valid":"non valide");
|
|
for(i=0; i<DIC_LETTERS; i++)
|
|
regexp_print_letter(stderr,list->letters[0][i]);
|
|
fprintf(stderr,"\n");
|
|
fprintf(stderr,"voyelles (%s) :",(list->valid[1])?"valid":"non valide");
|
|
for(i=0; i<DIC_LETTERS; i++)
|
|
regexp_print_letter(stderr,list->letters[1][i]);
|
|
fprintf(stderr,"\n");
|
|
fprintf(stderr,"consonnes (%s) :",(list->valid[2])?"valid":"non valide");
|
|
for(i=0; i<DIC_LETTERS; i++)
|
|
regexp_print_letter(stderr,list->letters[2][i]);
|
|
fprintf(stderr,"\n");
|
|
#endif
|
|
|
|
params.dic = dic;
|
|
params.automaton = a;
|
|
params.charlist = list;
|
|
memset(params.word,'\0',sizeof(params.word));
|
|
params.wordlen = 0;
|
|
params.wordlistlen = 0;
|
|
params.wordlistlenmax = RES_REGE_MAX;
|
|
Dic_search_regexp_rec(¶ms, a->init, dic->dawg + dic->root, wordlist);
|
|
|
|
automaton_delete(a);
|
|
regexp_delete_tree(root);
|
|
}
|
|
|
|
/****************************************/
|
|
/****************************************/
|
|
|