/* Eliot */ /* Copyright (C) 1999 antoine.fraboulet */ /* antoine.fraboulet@free.fr */ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * $Id: dic_search.c,v 1.7 2005/04/19 16:26:51 afrab Exp $ */ /** * \file dic_search.h * \brief Dictionary lookup functions * \author Antoine Fraboulet * \date 2002 */ #include #include #include #include "dic_internals.h" #include "dic.h" #include "dic_search.h" #include "regexp.h" #include "libdic_a-er.h" /* generated by bison */ #include "scanner.h" /* generated by flex */ #include "automaton.h" extern int yyparse(yyscan_t scanner, NODE ** root); /** * Dic_seel_edgeptr * walk the dictionary until the end of the word * @param dic : dictionnary * @param s : current pointer to letters * @param eptr : current edge in the dawg */ static Dawg_edge* Dic_seek_edgeptr(const Dictionary dic, const char* s, Dawg_edge *eptr) { if (*s) { Dawg_edge *p = dic->dawg + eptr->ptr; do { if (p->chr == (unsigned)(*s & DIC_CHAR_MASK)) return Dic_seek_edgeptr (dic,s + 1, p); } while (!(*p++).last); return dic->dawg; } else return eptr; } /** * Dic_search_word : direct application of Dic_seek_edgeptr * @param dic : dictionary * @param word : word to lookup * @result 0 not a valid word, 1 ok */ int Dic_search_word(const Dictionary dic, const char* word) { Dawg_edge *e; e = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root); return e->term; } /** * global variables for Dic_search_word_by_len : * * a pointer to the structure is passed as a parameter * so that all the search_* variables appear to the functions * as global but the code remains re-entrant. * Should be better to change the algorithm ... */ struct params_7plus1_t { Dictionary search_dic; int search_len; int search_wordlistlen; int search_wordlistlenmax; char search_wordtst[DIC_WORD_MAX]; char search_letters[DIC_LETTERS]; char (*search_wordlist)[RES_7PL1_MAX][DIC_WORD_MAX]; }; static void Dic_search_word_by_len(struct params_7plus1_t *params, int i, Dawg_edge *edgeptr) { /* depth first search in the dictionary */ do { /* we use a static array and not a real list so we have to stop if * the array is full */ if (params->search_wordlistlen >= params->search_wordlistlenmax) break; /* the test is false only when reach the end-node */ if (edgeptr->chr) { /* is the letter available in search_letters */ if (params->search_letters[edgeptr->chr]) { params->search_wordtst[i] = edgeptr->chr + 'A' - 1; params->search_letters[edgeptr->chr] --; if (i == params->search_len) { if ((edgeptr->term) /* && (params->search_wordlistlen < params->search_wordlistlenmax) */) strcpy((*params->search_wordlist)[params->search_wordlistlen++],params->search_wordtst); } else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */ { Dic_search_word_by_len(params,i + 1, params->search_dic->dawg + edgeptr->ptr); } params->search_letters[edgeptr->chr] ++; params->search_wordtst[i] = '\0'; } /* the letter is of course available if we have a joker available */ if (params->search_letters[0]) { params->search_wordtst[i] = edgeptr->chr + 'a' - 1; params->search_letters[0] --; if (i == params->search_len) { if ((edgeptr->term) /* && (params->search_wordlistlen < params->search_wordlistlenmax) */) strcpy((*(params->search_wordlist))[params->search_wordlistlen++],params->search_wordtst); } else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */ { Dic_search_word_by_len(params,i + 1,params->search_dic->dawg + edgeptr->ptr); } params->search_letters[0] ++; params->search_wordtst[i] = '\0'; } } } while (! (*edgeptr++).last); } void Dic_search_7pl1(const Dictionary dic, const char* rack, char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX], int joker) { int i,j,wordlen; const char* r = rack; struct params_7plus1_t params; Dawg_edge *root_edge; for(i=0; i < DIC_LETTERS; i++) for(j=0; j < RES_7PL1_MAX; j++) buff[i][j][0] = '\0'; for(i=0; idawg + (dic->dawg[dic->root].ptr); params.search_dic = dic; params.search_wordlistlenmax = RES_7PL1_MAX; /* search for all the words that can be done with the letters */ params.search_len = wordlen - 1; params.search_wordtst[wordlen]='\0'; params.search_wordlist = & buff[0]; params.search_wordlistlen = 0; Dic_search_word_by_len(¶ms,0,root_edge); /* search for all the words that can be done with the letters +1 */ params.search_len = wordlen; params.search_wordtst[wordlen + 1]='\0'; for(i='a'; i <= 'z'; i++) { params.search_letters[i & DIC_CHAR_MASK]++; params.search_wordlist = & buff[i & DIC_CHAR_MASK]; params.search_wordlistlen = 0; Dic_search_word_by_len(¶ms,0,root_edge); params.search_letters[i & DIC_CHAR_MASK]--; } } /****************************************/ /****************************************/ void Dic_search_Racc(const Dictionary dic, const char* word, char wordlist[RES_RACC_MAX][DIC_WORD_MAX]) { /* search_racc will try to add a letter in front and at the end of a word */ int i,wordlistlen; Dawg_edge *edge; char wordtst[DIC_WORD_MAX]; for(i=0; i < RES_RACC_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || wordlist == NULL) return; /* let's try for the front */ wordlistlen = 0; strcpy(wordtst+1,word); for(i='a'; i <= 'z'; i++) { wordtst[0] = i; if (Dic_search_word(dic,wordtst) && wordlistlen < RES_RACC_MAX) strcpy(wordlist[wordlistlen++],wordtst); } /* add a letter at the end */ for(i=0; word[i]; i++) wordtst[i] = word[i]; wordtst[i ] = '\0'; wordtst[i+1] = '\0'; edge = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root); /* points to what the next letter can be */ edge = dic->dawg + edge->ptr; if (edge != dic->dawg) { do { if (edge->term && wordlistlen < RES_RACC_MAX) { wordtst[i] = edge->chr + 'a' - 1; strcpy(wordlist[wordlistlen++],wordtst); } } while (!(*edge++).last); } } /****************************************/ /****************************************/ void Dic_search_Benj(const Dictionary dic, const char* word, char wordlist[RES_BENJ_MAX][DIC_WORD_MAX]) { int i,wordlistlen; char wordtst[DIC_WORD_MAX]; Dawg_edge *edge0,*edge1,*edge2,*edgetst; for(i=0; i < RES_BENJ_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || word == NULL) return; wordlistlen = 0; strcpy(wordtst+3,word); edge0 = dic->dawg + (dic->dawg[dic->root].ptr); do { wordtst[0] = edge0->chr + 'a' - 1; edge1 = dic->dawg + edge0->ptr; do { wordtst[1] = edge1->chr + 'a' - 1; edge2 = dic->dawg + edge1->ptr; do { wordtst[2] = edge2->chr + 'a' - 1; edgetst = Dic_seek_edgeptr(dic,word,edge2); if (edgetst->term && wordlistlen < RES_BENJ_MAX) strcpy(wordlist[wordlistlen++],wordtst); } while (!(*edge2++).last); } while (!(*edge1++).last); } while (!(*edge0++).last); } /****************************************/ /****************************************/ struct params_cross_t { Dictionary dic; int wordlen; int wordlistlen; int wordlistlenmax; char mask[DIC_WORD_MAX]; }; void Dic_search_cross_rec(struct params_cross_t *params, char wordlist[RES_CROS_MAX][DIC_WORD_MAX], Dawg_edge *edgeptr) { Dawg_edge *current = params->dic->dawg + edgeptr->ptr; if (params->mask[params->wordlen] == '\0' && edgeptr->term) { if (params->wordlistlen < params->wordlistlenmax) strcpy(wordlist[params->wordlistlen++],params->mask); } else if (params->mask[params->wordlen] == '.') { do { params->mask[params->wordlen] = current->chr + 'a' - 1; params->wordlen ++; Dic_search_cross_rec(params,wordlist,current); params->wordlen --; params->mask[params->wordlen] = '.'; } while (!(*current++).last); } else { do { if (current->chr == (unsigned int)(params->mask[params->wordlen] & DIC_CHAR_MASK)) { params->wordlen ++; Dic_search_cross_rec(params,wordlist,current); params->wordlen --; break; } } while (!(*current++).last); } } void Dic_search_Cros(const Dictionary dic, const char* mask, char wordlist[RES_CROS_MAX][DIC_WORD_MAX]) { int i; struct params_cross_t params; for(i=0; i < RES_CROS_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || mask == NULL) return; for(i=0; i < DIC_WORD_MAX && mask[i]; i++) { if (isalpha(mask[i])) params.mask[i] = (mask[i] & DIC_CHAR_MASK) + 'A' - 1; else params.mask[i] = '.'; } params.mask[i] = '\0'; params.dic = dic; params.wordlen = 0; params.wordlistlen = 0; params.wordlistlenmax = RES_CROS_MAX; Dic_search_cross_rec(¶ms, wordlist, dic->dawg + dic->root); } /****************************************/ /****************************************/ struct params_regexp_t { Dictionary dic; automaton automaton; struct search_RegE_list_t *charlist; char word[DIC_WORD_MAX]; int wordlen; int wordlistlen; int wordlistlenmax; }; void Dic_search_regexp_rec(struct params_regexp_t *params, int state, Dawg_edge *edgeptr, char wordlist[RES_REGE_MAX][DIC_WORD_MAX]) { int i; int next_state; int special_char; Dawg_edge *current; /* if we have a valid word we store it */ if (params->automaton->accept[state] && edgeptr->term) { if (params->wordlistlen < params->wordlistlenmax) { #ifdef DEBUG_RE fprintf(stdout,"adding word -%s-\n",params->word); #endif strcpy(wordlist[params->wordlistlen++],params->word); } } /* we now drive the search by exploring the dictionary */ current = params->dic->dawg + edgeptr->ptr; do { /* the current letter is current->chr */ next_state = params->automaton->Dtrans[state][current->chr]; /* 1 : the letter appears in the automaton as is */ if (params->automaton->marque[next_state]) { #ifdef DEBUG_RE fprintf(stderr,"adding letter %c to partial word -%s-\n",current->chr + 'a' - 1,params->word); #endif params->word[params->wordlen] = current->chr + 'a' - 1; params->wordlen ++; Dic_search_regexp_rec(params,next_state,current,wordlist); params->wordlen --; params->word[params->wordlen] = '\0'; } /* 2 : we search in user defined list */ for(i=0; i < DIC_SEARCH_REGE_LIST; i++) { special_char = params->charlist->symbl[i]; next_state = params->automaton->Dtrans[state][special_char]; if (params->charlist->valid[i] && // list must be scanned params->charlist->letters[i][current->chr] && // current->chr is in the list params->automaton->marque[next_state]) // next state is valid { #ifdef DEBUG_RE fprintf(stderr,"** special char "); regexp_print_letter(stderr,special_char); fprintf(stderr," resolves to "); regexp_print_letter(stderr,current->chr); fprintf(stderr,"\n"); #endif params->word[params->wordlen] = current->chr + 'a' - 1; params->wordlen ++; Dic_search_regexp_rec(params,next_state,current,wordlist); params->wordlen --; params->word[params->wordlen] = '\0'; } #ifdef DEBUG_RE else { if (params->automaton->marque[next_state] && ! params->charlist->valid[i]) { fprintf(stderr,"special char "); regexp_print_letter(stderr,special_char); fprintf(stderr," appears in automaton but the list is invalid\n"); } } #endif } } while (!(*current++).last); } void yyerror (yyscan_t yyscanner, NODE** root, char const *msg) { printf("\n erreur ! (%s)\n",msg); } void Dic_search_RegE(const Dictionary dic, const char* re, char wordlist[RES_REGE_MAX][DIC_WORD_MAX], struct search_RegE_list_t *list) { int i,p,n,value; int ptl[REGEXP_MAX+1]; int PS [REGEXP_MAX+1]; NODE* root; yyscan_t scanner; YY_BUFFER_STATE buf; automaton a; char stringbuf[250]; struct params_regexp_t params; /* init */ for(i=0; i < RES_REGE_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || re == NULL) return; /* (expr)# */ sprintf(stringbuf,"(%s)#",re); for(i=0; i < REGEXP_MAX; i++) { PS[i] = 0; ptl[i] = 0; } yylex_init( &scanner ); buf = yy_scan_string( stringbuf, scanner ); root = NULL; value = yyparse( scanner , &root); yy_delete_buffer(buf,scanner); yylex_destroy( scanner ); if (value) { regexp_delete_tree(root); return ; } n = 1; p = 1; regexp_parcours(root, &p, &n, ptl); PS [0] = p - 1; ptl[0] = p - 1; regexp_possuivante(root,PS); if ((a = automaton_build(root->PP,ptl,PS)) == NULL) { regexp_delete_tree(root); return; } #ifdef DEBUG_RE2 automaton_dump(a,"auto"); #endif #ifdef DEBUG_RE fprintf(stderr,"recherche de l'expression %s\n",stringbuf); fprintf(stderr,"lettres (%s) :",(list->valid[0])?"valid":"non valide"); for(i=0; iletters[0][i]); fprintf(stderr,"\n"); fprintf(stderr,"voyelles (%s) :",(list->valid[1])?"valid":"non valide"); for(i=0; iletters[1][i]); fprintf(stderr,"\n"); fprintf(stderr,"consonnes (%s) :",(list->valid[2])?"valid":"non valide"); for(i=0; iletters[2][i]); fprintf(stderr,"\n"); #endif params.dic = dic; params.automaton = a; params.charlist = list; memset(params.word,'\0',sizeof(params.word)); params.wordlen = 0; params.wordlistlen = 0; params.wordlistlenmax = RES_REGE_MAX; Dic_search_regexp_rec(¶ms, a->init, dic->dawg + dic->root, wordlist); automaton_delete(a); regexp_delete_tree(root); } /****************************************/ /****************************************/