/* Eliot */ /* Copyright (C) 1999 Antoine Fraboulet */ /* */ /* This file is part of Eliot. */ /* */ /* Eliot is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* Eliot is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** * \file dic_search.c * \brief Dictionary lookup functions * \author Antoine Fraboulet * \date 2002 */ #include #include #include #include #include "dic_internals.h" #include "dic.h" #include "regexp.h" #include "dic_search.h" #include "libdic_a-er.h" /* generated by bison */ #include "scanner.h" /* generated by flex */ #include "automaton.h" /* * shut down the compiler */ static int yy_init_globals (yyscan_t yyscanner ) { yy_init_globals(yyscanner); return 0; } /** * Dic_seel_edgeptr * walk the dictionary until the end of the word * @param dic : dictionnary * @param s : current pointer to letters * @param eptr : current edge in the dawg */ static Dawg_edge* Dic_seek_edgeptr(const Dictionary dic, const char* s, Dawg_edge *eptr) { if (*s) { Dawg_edge *p = dic->dawg + eptr->ptr; do { if (p->chr == (unsigned)(*s & DIC_CHAR_MASK)) return Dic_seek_edgeptr (dic,s + 1, p); } while (!(*p++).last); return dic->dawg; } else return eptr; } /** * Dic_search_word_inner : direct application of Dic_seek_edgeptr * @param dic : dictionary * @param word : word to lookup * @result 0 not a valid word, 1 ok */ static int Dic_search_word_inner(const Dictionary dic, const char* word) { Dawg_edge *e; e = Dic_seek_edgeptr(dic, word, dic->dawg + dic->root); return e->term; } /** * Wrapper around Dic_search_word_inner, until we have multibyte support in * the dictionary */ int Dic_search_word(const Dictionary dic, const wchar_t* word) { int res; char *tmp_word = malloc(wcslen(word) + 1); sprintf(tmp_word, "%ls", word); // Do the actual work res = Dic_search_word_inner(dic, tmp_word); // Release memory free(tmp_word); return res; } /** * global variables for Dic_search_word_by_len : * * a pointer to the structure is passed as a parameter * so that all the search_* variables appear to the functions * as global but the code remains re-entrant. * Should be better to change the algorithm ... */ struct params_7plus1_t { Dictionary search_dic; int search_len; int search_wordlistlen; int search_wordlistlenmax; char search_wordtst[DIC_WORD_MAX]; char search_letters[DIC_LETTERS]; char (*search_wordlist)[RES_7PL1_MAX][DIC_WORD_MAX]; }; static void Dic_search_word_by_len(struct params_7plus1_t *params, int i, Dawg_edge *edgeptr) { /* depth first search in the dictionary */ do { /* we use a static array and not a real list so we have to stop if * the array is full */ if (params->search_wordlistlen >= params->search_wordlistlenmax) break; /* the test is false only when reach the end-node */ if (edgeptr->chr) { /* is the letter available in search_letters */ if (params->search_letters[edgeptr->chr]) { params->search_wordtst[i] = edgeptr->chr + 'A' - 1; params->search_letters[edgeptr->chr] --; if (i == params->search_len) { if ((edgeptr->term) /* && (params->search_wordlistlen < params->search_wordlistlenmax) */) strcpy((*params->search_wordlist)[params->search_wordlistlen++],params->search_wordtst); } else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */ { Dic_search_word_by_len(params,i + 1, params->search_dic->dawg + edgeptr->ptr); } params->search_letters[edgeptr->chr] ++; params->search_wordtst[i] = '\0'; } /* the letter is of course available if we have a joker available */ if (params->search_letters[0]) { params->search_wordtst[i] = edgeptr->chr + 'a' - 1; params->search_letters[0] --; if (i == params->search_len) { if ((edgeptr->term) /* && (params->search_wordlistlen < params->search_wordlistlenmax) */) strcpy((*(params->search_wordlist))[params->search_wordlistlen++],params->search_wordtst); } else /* if (params->search_wordlistlen < params->search_wordlistlenmax) */ { Dic_search_word_by_len(params,i + 1,params->search_dic->dawg + edgeptr->ptr); } params->search_letters[0] ++; params->search_wordtst[i] = '\0'; } } } while (! (*edgeptr++).last); } static void Dic_search_7pl1_inner(const Dictionary dic, const char* rack, char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX], int joker) { int i,j,wordlen; const char* r = rack; struct params_7plus1_t params; Dawg_edge *root_edge; for(i=0; i < DIC_LETTERS; i++) for(j=0; j < RES_7PL1_MAX; j++) buff[i][j][0] = '\0'; for(i=0; idawg + (dic->dawg[dic->root].ptr); params.search_dic = dic; params.search_wordlistlenmax = RES_7PL1_MAX; /* search for all the words that can be done with the letters */ params.search_len = wordlen - 1; params.search_wordtst[wordlen]='\0'; params.search_wordlist = & buff[0]; params.search_wordlistlen = 0; Dic_search_word_by_len(¶ms,0,root_edge); /* search for all the words that can be done with the letters +1 */ params.search_len = wordlen; params.search_wordtst[wordlen + 1]='\0'; for(i='a'; i <= 'z'; i++) { params.search_letters[i & DIC_CHAR_MASK]++; params.search_wordlist = & buff[i & DIC_CHAR_MASK]; params.search_wordlistlen = 0; Dic_search_word_by_len(¶ms,0,root_edge); params.search_letters[i & DIC_CHAR_MASK]--; } } /** * Wrapper around Dic_search_7pl1_inner, until we have multibyte support in * the dictionary */ void Dic_search_7pl1(const Dictionary dic, const wchar_t* rack, wchar_t buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX], int joker) { int i, j, k; char tmp_buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX]; char *tmp_rack = malloc(wcslen(rack) + 1); sprintf(tmp_rack, "%ls", rack); // Do the actual work Dic_search_7pl1_inner(dic, tmp_rack, tmp_buff, joker); for (i = 0; i < DIC_LETTERS; i++) { for (j = 0; j < RES_7PL1_MAX; j++) { for (k = 0; k < DIC_WORD_MAX; k++) { buff[i][j][k] = tmp_buff[i][j][k]; } } } } /****************************************/ /****************************************/ static void Dic_search_Racc_inner(const Dictionary dic, const char* word, char wordlist[RES_RACC_MAX][DIC_WORD_MAX]) { /* search_racc will try to add a letter in front and at the end of a word */ int i,wordlistlen; Dawg_edge *edge; char wordtst[DIC_WORD_MAX]; for(i=0; i < RES_RACC_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || wordlist == NULL) return; /* let's try for the front */ wordlistlen = 0; strcpy(wordtst+1,word); for(i='a'; i <= 'z'; i++) { wordtst[0] = i; if (Dic_search_word_inner(dic,wordtst) && wordlistlen < RES_RACC_MAX) strcpy(wordlist[wordlistlen++],wordtst); } /* add a letter at the end */ for(i=0; word[i]; i++) wordtst[i] = word[i]; wordtst[i ] = '\0'; wordtst[i+1] = '\0'; edge = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root); /* points to what the next letter can be */ edge = dic->dawg + edge->ptr; if (edge != dic->dawg) { do { if (edge->term && wordlistlen < RES_RACC_MAX) { wordtst[i] = edge->chr + 'a' - 1; strcpy(wordlist[wordlistlen++],wordtst); } } while (!(*edge++).last); } } /** * Wrapper around Dic_search_Racc_inner, until we have multibyte support in * the dictionary */ void Dic_search_Racc(const Dictionary dic, const wchar_t* word, wchar_t wordlist[RES_RACC_MAX][DIC_WORD_MAX]) { int i, j; char tmp_buff[RES_RACC_MAX][DIC_WORD_MAX]; char *tmp_word = malloc(wcslen(word) + 1); sprintf(tmp_word, "%ls", word); // Do the actual work Dic_search_Racc_inner(dic, tmp_word, tmp_buff); for (i = 0; i < RES_RACC_MAX; i++) { for (j = 0; j < DIC_WORD_MAX; j++) { wordlist[i][j] = tmp_buff[i][j]; } } } /****************************************/ /****************************************/ static void Dic_search_Benj_inner(const Dictionary dic, const char* word, char wordlist[RES_BENJ_MAX][DIC_WORD_MAX]) { int i,wordlistlen; char wordtst[DIC_WORD_MAX]; Dawg_edge *edge0,*edge1,*edge2,*edgetst; for(i=0; i < RES_BENJ_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || word == NULL) return; wordlistlen = 0; strcpy(wordtst+3,word); edge0 = dic->dawg + (dic->dawg[dic->root].ptr); do { wordtst[0] = edge0->chr + 'a' - 1; edge1 = dic->dawg + edge0->ptr; do { wordtst[1] = edge1->chr + 'a' - 1; edge2 = dic->dawg + edge1->ptr; do { wordtst[2] = edge2->chr + 'a' - 1; edgetst = Dic_seek_edgeptr(dic,word,edge2); if (edgetst->term && wordlistlen < RES_BENJ_MAX) strcpy(wordlist[wordlistlen++],wordtst); } while (!(*edge2++).last); } while (!(*edge1++).last); } while (!(*edge0++).last); } /** * Wrapper around Dic_search_Benj_inner, until we have multibyte support in * the dictionary */ void Dic_search_Benj(const Dictionary dic, const wchar_t* word, wchar_t wordlist[RES_BENJ_MAX][DIC_WORD_MAX]) { int i, j; char tmp_buff[RES_BENJ_MAX][DIC_WORD_MAX]; char *tmp_word = malloc(wcslen(word) + 1); sprintf(tmp_word, "%ls", word); // Do the actual work Dic_search_Benj_inner(dic, tmp_word, tmp_buff); for (i = 0; i < RES_BENJ_MAX; i++) { for (j = 0; j < DIC_WORD_MAX; j++) { wordlist[i][j] = tmp_buff[i][j]; } } } /****************************************/ /****************************************/ struct params_cross_t { Dictionary dic; int wordlen; int wordlistlen; int wordlistlenmax; char mask[DIC_WORD_MAX]; }; void Dic_search_cross_rec(struct params_cross_t *params, char wordlist[RES_CROS_MAX][DIC_WORD_MAX], Dawg_edge *edgeptr) { Dawg_edge *current = params->dic->dawg + edgeptr->ptr; if (params->mask[params->wordlen] == '\0' && edgeptr->term) { if (params->wordlistlen < params->wordlistlenmax) strcpy(wordlist[params->wordlistlen++],params->mask); } else if (params->mask[params->wordlen] == '.') { do { params->mask[params->wordlen] = current->chr + 'a' - 1; params->wordlen ++; Dic_search_cross_rec(params,wordlist,current); params->wordlen --; params->mask[params->wordlen] = '.'; } while (!(*current++).last); } else { do { if (current->chr == (unsigned int)(params->mask[params->wordlen] & DIC_CHAR_MASK)) { params->wordlen ++; Dic_search_cross_rec(params,wordlist,current); params->wordlen --; break; } } while (!(*current++).last); } } static void Dic_search_Cros_inner(const Dictionary dic, const char* mask, char wordlist[RES_CROS_MAX][DIC_WORD_MAX]) { int i; struct params_cross_t params; for(i=0; i < RES_CROS_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || mask == NULL) return; for(i=0; i < DIC_WORD_MAX && mask[i]; i++) { if (isalpha(mask[i])) params.mask[i] = (mask[i] & DIC_CHAR_MASK) + 'A' - 1; else params.mask[i] = '.'; } params.mask[i] = '\0'; params.dic = dic; params.wordlen = 0; params.wordlistlen = 0; params.wordlistlenmax = RES_CROS_MAX; Dic_search_cross_rec(¶ms, wordlist, dic->dawg + dic->root); } /** * Wrapper around Dic_search_Cros_inner, until we have multibyte support in * the dictionary */ void Dic_search_Cros(const Dictionary dic, const wchar_t* mask, wchar_t wordlist[RES_CROS_MAX][DIC_WORD_MAX]) { int i, j; char tmp_buff[RES_CROS_MAX][DIC_WORD_MAX]; char *tmp_mask = malloc(wcslen(mask) + 1); sprintf(tmp_mask, "%ls", mask); // Do the actual work Dic_search_Cros_inner(dic, tmp_mask, tmp_buff); for (i = 0; i < RES_CROS_MAX; i++) { for (j = 0; j < DIC_WORD_MAX; j++) { wordlist[i][j] = tmp_buff[i][j]; } } } /****************************************/ /****************************************/ struct params_regexp_t { Dictionary dic; int minlength; int maxlength; automaton automaton; struct search_RegE_list_t *charlist; char word[DIC_WORD_MAX]; int wordlen; int wordlistlen; int wordlistlenmax; }; void Dic_search_regexp_rec(struct params_regexp_t *params, int state, Dawg_edge *edgeptr, char wordlist[RES_REGE_MAX][DIC_WORD_MAX]) { int next_state; Dawg_edge *current; /* if we have a valid word we store it */ if (automaton_get_accept(params->automaton,state) && edgeptr->term) { int l = strlen(params->word); if (params->wordlistlen < params->wordlistlenmax && params->minlength <= l && params->maxlength >= l) { strcpy(wordlist[params->wordlistlen++],params->word); } } /* we now drive the search by exploring the dictionary */ current = params->dic->dawg + edgeptr->ptr; do { /* the current letter is current->chr */ next_state = automaton_get_next_state(params->automaton,state,current->chr); /* 1 : the letter appears in the automaton as is */ if (next_state) { params->word[params->wordlen] = current->chr + 'a' - 1; params->wordlen ++; Dic_search_regexp_rec(params,next_state,current,wordlist); params->wordlen --; params->word[params->wordlen] = '\0'; } } while (!(*current++).last); } /** * function prototype for parser generated by bison */ int regexpparse(yyscan_t scanner, NODE** root, struct search_RegE_list_t *list, struct regexp_error_report_t *err); void Dic_search_RegE_inner(const Dictionary dic, const char* re, char wordlist[RES_REGE_MAX][DIC_WORD_MAX], struct search_RegE_list_t *list) { int i,p,n,value; int ptl[REGEXP_MAX+1]; int PS [REGEXP_MAX+1]; NODE* root; yyscan_t scanner; YY_BUFFER_STATE buf; automaton a; char stringbuf[250]; struct params_regexp_t params; struct regexp_error_report_t report; /* init */ for(i=0; i < RES_REGE_MAX; i++) wordlist[i][0] = 0; if (dic == NULL || re == NULL) return; /* (expr)# */ sprintf(stringbuf,"(%s)#",re); for(i=0; i < REGEXP_MAX; i++) { PS[i] = 0; ptl[i] = 0; } report.pos1 = 0; report.pos2 = 0; report.msg[0] = '\0'; /* parsing */ regexplex_init( &scanner ); buf = regexp_scan_string( stringbuf, scanner ); root = NULL; value = regexpparse( scanner , &root, list, &report); regexp_delete_buffer(buf,scanner); regexplex_destroy( scanner ); if (value) { #ifdef DEBUG_FLEX_IS_BROKEN fprintf(stderr,"parser error at pos %d - %d : %s\n", report.pos1, report.pos2, report.msg); #endif regexp_delete_tree(root); return ; } n = 1; p = 1; regexp_parcours(root, &p, &n, ptl); PS [0] = p - 1; ptl[0] = p - 1; regexp_possuivante(root,PS); if ((a = automaton_build(root->PP,ptl,PS,list)) != NULL) { params.dic = dic; params.minlength = list->minlength; params.maxlength = list->maxlength; params.automaton = a; params.charlist = list; memset(params.word,'\0',sizeof(params.word)); params.wordlen = 0; params.wordlistlen = 0; params.wordlistlenmax = RES_REGE_MAX; Dic_search_regexp_rec(¶ms, automaton_get_init(a), dic->dawg + dic->root, wordlist); automaton_delete(a); } regexp_delete_tree(root); } /** * Wrapper around Dic_search_RegE_inner, until we have multibyte support in * the dictionary */ void Dic_search_RegE(const Dictionary dic, const wchar_t* re, wchar_t wordlist[RES_REGE_MAX][DIC_WORD_MAX], struct search_RegE_list_t *list) { int i, j; char tmp_buff[RES_REGE_MAX][DIC_WORD_MAX]; char *tmp_re = malloc(wcslen(re) + 1); sprintf(tmp_re, "%ls", re); // Do the actual work Dic_search_RegE_inner(dic, tmp_re, tmp_buff, list); for (i = 0; i < RES_REGE_MAX; i++) { for (j = 0; j < DIC_WORD_MAX; j++) { wordlist[i][j] = tmp_buff[i][j]; } } } /****************************************/ /****************************************/