- Added several tests for the regular expressions engine

- New regexp parser using Boost.Spirit. Lex and yacc are now gone. The main advantage of this new parser, apart from being purely C++, is that it can handle wide characters. Currently, the new parser does the same as the previous one, but the code is not yet ready to use regular expressions with non-ASCII dictionaries.
2024-12-27 09:58:08 +01:00 · 2008-07-07 17:29:59 +00:00 · 2008-07-07 17:29:59 +00:00 · 11adaba410
commit 11adaba410
parent 597673e8e5
14 changed files with 717 additions and 810 deletions
--- a/configure.in
+++ b/configure.in
@ -23,17 +23,6 @@ AC_PROG_MAKE_SET
 AC_PROG_RANLIB
 PKG_PROG_PKG_CONFIG
 AC_PROG_YACC
 if test "$YACC" = yacc ; then
    AC_MSG_ERROR([Could not find the 'bison' program on your system])
 fi
 dnl Better than AC_PROG_LEX
 AM_PROG_LEX
 if test "$LEX" != "flex" ; then
    AC_MSG_ERROR([Could not find the 'flex' program on your system])
 fi
 dnl --------------------------------------------------------------
 dnl Checks for compilation flags
 dnl --------------------------------------------------------------
--- a/dic/Makefile.am
+++ b/dic/Makefile.am
@ -26,8 +26,6 @@ libdic_a_CFLAGS=
 libdic_a_YFLAGS=-d
 libdic_a_LFLAGS=
 libdic_a_SOURCES = \
 	erl.lpp \
 	ery.ypp \
 	dic_exception.cpp dic_exception.h \
 	header.cpp header.h \
 	dic_internals.h \
@ -36,44 +34,8 @@ libdic_a_SOURCES = \
 	dic_search.cpp \
 	encoding.cpp encoding.h \
 	automaton.cpp automaton.h \
-	regexp.cpp regexp.h
+	regexp.cpp regexp.h \
-
+	grammar.cpp grammar.h
 BUILT_SOURCES= 			\
 	libdic_a-erl.cpp 	\
 	libdic_a-erl.h 		\
 	libdic_a-ery.cpp 	\
 	libdic_a-ery.h
 nodist_libdic_a_SOURCES=	\
 	libdic_a-erl.cpp 	\
 	libdic_a-erl.h 		\
 	libdic_a-ery.cpp	\
 	libdic_a-ery.h
 # This hook triggers on 'make dist' (and 'make distcheck')
 # XXX: In fact, the recommended behaviour is:
 #  - list only libdic_a-ery.h in BUILT_SOURCES,
 #  - do not die with an error in configure.in if flex or bison is not found
 #  - do not have any dist-hook trigger
 # The result is that the generated files are kept in the tarball generated with make dist,
 # with still an error message for developers when the ypp or lpp file has been modified
 # and bison or flex is not found.
 # The problem is that, even though Automake is aware of the header generated by bison,
 # it seems to have problems with the one generated by flex...
 dist-hook:
 	-for file in $(BUILT_SOURCES) ; do rm -f $(distdir)/$$file ; done
 CLEANFILES= \
 	libdic_a-erl.cpp \
 	libdic_a-erl.h \
 	libdic_a-ery.cpp \
 	libdic_a-ery.h
 ## automake workaround to generate .h file 
 libdic_a-erl.h: erl.lpp
 	${LEX} ${srcdir}/erl.lpp
 #####################################
 if BUILD_DICTOOLS
--- a/dic/dic.h
+++ b/dic/dic.h
@ -100,7 +100,7 @@ public:
    /**
     * Returns the character code associated with an element,
-     * codes may range from 0 to 31. 0 is the null character.
+     * codes may range from 0 to 63. 0 is the null character.
     * @returns code for the encoded character
     */
    const dic_code_t getCode(const dic_elt_t &elt) const;
@ -114,14 +114,14 @@ public:
    /**
     * Returns a boolean to show if there is another available
     * character in the current depth (a neighbor in the tree)
-     * @returns 0 or 1 (true)
+     * @return true if the character is the last one at the current depth
     */
    bool isLast(const dic_elt_t &elt) const;
    /**
     * Returns a boolean to show if we are at the end of a word
-     * (see getNext)
+     * (see getNext())
-     * @returns 0 or 1 (true)
+     * @return true if this is the end of a word
     */
    bool isEndOfWord(const dic_elt_t &elt) const;
@ -132,7 +132,7 @@ public:
    const dic_elt_t getRoot() const;
    /**
-     * Returns the next available neighbor (see getLast)
+     * Returns the next available neighbor (see isLast())
     * @returns next dictionary element at the same depth
     */
    const dic_elt_t getNext(const dic_elt_t &elt) const;
@ -292,21 +292,12 @@ private:
    void searchWordByLen(struct params_7plus1_t *params,
                         int i, const DAWG_EDGE *edgeptr) const;
    /**
     * Internal version of searchRegExp, needed until
     * wide chars are supported by our regexp engine.
     */
    void searchRegExpInner(const string &iRegexp,
                           vector<string> &oWordList,
                           struct search_RegE_list_t *iList,
                           unsigned int iMaxResults) const;
    /// Helper for searchRegExp()
    template <typename DAWG_EDGE>
    void searchRegexpRecTempl(struct params_regexp_t *params,
                              int state,
                              const DAWG_EDGE *edgeptr,
-                              vector<string> &oWordList,
+                              vector<wstring> &oWordList,
                              unsigned int iMaxResults) const;
 };
--- a/dic/dic_search.cpp
+++ b/dic/dic_search.cpp
@ -1,7 +1,8 @@
 /*****************************************************************************
 * Eliot
- * Copyright (C) 2002-2007 Antoine Fraboulet
+ * Copyright (C) 2002-2008 Antoine Fraboulet & Olivier Teulière
 * Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
 *          Olivier Teulière  <ipkiss @@ gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@ -18,13 +19,6 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/
 /**
 *  \file   dic_search.c
 *  \brief  Dictionary lookup functions
 *  \author Antoine Fraboulet
 *  \date   2002
 */
 #include <cstdlib>
 #include <cstring>
 #include <cwchar>
@ -35,22 +29,13 @@
 #include "header.h"
 #include "encoding.h"
 #include "regexp.h"
 #include "libdic_a-ery.h"   /* generated by bison */
 #include "libdic_a-erl.h"   /* generated by flex  */
 #include "automaton.h"
 #include "grammar.h"
 static const unsigned int DEFAULT_VECT_ALLOC = 100;
 /**
 * Function prototype for bison generated parser
 */
 int regexpparse(yyscan_t scanner, NODE** root,
                struct search_RegE_list_t *iList,
                struct regexp_error_report_t *err);
 template <typename DAWG_EDGE>
 const DAWG_EDGE* Dictionary::seekEdgePtr(const wchar_t* s, const DAWG_EDGE *eptr) const
 {
@ -469,7 +454,7 @@ struct params_regexp_t
    int maxlength;
    Automaton *automaton_field;
    struct search_RegE_list_t *charlist;
-    char word[DIC_WORD_MAX];
+    wchar_t word[DIC_WORD_MAX];
    int  wordlen;
 };
@ -478,7 +463,7 @@ template <typename DAWG_EDGE>
 void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
                                      int state,
                                      const DAWG_EDGE *edgeptr,
-                                      vector<string> &oWordList,
+                                      vector<wstring> &oWordList,
                                      unsigned int iMaxResults) const
 {
    if (iMaxResults && oWordList.size() >= iMaxResults)
@ -488,7 +473,7 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
    /* if we have a valid word we store it */
    if (params->automaton_field->accept(state) && edgeptr->term)
    {
-        int l = strlen(params->word);
+        int l = wcslen(params->word);
        if (params->minlength <= l &&
            params->maxlength >= l)
        {
@ -504,98 +489,16 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
        /* 1: the letter appears in the automaton as is */
        if (next_state)
        {
-            params->word[params->wordlen] = current->chr + 'a' - 1;
+            params->word[params->wordlen] = current->chr + L'a' - 1;
            params->wordlen ++;
            searchRegexpRecTempl(params, next_state, current, oWordList, iMaxResults);
            params->wordlen --;
-            params->word[params->wordlen] = '\0';
+            params->word[params->wordlen] = L'\0';
        }
    } while (!(*current++).last);
 }
 void Dictionary::searchRegExpInner(const string &iRegexp,
                                   vector<string> &oWordList,
                                   struct search_RegE_list_t *iList,
                                   unsigned int iMaxResults) const
 {
    // Allocate room for all the results
    if (iMaxResults)
        oWordList.reserve(iMaxResults);
    else
        oWordList.reserve(DEFAULT_VECT_ALLOC);
    int ptl[REGEXP_MAX+1];
    int PS [REGEXP_MAX+1];
    /* (expr)# */
    char stringbuf[250];
    sprintf(stringbuf, "(%s)#", iRegexp.c_str());
    for (int i = 0; i < REGEXP_MAX; i++)
    {
        PS[i] = 0;
        ptl[i] = 0;
    }
    struct regexp_error_report_t report;
    report.pos1 = 0;
    report.pos2 = 0;
    report.msg[0] = '\0';
    /* parsing */
    yyscan_t scanner;
    regexplex_init( &scanner );
    YY_BUFFER_STATE buf = regexp_scan_string(stringbuf, scanner);
    NODE *root  = NULL;
    int value = regexpparse(scanner , &root, iList, &report);
    regexp_delete_buffer(buf, scanner);
    regexplex_destroy(scanner);
    if (value)
    {
 #ifdef DEBUG_FLEX_IS_BROKEN
        fprintf(stderr, "parser error at pos %d - %d: %s\n",
                report.pos1, report.pos2, report.msg);
 #endif
        regexp_delete_tree(root);
        return ;
    }
    int n = 1;
    int p = 1;
    regexp_parcours(root, &p, &n, ptl);
    PS [0] = p - 1;
    ptl[0] = p - 1;
    regexp_possuivante(root, PS);
    Automaton *a = new Automaton(root->PP, ptl, PS, iList);
    if (a)
    {
        struct params_regexp_t params;
        params.minlength      = iList->minlength;
        params.maxlength      = iList->maxlength;
        params.automaton_field = a;
        params.charlist       = iList;
        memset(params.word, '\0', sizeof(params.word));
        params.wordlen        = 0;
        if (getHeader().getVersion() == 0)
        {
            searchRegexpRecTempl(&params, a->getInitId(),
                                 getEdgeAt<DicEdgeOld>(getRoot()), oWordList, iMaxResults);
        }
        else
        {
            searchRegexpRecTempl(&params, a->getInitId(),
                                 getEdgeAt<DicEdge>(getRoot()), oWordList, iMaxResults);
        }
        delete a;
    }
    regexp_delete_tree(root);
 }
 void Dictionary::searchRegExp(const wstring &iRegexp,
                              vector<wstring> &oWordList,
                              struct search_RegE_list_t *iList,
@ -610,14 +513,65 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
    else
        oWordList.reserve(DEFAULT_VECT_ALLOC);
-    vector<string> tmpWordList;
+    int ptl[REGEXP_MAX+1];
-    // Do the actual work
+    int PS [REGEXP_MAX+1];
    searchRegExpInner(convertToMb(iRegexp), tmpWordList, iList, iMaxResults);
-    vector<string>::const_iterator it;
+    for (int i = 0; i < REGEXP_MAX; i++)
    for (it = tmpWordList.begin(); it != tmpWordList.end(); it++)
    {
-        oWordList.push_back(convertToWc(*it));
+        PS[i] = 0;
        ptl[i] = 0;
    }
    struct regexp_error_report_t report;
    report.pos1 = 0;
    report.pos2 = 0;
    report.msg[0] = '\0';
    /* parsing */
    Node *root = NULL;
    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, iList);
    if (!parsingOk)
    {
 #if 0
        fprintf(stderr, "parser error at pos %d - %d: %s\n",
                report.pos1, report.pos2, report.msg);
 #endif
        delete root;
        return;
    }
    int n = 1;
    int p = 1;
    root->traverse(p, n, ptl);
    PS [0] = p - 1;
    ptl[0] = p - 1;
    root->nextPos(PS);
    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, iList);
    if (a)
    {
        struct params_regexp_t params;
        params.minlength = iList->minlength;
        params.maxlength = iList->maxlength;
        params.automaton_field = a;
        params.charlist = iList;
        memset(params.word, L'\0', sizeof(params.word));
        params.wordlen = 0;
        if (getHeader().getVersion() == 0)
        {
            searchRegexpRecTempl(&params, a->getInitId(),
                                 getEdgeAt<DicEdgeOld>(getRoot()), oWordList, iMaxResults);
        }
        else
        {
            searchRegexpRecTempl(&params, a->getInitId(),
                                 getEdgeAt<DicEdge>(getRoot()), oWordList, iMaxResults);
        }
        delete a;
    }
    delete root;
 }
--- a/dic/erl.lpp
+++ b/dic/erl.lpp
@ -1,59 +0,0 @@
 %{
 /*****************************************************************************
 * Eliot
 * Copyright (C) 2005-2007 Antoine Fraboulet
 * Authors: Antoine Fraboulet
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/
 #include "dic.h"
 #include "regexp.h"
 #include "libdic_a-ery.h"
 /* Mask keeping only the 5 low-order bits of a character.
  * The {alphabet} rule below applies it to the matched letter ([a-zA-Z]),
  * so that, with ASCII input, 'a' and 'A' both become 1, 'b' and 'B'
  * become 2, and so on up to 26 for 'z' and 'Z'. */
 #define MASK_TO_REMOVE    0x1F
 %}
 %option prefix="regexp"
 %option outfile="lex.yy.c"
 %option header-file="libdic_a-erl.h"
 %option reentrant bison-bridge
 %option bison-locations
 %option noyywrap nounput
 /* TODO : remove lexer translation */
 alphabet [a-zA-Z]
 %%
 {alphabet} {yylval_param->c=(yytext[0]&MASK_TO_REMOVE); return LEX_CHAR;}
 "["        {return LEX_L_SQBRACKET;}
 "]"        {return LEX_R_SQBRACKET;}
 "("        {return LEX_L_BRACKET;}
 ")"        {return LEX_R_BRACKET;}
 "^"        {return LEX_HAT;}
 "."        {return LEX_ALL;}
 ":v:"      {return LEX_VOWL;}
 ":c:"      {return LEX_CONS;}
 ":1:"      {return LEX_USER1;}
 ":2:"      {return LEX_USER2;}
 "?"        {return LEX_QMARK;}
 "+"        {return LEX_PLUS;}
 "*"        {return LEX_STAR;}
 "#"        {return LEX_SHARP;}
 %%
--- a/dic/ery.ypp
+++ b/dic/ery.ypp
@ -1,295 +0,0 @@
 %{
 /*****************************************************************************
 * Eliot
 * Copyright (C) 2005-2007 Antoine Fraboulet
 * Authors: Antoine Fraboulet
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <malloc.h>
 #include "dic.h"
 #include "regexp.h"
 #include "libdic_a-ery.h"
 #include "libdic_a-erl.h"
 /* ************************************************** */
 /* ************************************************** */
 /* ************************************************** */
    /**
     * function prototype for parser generated by bison
     */
 int  regexpparse(yyscan_t scanner, NODE** root,
 		 struct search_RegE_list_t *list,
 		 struct regexp_error_report_t *err);
    /**
     * function prototype for error reporting
     */
 void regexperror(YYLTYPE *llocp, yyscan_t scanner, NODE** root,
 		 struct search_RegE_list_t *list,
 		 struct regexp_error_report_t *err,
 		 char const *msg);
 /* ************************************************** */
 /* ************************************************** */
 /* ************************************************** */
 %}
 %union {
  char c;
  NODE *NODE_TYPE;
  char letters[DIC_LETTERS];
 };
 %defines
 %name-prefix="regexp"
 %pure-parser
 %locations
 %parse-param {yyscan_t yyscanner}
 %parse-param {NODE **root}
 %parse-param {struct search_RegE_list_t *list}
 %parse-param {struct regexp_error_report_t *err}
 %lex-param   {yyscan_t yyscanner}
 %token  <c>  LEX_CHAR
 %token  LEX_ALL
 %token  LEX_VOWL
 %token  LEX_CONS
 %token  LEX_USER1
 %token  LEX_USER2
 %token  LEX_L_SQBRACKET LEX_R_SQBRACKET
 %token  LEX_L_BRACKET LEX_R_BRACKET
 %token  LEX_HAT
 %token  LEX_QMARK
 %token  LEX_PLUS
 %token  LEX_STAR
 %token  LEX_SHARP
 %type   <NODE_TYPE> var
 %type   <NODE_TYPE> expr
 %type   <letters>   vardis
 %type   <letters>   exprdis
 %type   <NODE_TYPE> exprdisnode
 %start  start
 %%
 start: LEX_L_BRACKET expr LEX_R_BRACKET LEX_SHARP
       {
 	 NODE* sharp = regexp_createNODE(NODE_VAR,RE_FINAL_TOK,NULL,NULL);
 	 *root = regexp_createNODE(NODE_AND,'\0',$2,sharp);
 	 YYACCEPT;
       }
     ;
 expr : var
       {
 	 $$=$1;
       }
     | expr expr
       {
 	 $$=regexp_createNODE(NODE_AND,'\0',$1,$2);
       }
     | var LEX_QMARK
       {
 	 NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
 	 $$=regexp_createNODE(NODE_OR,'\0',$1,epsilon);
       }
     | var LEX_PLUS
       {
 	 $$=regexp_createNODE(NODE_PLUS,'\0',$1,NULL);
       }
     | var LEX_STAR
       {
 	 $$=regexp_createNODE(NODE_STAR,'\0',$1,NULL);
       }
 /* () */
     | LEX_L_BRACKET expr LEX_R_BRACKET
       {
 	 $$=$2;
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_QMARK
       {
 	 NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
 	 $$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_PLUS
       {
 	 $$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_BRACKET expr LEX_R_BRACKET LEX_STAR
       {
 	 $$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
 /* [] */
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET
       {
 	 $$=$2;
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_QMARK
       {
 	 NODE* epsilon=regexp_createNODE(NODE_VAR,RE_EPSILON,NULL,NULL);
 	 $$=regexp_createNODE(NODE_OR,'\0',$2,epsilon);
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_PLUS
       {
 	 $$=regexp_createNODE(NODE_PLUS,'\0',$2,NULL);
       }
     | LEX_L_SQBRACKET exprdisnode LEX_R_SQBRACKET LEX_STAR
       {
 	 $$=regexp_createNODE(NODE_STAR,'\0',$2,NULL);
       }
     ;
 var : LEX_CHAR
       {
 #ifdef DEBUG_RE_PARSE
 	 printf("var : lecture %c\n",$1 + 'a' -1);
 #endif
         $$=regexp_createNODE(NODE_VAR,$1,NULL,NULL);
       }
     | LEX_ALL
       {
         $$=regexp_createNODE(NODE_VAR,RE_ALL_MATCH,NULL,NULL);
       }
     | LEX_VOWL
       {
         $$=regexp_createNODE(NODE_VAR,RE_VOWL_MATCH,NULL,NULL);
       }
     | LEX_CONS
       {
         $$=regexp_createNODE(NODE_VAR,RE_CONS_MATCH,NULL,NULL);
       }
     | LEX_USER1
       {
         $$=regexp_createNODE(NODE_VAR,RE_USR1_MATCH,NULL,NULL);
       }
     | LEX_USER2
       {
         $$=regexp_createNODE(NODE_VAR,RE_USR2_MATCH,NULL,NULL);
       }
     ;
 /* Bracket expressions: "[abc]" (listed letters) and "[^abc]" (all the
  * other letters). Each one claims the first free slot of the search list,
  * located after the user-defined ones, and is represented in the tree by a
  * NODE_VAR carrying the symbol of that slot.
  * The parse is aborted with an error when no slot is free: the slot index
  * would otherwise end up equal to DIC_SEARCH_REGE_LIST, i.e. past the last
  * slot examined by the loop. */
 exprdisnode : exprdis
       {
         int i, j;
 #ifdef DEBUG_RE_PARSE
         printf("exprdisnode : exprdis : ");
 #endif
         for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
           {
             if (list->valid[i] == 0)
               {
                 list->valid[i] = 1;
                 list->symbl[i] = RE_ALL_MATCH + i;
                 list->letters[i][0] = 0;
                 /* Keep exactly the letters listed between the brackets */
                 for (j = 1; j < DIC_LETTERS; j++)
                   list->letters[i][j] = $1[j] ? 1 : 0;
 #ifdef DEBUG_RE_PARSE
                 printf("list %d symbl x%02x : ",i,list->symbl[i]);
                 for (j = 0; j < DIC_LETTERS; j++)
                   if (list->letters[i][j])
                     printf("%c",j+'a'-1);
                 printf("\n");
 #endif
                 break;
               }
           }
         if (i >= DIC_SEARCH_REGE_LIST)
           {
             /* No free slot left for this bracket expression */
             regexperror(&@$, yyscanner, root, list, err,
                         "too many character classes");
             YYABORT;
           }
         $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
       }
     | LEX_HAT exprdis
       {
         int i, j;
 #ifdef DEBUG_RE_PARSE
         printf("exprdisnode : HAT exprdis : ");
 #endif
         for (i = RE_LIST_USER_END + 1; i < DIC_SEARCH_REGE_LIST; i++)
           {
             if (list->valid[i] == 0)
               {
                 list->valid[i] = 1;
                 list->symbl[i] = RE_ALL_MATCH + i;
                 list->letters[i][0] = 0;
                 /* Negated set: keep the letters that are NOT listed */
                 for (j = 1; j < DIC_LETTERS; j++)
                   list->letters[i][j] = $2[j] ? 0 : 1;
 #ifdef DEBUG_RE_PARSE
                 printf("list %d symbl x%02x : ",i,list->symbl[i]);
                 for (j = 0; j < DIC_LETTERS; j++)
                   if (list->letters[i][j])
                     printf("%c",j+'a'-1);
                 printf("\n");
 #endif
                 break;
               }
           }
         if (i >= DIC_SEARCH_REGE_LIST)
           {
             /* No free slot left for this bracket expression */
             regexperror(&@$, yyscanner, root, list, err,
                         "too many character classes");
             YYABORT;
           }
         $$ = regexp_createNODE(NODE_VAR, list->symbl[i], NULL, NULL);
       }
     ;
 exprdis: vardis
       {
 	 memcpy($$,$1,sizeof(char)*DIC_LETTERS);
       }
     | vardis exprdis
       {
 	 int i;
 	 for(i=0; i < DIC_LETTERS; i++)
 	   $$[i] = $1[i] | $2[i];
       }
     ;
 vardis: LEX_CHAR
       {
 	 int c = $1;
 	 memset($$,0,sizeof(char)*DIC_LETTERS);
 #ifdef DEBUG_RE_PARSE
 	 printf("vardis : lecture %c\n",c + 'a' -1);
 #endif
 	 $$[c] = 1;
       }
     ;
 %%
 #define UNUSED __attribute__((unused))
 /**
  * Error callback invoked by the bison-generated parser.
  * Stores the column range of the error and a copy of the message
  * into the error report.
  *
  * @param llocp location of the error (only the columns are used)
  * @param err   report filled by this function
  * @param msg   error message to copy into the report
  * The scanner, root and list parameters are unused; they are only here
  * because the signature must follow the %parse-param declarations.
  */
 void regexperror(YYLTYPE *llocp, yyscan_t UNUSED yyscanner, NODE UNUSED **root,
 		 struct search_RegE_list_t UNUSED *list,
 		 struct regexp_error_report_t *err, char const *msg)
 {
   err->pos1 = llocp->first_column;
   err->pos2 = llocp->last_column;
   /* strncpy() does not terminate the destination when the source fills it
    * entirely: copy one character less and terminate explicitly, so that
    * the report can always be printed with %s. */
   strncpy(err->msg, msg, sizeof(err->msg) - 1);
   err->msg[sizeof(err->msg) - 1] = '\0';
 }
 /*
 * shut down the compiler
 */
 //int yy_init_globals (yyscan_t yyscanner);
--- a/dic/grammar.cpp
+++ b/dic/grammar.cpp
@ -0,0 +1,340 @@
 /*****************************************************************************
 * Eliot
 * Copyright (C) 2008 Olivier Teulière
 * Authors: Olivier Teulière <ipkiss @@ gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/
 #include <string>
 #include <stack>
 #include <boost/spirit/core.hpp>
 #include <boost/spirit/utility/chset.hpp>
 #include <boost/spirit/tree/ast.hpp>
 #ifdef DEBUG_RE
 #include <boost/spirit/tree/tree_to_xml.hpp>
 #include <map>
 #include <iostream>
 #endif
 #include "dic.h"
 #include "header.h"
 #include "regexp.h"
 using namespace boost::spirit;
 using namespace std;
 // TODO:
 // - error handling
 // A few typedefs to simplify things
 typedef const wchar_t *iterator_t;
 typedef tree_match<iterator_t> parse_tree_match_t;
 typedef parse_tree_match_t::const_tree_iterator iter_t;
 /**
  * Boost.Spirit grammar describing the regular expressions accepted by the
  * dictionary search. Parsing an input with it (see parseRegexp()) yields an
  * abstract syntax tree whose nodes are tagged with the rule identifiers
  * declared below; evaluate() relies on these identifiers to rebuild the tree.
  */
 struct RegexpGrammar : grammar<RegexpGrammar>
 {
    // Identifiers attached to the rules via parser_tag<>, used to recognize
    // the kind of each node in the generated tree
    static const int wrapperId = 0;
    static const int exprId = 1;
    static const int repeatId = 2;
    static const int groupId = 3;
    static const int varId = 4;
    static const int choiceId = 5;
    static const int alphavarId = 6;
    /**
     * @param letters letters accepted in the regular expressions; their
     *      lower case version (as given by towlower) is accepted as well
     */
    RegexpGrammar(const wstring &letters)
    {
        wstring lower = letters;
        std::transform(lower.begin(), lower.end(), lower.begin(), towlower);
        m_allLetters = letters + lower;
    }
    /// Rules of the grammar, instantiated by Spirit for a given scanner type
    template <typename ScannerT>
    struct definition
    {
        // Constructor
        definition(const RegexpGrammar &self)
        {
            // Whole input: an expression followed by the final '#' marker
            wrapper
                = expr >> L"#"
                ;
            // Concatenation of repeated groups
            // (the second ';' below is just an empty statement)
            expr
                = repeat >> *expr;
                ;
            // A group optionally followed by a quantifier. root_node_d makes
            // the quantifier the root of the node, with the group as child.
            repeat
                = group >> root_node_d[ch_p(L'?')]
                | group >> root_node_d[ch_p(L'*')]
                | group >> root_node_d[ch_p(L'+')]
                | group
                ;
            // A single variable, a (negated) bracket expression or a
            // parenthesized sub-expression. The opening token becomes the
            // root of the node and the closing one is dropped (no_node_d).
            group
                = var
                | root_node_d[str_p(L"[^")] >> choice >> no_node_d[ch_p(L']')]
                | root_node_d[ch_p(L'[')] >> choice >> no_node_d[ch_p(L']')]
                | root_node_d[ch_p(L'(')] >> +repeat >> no_node_d[ch_p(L')')] // XXX: 'expr' instead of '+repeat' doesn't work. Why?
                ;
            // A letter, the '.' wildcard or one of the predefined classes
            // (":v:", ":c:", ":1:", ":2:")
            var
                = alphavar
                | ch_p(L'.')
                | str_p(L":v:")
                | str_p(L":c:")
                | str_p(L":1:")
                | str_p(L":2:")
                ;
            // Letters listed inside brackets, grouped into one leaf node
            choice
                = leaf_node_d[+alphavar]
                ;
            // Character set built from the letters given to the grammar
            alphavar
                = chset<>(self.m_allLetters.c_str())
                ;
        }
        rule<ScannerT, parser_context<>, parser_tag<wrapperId> > wrapper;
        rule<ScannerT, parser_context<>, parser_tag<exprId> > expr;
        rule<ScannerT, parser_context<>, parser_tag<repeatId> > repeat;
        rule<ScannerT, parser_context<>, parser_tag<groupId> > group;
        rule<ScannerT, parser_context<>, parser_tag<varId> > var;
        rule<ScannerT, parser_context<>, parser_tag<choiceId> > choice;
        rule<ScannerT, parser_context<>, parser_tag<alphavarId> > alphavar;
        /// Entry point of the grammar
        const rule<ScannerT, parser_context<>, parser_tag<wrapperId> > & start() const { return wrapper; }
    };
    /// Letters given to the constructor, followed by their lower case version
    wstring m_allLetters;
 };
 void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
              struct search_RegE_list_t *iList, bool negate = false)
 {
    if (i->value.id() == RegexpGrammar::alphavarId)
    {
        assert(i->children.size() == 0);
        // Extract the character and convert it to its internal code
        uint8_t code = iHeader.getCodeFromChar(*i->value.begin());
        Node *n = new Node(NODE_VAR, code, NULL, NULL);
        evalStack.push(n);
    }
    else if (i->value.id() == RegexpGrammar::choiceId)
    {
 #if 0
        assert(i->children.size() == 0);
        string choiceLetters(i->value.begin(), i->value.end());
        int j;
        for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; j++)
        {
            if (!iList->valid[j])
            {
                iList->valid[j] = true;
                iList->symbl[j] = RE_ALL_MATCH + j;
                iList->letters[j][0] = false;
                for (int k = 1; k < DIC_LETTERS; k++)
                {
                    bool contains = (choiceLetters.find(k + L'a' - 1) != string::npos);
                    iList->letters[j][k] = (contains ? !negate : negate);
                }
                break;
            }
        }
        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
        evalStack.push(node);
 #endif
 #if 1
        assert(i->children.size() == 0);
        wstring choiceLetters(i->value.begin(), i->value.end());
        // Make sure the letters are in upper case
        std::transform(choiceLetters.begin(), choiceLetters.end(),
                       choiceLetters.begin(), towupper);
        // The dictionary letters are already in upper case
        const wstring &letters = iHeader.getLetters();
        wstring::const_iterator itLetter;
        int j;
        for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; ++j)
        {
            if (!iList->valid[j])
            {
                iList->valid[j] = true;
                iList->symbl[j] = RE_ALL_MATCH + j;
                iList->letters[j][0] = false;
                for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
                {
                    bool contains = (choiceLetters.find(*itLetter) != string::npos);
                    iList->letters[j][iHeader.getCodeFromChar(*itLetter)] =
                        (contains ? !negate : negate);
                }
                break;
            }
        }
        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
        evalStack.push(node);
 #endif
    }
    else if (i->value.id() == RegexpGrammar::varId)
    {
        assert(i->children.size() == 0);
        string var(i->value.begin(), i->value.end());
        Node *node = NULL;
        if (var == ":v:")
            node = new Node(NODE_VAR, RE_VOWL_MATCH, NULL, NULL);
        else if (var == ":c:")
            node = new Node(NODE_VAR, RE_CONS_MATCH, NULL, NULL);
        else if (var == ":1:")
            node = new Node(NODE_VAR, RE_USR1_MATCH, NULL, NULL);
        else if (var == ":2:")
            node = new Node(NODE_VAR, RE_USR2_MATCH, NULL, NULL);
        else if (var == ".")
            node = new Node(NODE_VAR, RE_ALL_MATCH, NULL, NULL);
        else
            assert(0);
        evalStack.push(node);
    }
    else if (i->value.id() == RegexpGrammar::groupId)
    {
        if (*i->value.begin() == L'(')
        {
            assert(i->children.size() != 0);
            // Create a node for each child
            iter_t iter;
            for (iter = i->children.begin(); iter != i->children.end(); ++iter)
                evaluate(iHeader, iter, evalStack, iList);
            // "Concatenate" the created child nodes with AND nodes
            for (uint j = 0; j < i->children.size() - 1; ++j)
            {
                Node *old2 = evalStack.top();
                evalStack.pop();
                Node *old1 = evalStack.top();
                evalStack.pop();
                Node *node = new Node(NODE_AND, '\0', old1, old2);
                evalStack.push(node);
            }
        }
        else if (*i->value.begin() == L'[')
        {
            assert(i->children.size() == 1);
            bool hasCaret = (i->value.begin() + 1 != i->value.end());
            evaluate(iHeader, i->children.begin(), evalStack, iList, hasCaret);
        }
        else
            assert(0);
    }
    else if (i->value.id() == RegexpGrammar::repeatId)
    {
        assert(i->children.size() == 1);
        evaluate(iHeader, i->children.begin(), evalStack, iList);
        if (*i->value.begin() == L'*')
        {
            assert(i->children.size() == 1);
            Node *old = evalStack.top();
            evalStack.pop();
            Node *node = new Node(NODE_STAR, '\0', old, NULL);
            evalStack.push(node);
        }
        else if (*i->value.begin() == L'+')
        {
            assert(i->children.size() == 1);
            Node *old = evalStack.top();
            evalStack.pop();
            Node *node = new Node(NODE_PLUS, '\0', old, NULL);
            evalStack.push(node);
        }
        else if (*i->value.begin() == L'?')
        {
            assert(i->children.size() == 1);
            Node *old = evalStack.top();
            evalStack.pop();
            Node *epsilon = new Node(NODE_VAR, RE_EPSILON, NULL, NULL);
            Node *node = new Node(NODE_OR, '\0', old, epsilon);
            evalStack.push(node);
        }
        else
            assert(0);
    }
    else if (i->value.id() == RegexpGrammar::exprId)
    {
        assert(i->children.size() == 2);
        evaluate(iHeader, i->children.begin(), evalStack, iList);
        evaluate(iHeader, i->children.begin() + 1, evalStack, iList);
        Node *old2 = evalStack.top();
        evalStack.pop();
        Node *old1 = evalStack.top();
        evalStack.pop();
        Node *node = new Node(NODE_AND, '\0', old1, old2);
        evalStack.push(node);
    }
    else if (i->value.id() == RegexpGrammar::wrapperId)
    {
        assert(i->children.size() == 2);
        evaluate(iHeader, i->children.begin(), evalStack, iList);
        Node *old = evalStack.top();
        evalStack.pop();
        Node* sharp = new Node(NODE_VAR, RE_FINAL_TOK, NULL, NULL);
        Node *node = new Node(NODE_AND, '\0', old, sharp);
        evalStack.push(node);
    }
    else
    {
        assert(0);
    }
 }
 bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList)
 {
    // Create a grammar object
    RegexpGrammar g(iDic.getHeader().getLetters());
    // Parse the input and generate an Abstract Syntax Tree (AST)
    tree_parse_info<const wchar_t*> info = ast_parse(input, g);
    if (info.full)
    {
 #ifdef DEBUG_RE
        // Dump parse tree as XML
        std::map<parser_id, std::string> rule_names;
        rule_names[RegexpGrammar::wrapperId] = "wrapper";
        rule_names[RegexpGrammar::exprId] = "expr";
        rule_names[RegexpGrammar::repeatId] = "repeat";
        rule_names[RegexpGrammar::groupId] = "group";
        rule_names[RegexpGrammar::varId] = "var";
        rule_names[RegexpGrammar::choiceId] = "choice";
        rule_names[RegexpGrammar::alphavarId] = "alphavar";
        tree_to_xml(cout, info.trees);
 #endif
        stack<Node*> evalStack;
        evaluate(iDic.getHeader(), info.trees.begin(), evalStack, iList);
        assert(evalStack.size() == 1);
        *root = evalStack.top();
        return true;
    }
    else
    {
        return false;
    }
 }
--- a/dic/grammar.h
+++ b/dic/grammar.h
@ -0,0 +1,31 @@
 /*****************************************************************************
 * Eliot
 * Copyright (C) 2008 Olivier Teulière
 * Authors: Olivier Teulière <ipkiss @@ gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/
 #ifndef _GRAMMAR_H_
 #define _GRAMMAR_H_
 // Forward declarations: this header only uses references and pointers
 // to these types
 class Dictionary;
 class Node;
 struct search_RegE_list_t;
 /**
 * Parse a regular expression, using a grammar built from the letters of
 * the dictionary header, and generate the corresponding tree of nodes.
 *
 * @param iDic  : dictionary providing the header used by the parser
 * @param input : regular expression to parse
 * @param root  : set to the root of the generated tree if the parsing succeeds
 * @param iList : letter lists passed to the evaluation of the parse tree
 * @return true if the input was fully parsed, false otherwise
 */
 bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList);
 #endif
--- a/dic/regexp.cpp
+++ b/dic/regexp.cpp
@ -39,115 +39,80 @@
 #include "regexp.h"
 #include "automaton.h"
 #ifndef PDBG
 #ifdef DEBUG_RE2
 #define PDBG(x) x
 #else
 #define PDBG(x)
 #endif
 #endif
-
+Node::Node(int type, char v, Node *fg, Node *fd)
-NODE* regexp_createNODE(int type, char v, NODE *fg, NODE *fd)
+    : m_type(type), m_var(v), m_fg(fg), m_fd(fd), m_number(0), m_position(0),
    m_annulable(false), m_PP(0), m_DP(0)
 {
    NODE *x;
    x=(NODE *)malloc(sizeof(NODE));
    x->type      = type;
    x->var       = v;
    x->fd        = fd;
    x->fg        = fg;
    x->number    = 0;
    x->position  = 0;
    x->annulable = 0;
    x->PP        = 0;
    x->DP        = 0;
    return x;
 }
-void regexp_delete_tree(NODE *root)
+Node::~Node()
 {
-    if (root == NULL)
+    delete m_fg;
-        return;
+    delete m_fd;
    regexp_delete_tree(root->fg);
    regexp_delete_tree(root->fd);
    free(root);
 }
 #ifdef DEBUG_RE
 static void print_node(FILE*, NODE *n, int detail);
 #endif
 /**
- * computes position, annulable, PP, DP attributes
+ * p is the current leaf position
- * @param r   = root
+ * n is the current node number
 * @param p   = current leaf position
 * @param n   = current node number
 * @param ptl = position to letter
 */
-
+void Node::traverse(int &p, int &n, int ptl[])
 void regexp_parcours(NODE* r, int *p, int *n, int ptl[])
 {
-    if (r == NULL)
+    if (m_fg)
-        return;
+        m_fg->traverse(p, n, ptl);
    if (m_fd)
        m_fd->traverse(p, n, ptl);
-    regexp_parcours(r->fg, p, n, ptl);
+    m_number = n;
-    regexp_parcours(r->fd, p, n, ptl);
+    ++n;
-    switch (r->type)
+    switch (m_type)
    {
        case NODE_VAR:
-            r->position = *p;
+            m_position = p;
-            ptl[*p] = r->var;
+            ptl[p] = m_var;
-            *p = *p + 1;
+            ++p;
-            r->annulable = 0;
+            m_annulable = false;
-            r->PP = 1 << (r->position - 1);
+            m_PP = 1 << (m_position - 1);
-            r->DP = 1 << (r->position - 1);
+            m_DP = 1 << (m_position - 1);
            break;
        case NODE_OR:
-            r->position = 0;
+            m_position = 0;
-            r->annulable = r->fg->annulable || r->fd->annulable;
+            m_annulable = m_fg->m_annulable || m_fd->m_annulable;
-            r->PP = r->fg->PP | r->fd->PP;
+            m_PP = m_fg->m_PP | m_fd->m_PP;
-            r->DP = r->fg->DP | r->fd->DP;
+            m_DP = m_fg->m_DP | m_fd->m_DP;
            break;
        case NODE_AND:
-            r->position = 0;
+            m_position = 0;
-            r->annulable = r->fg->annulable && r->fd->annulable;
+            m_annulable = m_fg->m_annulable && m_fd->m_annulable;
-            r->PP = (r->fg->annulable) ? (r->fg->PP | r->fd->PP) : r->fg->PP;
+            m_PP = (m_fg->m_annulable) ? (m_fg->m_PP | m_fd->m_PP) : m_fg->m_PP;
-            r->DP = (r->fd->annulable) ? (r->fg->DP | r->fd->DP) : r->fd->DP;
+            m_DP = (m_fd->m_annulable) ? (m_fg->m_DP | m_fd->m_DP) : m_fd->m_DP;
            break;
        case NODE_PLUS:
-            r->position = 0;
+            m_position = 0;
-            r->annulable = 0;
+            m_annulable = false;
-            r->PP = r->fg->PP;
+            m_PP = m_fg->m_PP;
-            r->DP = r->fg->DP;
+            m_DP = m_fg->m_DP;
            break;
        case NODE_STAR:
-            r->position = 0;
+            m_position = 0;
-            r->annulable = 1;
+            m_annulable = true;
-            r->PP = r->fg->PP;
+            m_PP = m_fg->m_PP;
-            r->DP = r->fg->DP;
+            m_DP = m_fg->m_DP;
            break;
    }
    r->number = *n;
    *n = *n + 1;
 }
 /**
 * computes possuivante
 * @param r   = root
 * @param PS  = next position
 */
-void regexp_possuivante(NODE* r, int PS[])
+void Node::nextPos(int PS[])
 {
-    if (r == NULL)
+    if (m_fg)
-        return;
+        m_fg->nextPos(PS);
    if (m_fd)
        m_fd->nextPos(PS);
-    regexp_possuivante(r->fg, PS);
+    switch (m_type)
    regexp_possuivante(r->fd, PS);
    switch (r->type)
    {
        case NODE_AND:
            /************************************/
@ -156,8 +121,8 @@ void regexp_possuivante(NODE* r, int PS[])
            /************************************/
            for (int pos = 1; pos <= PS[0]; pos++)
            {
-                if (r->fg->DP & (1 << (pos-1)))
+                if (m_fg->m_DP & (1 << (pos-1)))
-                    PS[pos] |= r->fd->PP;
+                    PS[pos] |= m_fd->m_PP;
            }
            break;
        case NODE_PLUS:
@ -168,8 +133,8 @@ void regexp_possuivante(NODE* r, int PS[])
            /************************************/
            for (int pos = 1; pos <= PS[0]; pos++)
            {
-                if (r->DP & (1 << (pos-1)))
+                if (m_DP & (1 << (pos-1)))
-                    PS[pos] |= r->PP;
+                    PS[pos] |= m_PP;
            }
            break;
        case NODE_STAR:
@ -179,32 +144,27 @@ void regexp_possuivante(NODE* r, int PS[])
            /************************************/
            for (int pos = 1; pos <= PS[0]; pos++)
            {
-                if (r->DP & (1 << (pos-1)))
+                if (m_DP & (1 << (pos-1)))
-                    PS[pos] |= r->PP;
+                    PS[pos] |= m_PP;
            }
            break;
    }
 }
-/*////////////////////////////////////////////////
+////////////////////////////////////////////////
 // DEBUG only functions
-////////////////////////////////////////////////*/
+////////////////////////////////////////////////
 #ifdef DEBUG_RE
-void regexp_print_PS(int PS[])
+void printPS(int PS[])
 {
-    printf("** positions suivantes **\n");
+    printf("** next positions **\n");
    for (int i = 1; i <= PS[0]; i++)
    {
        printf("%02d: 0x%08x\n", i, PS[i]);
    }
 }
 #endif
 /*////////////////////////////////////////////////
 ////////////////////////////////////////////////*/
 #ifdef DEBUG_RE
 void regexp_print_ptl(int ptl[])
 {
    printf("** pos -> lettre: ");
@ -216,8 +176,6 @@ void regexp_print_ptl(int ptl[])
 }
 #endif
 /*////////////////////////////////////////////////
 ////////////////////////////////////////////////*/
 void regexp_print_letter(FILE* f, char l)
 {
@ -239,8 +197,6 @@ void regexp_print_letter(FILE* f, char l)
    }
 }
 /*////////////////////////////////////////////////
 ////////////////////////////////////////////////*/
 void regexp_print_letter2(FILE* f, char l)
 {
@ -262,19 +218,14 @@ void regexp_print_letter2(FILE* f, char l)
    }
 }
 /*////////////////////////////////////////////////
 ////////////////////////////////////////////////*/
 #ifdef DEBUG_RE
-static void print_node(FILE* f, NODE *n, int detail)
+void Node::printNode(FILE* f, int detail) const
 {
-    if (n == NULL)
+    switch (m_type)
        return;
    switch (n->type)
    {
        case NODE_VAR:
-            regexp_print_letter(f, n->var);
+            regexp_print_letter(f, m_var);
            break;
        case NODE_OR:
            fprintf(f, "OR");
@ -292,71 +243,54 @@ static void print_node(FILE* f, NODE *n, int detail)
    if (detail == 2)
    {
        fprintf(f, "\\n pos=%d\\n annul=%d\\n PP=0x%04x\\n DP=0x%04x",
-                n->position, n->annulable, n->PP, n->DP);
+                m_position, m_annulable, m_PP, m_DP);
    }
 }
 #endif
-/*////////////////////////////////////////////////
+void Node::printNodesRec(FILE* f, int detail) const
 ////////////////////////////////////////////////*/
 #ifdef DEBUG_RE
 static void print_tree_nodes(FILE* f, NODE* n, int detail)
 {
-    if (n == NULL)
+    if (m_fg)
-        return;
+        m_fg->printNodesRec(f, detail);
    if (m_fd)
        m_fd->printNodesRec(f, detail);
-    print_tree_nodes(f, n->fg, detail);
+    fprintf(f, "%d [ label=\"", m_number);
-    print_tree_nodes(f, n->fd, detail);
+    printNode(f, detail);
    fprintf(f, "%d [ label=\"", n->number);
    print_node(f, n, detail);
    fprintf(f, "\"];\n");
 }
 #endif
-/*////////////////////////////////////////////////
+void Node::printEdgesRec(FILE *f) const
 ////////////////////////////////////////////////*/
 #ifdef DEBUG_RE
 static void print_tree_edges(FILE *f, NODE *n)
 {
-    if (n == NULL)
+    if (m_fg)
-        return;
+        m_fg->printEdgesRec(f);
    if (m_fd)
        m_fd->printEdgesRec(f);
-    print_tree_edges(f, n->fg);
+    switch (m_type)
    print_tree_edges(f, n->fd);
    switch (n->type)
    {
        case NODE_OR:
-            fprintf(f, "%d -> %d;", n->number, n->fg->number);
+            fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
-            fprintf(f, "%d -> %d;", n->number, n->fd->number);
+            fprintf(f, "%d -> %d;", m_number, m_fd->m_number);
            break;
        case NODE_AND:
-            fprintf(f, "%d -> %d;", n->number, n->fg->number);
+            fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
-            fprintf(f, "%d -> %d;", n->number, n->fd->number);
+            fprintf(f, "%d -> %d;", m_number, m_fd->m_number);
            break;
        case NODE_PLUS:
        case NODE_STAR:
-            fprintf(f, "%d -> %d;", n->number, n->fg->number);
+            fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
            break;
    }
 }
 #endif
-/*////////////////////////////////////////////////
+void Node::printTreeDot(const string &iFileName, int detail) const
 ////////////////////////////////////////////////*/
 #ifdef DEBUG_RE
 void regexp_print_tree(NODE* n, const string &iName, int detail)
 {
-    FILE *f = fopen(iName.c_str(), "w");
+    FILE *f = fopen(iFileName.c_str(), "w");
    if (f == NULL)
        return;
-    fprintf(f, "digraph %s {\n", iName.c_str());
+    fprintf(f, "digraph %s {\n", iFileName.c_str());
-    print_tree_nodes(f, n, detail);
+    printNodesRec(f, detail);
-    print_tree_edges(f, n);
+    printEdgesRec(f);
    fprintf(f, "fontsize=20;\n");
    fprintf(f, "}\n");
    fclose(f);
@ -369,7 +303,7 @@ void regexp_print_tree(NODE* n, const string &iName, int detail)
    }
    else if (pid == 0)
    {
-        execlp("dotty", "dotty", iName.c_str(), NULL);
+        execlp("dotty", "dotty", iFileName.c_str(), NULL);
        printf("exec dotty failed\n");
        exit(1);
    }
--- a/dic/regexp.h
+++ b/dic/regexp.h
@ -28,6 +28,8 @@
 #ifndef _REGEXP_H_
 #define _REGEXP_H_
 #include <string>
 #define NODE_TOP    0
 #define NODE_VAR    1
 #define NODE_OR     2
@ -35,96 +37,31 @@
 #define NODE_STAR   4
 #define NODE_PLUS   5
 using std::string;
-typedef struct node
+class Node
 {
-    int              type;
+public:
    char             var;
    struct node      *fg;
    struct node      *fd;
    int number;
    int position;
    int annulable;
    int PP;
    int DP;
 } NODE;
    /**
     * different letters in the dictionary
     */
 #define DIC_LETTERS  27
    /**
     * maximum number of accepted terminals in regular expressions
     */
 #define REGEXP_MAX 32
    /**
     * special terminals that should not appear in the dictionary
     */
 #define RE_EPSILON     (DIC_LETTERS + 0)
 #define RE_FINAL_TOK   (DIC_LETTERS + 1)
 #define RE_ALL_MATCH   (DIC_LETTERS + 2)
 #define RE_VOWL_MATCH  (DIC_LETTERS + 3)
 #define RE_CONS_MATCH  (DIC_LETTERS + 4)
 #define RE_USR1_MATCH  (DIC_LETTERS + 5)
 #define RE_USR2_MATCH  (DIC_LETTERS + 6)
    /**
     * number of lists for regexp letter match \n
     * 0 : all tiles                           \n
     * 1 : vowels                              \n
     * 2 : consonants                          \n
     * 3 : user defined 1                      \n
     * 4 : user defined 2                      \n
     * x : lists used during parsing           \n
     */
 #define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
    /**
     * Structure used for Dic_search_RegE \n
     * this structure is used to explicit letters list that will be matched
     * against special tokens in the regular expression search
     */
 struct search_RegE_list_t {
  /** maximum length for results */
  int minlength;
  /** maximum length for results */
  int maxlength;
  /** special symbol associated with the list */
  char symbl[DIC_SEARCH_REGE_LIST];
  /** 0 or 1 if list is valid */
  int  valid[DIC_SEARCH_REGE_LIST];
  /** 0 or 1 if letter is present in the list */
  char letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS];
 };
 #define RE_LIST_ALL_MATCH  0
 #define RE_LIST_VOYL_MATCH 1
 #define RE_LIST_CONS_MATCH 2
 #define RE_LIST_USER_BEGIN 3
 #define RE_LIST_USER_END   4
    /**
     * Create a node for the syntactic tree used for
-     * parsing regular expressions                    \n
+     * parsing regular expressions
     * The function is called by bison grammar rules
     */
-NODE* regexp_createNODE(int type,char v,NODE *fg,NODE *fd);
+    Node(int type, char v, Node *fg, Node *fd);
    /**
-     * delete regexp syntactic tree
+     * Delete regexp syntactic tree
     */
-void  regexp_delete_tree(NODE * root);
+    ~Node();
    /**
-     * Computes positions, first positions (PP), last position (DP)
+     * Computes positions, first positions (PP), last position (DP),
-     * and translation table 'position to letter' (ptl)
+     * and annulable attribute
     *
     * @param p : max position found in the tree (must be initialized to 1)
     * @param n : number of nodes in the tree (must be initialized to 1)
     * @param ptl : position to letter translation table
     */
-void  regexp_parcours(NODE* r, int *p, int *n, int ptl[]);
+    void traverse(int &p, int &n, int ptl[]);
    /**
     * Computes 'next position' table used for building the
@ -133,14 +70,106 @@ void  regexp_parcours(NODE* r, int *p, int *n, int ptl[]);
     * @param PS : next position table, PS[0] must contain the
     * number of terminals contained in the regular expression
     */
-void  regexp_possuivante(NODE* r, int PS[]);
+    void nextPos(int PS[]);
    /// Return the first position
    int getFirstPos() const { return m_PP; }
 #ifdef DEBUG_RE
    /**
     * Print the tree rooted at the current node to a file suitable
     * for dot (Graphviz)
     */
    void printTreeDot(const string &iFileName, int detail) const;
 #endif
 private:
    int m_type;
    char m_var;
    Node *m_fg;
    Node *m_fd;
    int m_number;
    int m_position;
    bool m_annulable;
    int m_PP;
    int m_DP;
 #ifdef DEBUG_RE
    /// Print the current node to file
    void printNode(FILE* f, int detail) const;
    /// Print recursively the current node and its subnodes to file
    void printNodesRec(FILE *f, int detail) const;
    /// Print recursively the edges of the tree rooted at the current node
    void printEdgesRec(FILE *f) const;
 #endif
 };
 /**
 * different letters in the dictionary
 */
 #define DIC_LETTERS 63
 /**
 * maximum number of accepted terminals in regular expressions
 */
 #define REGEXP_MAX 32
 /**
 * special terminals that should not appear in the dictionary
 */
 #define RE_EPSILON     (DIC_LETTERS + 0)
 #define RE_FINAL_TOK   (DIC_LETTERS + 1)
 #define RE_ALL_MATCH   (DIC_LETTERS + 2)
 #define RE_VOWL_MATCH  (DIC_LETTERS + 3)
 #define RE_CONS_MATCH  (DIC_LETTERS + 4)
 #define RE_USR1_MATCH  (DIC_LETTERS + 5)
 #define RE_USR2_MATCH  (DIC_LETTERS + 6)
 /**
 * number of lists for regexp letter match \n
 * 0 : all tiles                           \n
 * 1 : vowels                              \n
 * 2 : consonants                          \n
 * 3 : user defined 1                      \n
 * 4 : user defined 2                      \n
 * x : lists used during parsing           \n
 */
 #define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
 /**
 * Structure used for Dic_search_RegE \n
 * this structure explicitly lists the letters that will be matched
 * against the special tokens in the regular expression search
 */
 struct search_RegE_list_t
 {
    /** minimum length for results */
    int minlength;
    /** maximum length for results */
    int maxlength;
    /** special symbol associated with the list */
    char symbl[DIC_SEARCH_REGE_LIST];
    /** true if the list is valid */
    bool valid[DIC_SEARCH_REGE_LIST];
    /** true if the letter is present in the list */
    bool letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS];
 };
 #define RE_LIST_ALL_MATCH  0
 #define RE_LIST_VOYL_MATCH 1
 #define RE_LIST_CONS_MATCH 2
 #define RE_LIST_USER_BEGIN 3
 #define RE_LIST_USER_END   4
 #define MAX_REGEXP_ERROR_LENGTH 500
-struct regexp_error_report_t {
+struct regexp_error_report_t
-  int pos1;
+{
-  int pos2;
+    int pos1;
-  char msg[MAX_REGEXP_ERROR_LENGTH];
+    int pos2;
    char msg[MAX_REGEXP_ERROR_LENGTH];
 };
 #include <cstdio>
@ -149,7 +178,6 @@ void  regexp_print_letter(FILE* f, char l);
 void  regexp_print_letter2(FILE* f, char l);
 void  regexp_print_PS(int PS[]);
 void  regexp_print_ptl(int ptl[]);
 void  regexp_print_tree(NODE* n, char* name, int detail);
 #endif /* _REGEXP_H_ */
--- a/dic/regexpmain.cpp
+++ b/dic/regexpmain.cpp
@ -40,62 +40,40 @@
 #endif
 #include "dic.h"
 #include "header.h"
 #include "regexp.h"
 #include "encoding.h"
-#define __UNUSED__ __attribute__((unused))
+void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t *iList)
 /********************************************************/
 /********************************************************/
 /********************************************************/
 const unsigned int all_letter[DIC_LETTERS] =
 {
-    /*                      1  1 1 1 1 1 1 1 1 1 2 2 2  2  2  2  2 */
+    memset(iList, 0, sizeof(*iList));
    /* 0 1 2 3 4  5 6 7 8 9 0  1 2 3 4 5 6 7 8 9 0 1 2  3  4  5  6 */
    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
       0,1,1,1,1, 1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1, 1, 1, 1, 1
 };
 const unsigned int vowels[DIC_LETTERS] =
 {
    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
       0,1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0
 };
 const unsigned int consonants[DIC_LETTERS] =
 {
    /* x A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z */
       0,0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1
 };
 void init_letter_lists(struct search_RegE_list_t *iList)
 {
    memset (iList, 0, sizeof(*iList));
    iList->minlength = 1;
    iList->maxlength = 15;
-    iList->valid[0] = 1; // all letters
+    iList->valid[0] = true; // all letters
    iList->symbl[0] = RE_ALL_MATCH;
-    iList->valid[1] = 1; // vowels
+    iList->valid[1] = true; // vowels
    iList->symbl[1] = RE_VOWL_MATCH;
-    iList->valid[2] = 1; // consonants
+    iList->valid[2] = true; // consonants
    iList->symbl[2] = RE_CONS_MATCH;
-    for (int i = 0; i < DIC_LETTERS; i++)
+    iList->letters[0][0] = false;
    iList->letters[1][0] = false;
    iList->letters[2][0] = false;
    const wstring &allLetters = iDic.getHeader().getLetters();
    for (size_t i = 1; i <= allLetters.size(); ++i)
    {
-        iList->letters[0][i] = all_letter[i];
+        iList->letters[0][i] = true;
-        iList->letters[1][i] = vowels[i];
+        iList->letters[1][i] = iDic.getHeader().isVowel(i);
-        iList->letters[2][i] = consonants[i];
+        iList->letters[2][i] = iDic.getHeader().isConsonant(i);
    }
-    iList->valid[3] = 0; // user defined list 1
+
    iList->valid[3] = false; // user defined list 1
    iList->symbl[3] = RE_USR1_MATCH;
-    iList->valid[4] = 0; // user defined list 2
+    iList->valid[4] = false; // user defined list 2
    iList->symbl[4] = RE_USR2_MATCH;
 }
-/********************************************************/
+
 /********************************************************/
 /********************************************************/
 void usage(const char *iBinaryName)
 {
    cerr << _("usage: %s dictionary") << iBinaryName << endl;
@ -142,7 +120,7 @@ int main(int argc, char* argv[])
                break;
            /* automaton */
-            init_letter_lists(&regList);
+            init_letter_lists(dic, &regList);
            vector<wstring> wordList;
            dic.searchRegExp(convertToWc(er), wordList, &regList);
@ -163,7 +141,7 @@ int main(int argc, char* argv[])
    }
    catch (...)
    {
-        std::cerr << "Unkown exception taken" << endl;
+        std::cerr << "Unknown exception taken" << endl;
        return 1;
    }
 }
--- a/test/regexp.input
+++ b/test/regexp.input
@ -5,12 +5,18 @@ x .*a.*e.*i.*o.*u.* 50 13
 x .*a.*e.*i.*o.*u.* 50 13 13
 x .*hop.* 50
 x a.*b 50
-x [abc].*b 
+x [abc].*b
 x [abc]*b
 x [abc]*.b
 x .*(cba)*b
 x .*(cba)+b
 x .*(nn)+.*
 x .*(nn)+.*x 200
 x ne.
 x ne:v:
 x ne:v:?
 x ne:c:s
 x (ass)+..
 x c:v:+p
 q
--- a/test/regexp.ref
+++ b/test/regexp.ref
@ -343,7 +343,7 @@ acheb
 aeroclub
 aplomb
 3 printed results
-commande> x [abc].*b 
+commande> x [abc].*b
 search for [abc].*b (50,1,15)
 acheb
 aeroclub
@ -539,4 +539,52 @@ vallonneux
 vanneaux
 vicennaux
 57 printed results
 commande> x ne.
 search for ne. (50,1,15)
 nee
 nef
 nem
 neo
 nes
 net
 ney
 nez
 8 printed results
 commande> x ne:v:
 search for ne:v: (50,1,15)
 nee
 neo
 ney
 3 printed results
 commande> x ne:v:?
 search for ne:v:? (50,1,15)
 ne
 nee
 neo
 ney
 4 printed results
 commande> x ne:c:s
 search for ne:c:s (50,1,15)
 nefs
 nems
 nets
 news
 neys
 5 printed results
 commande> x (ass)+..
 search for (ass)+.. (50,1,15)
 assai
 assassin
 assec
 asses
 assez
 assis
 assit
 7 printed results
 commande> x c:v:+p
 search for c:v:+p (50,1,15)
 cap
 cep
 coup
 3 printed results
 commande> q
--- a/utils/eliottxt.cpp
+++ b/utils/eliottxt.cpp
@ -800,11 +800,11 @@ void eliot_regexp_build_default_llist(const Dictionary &iDic,
    llist.symbl[3] = RE_USR1_MATCH;
    llist.symbl[5] = RE_USR2_MATCH;
-    llist.valid[0] = 1; // all letters
+    llist.valid[0] = true; // all letters
-    llist.valid[1] = 1; // vowels
+    llist.valid[1] = true; // vowels
-    llist.valid[2] = 1; // consonants
+    llist.valid[2] = true; // consonants
-    llist.valid[3] = 0; // user defined list 1
+    llist.valid[3] = false; // user defined list 1
-    llist.valid[4] = 0; // user defined list 2
+    llist.valid[4] = false; // user defined list 2
    for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
    {