- Fixed a bug in the parser that prevented some regular expressions containing wide characters from being parsed correctly. Regular expressions now seem to work fine with a dictionary of Polish words.

- More C++ in the automaton code
- Simplified the code in several places
2025-01-17 06:11:49 +01:00 · 2008-07-27 13:32:47 +00:00 · 2008-07-27 13:32:47 +00:00 · 90badeca21
commit 90badeca21
parent 7ea51d2f23
8 changed files with 200 additions and 328 deletions
--- a/dic/automaton.cpp
+++ b/dic/automaton.cpp
@ -54,8 +54,41 @@ using namespace std;

 #define MAX_TRANSITION_LETTERS 256

-typedef struct automaton_state_t *astate;

+/* ************************************************** *
+   Definition of the automaton state
+ * ************************************************** */
+
+static string idToString(const set<uint64_t> &iId);
+
+class State
+{
+public:
+    State(const set<uint64_t> iId) : m_id(iId) { init(); }
+    State(uint64_t iId)
+    {
+        m_id.insert(iId);
+        init();
+    }
+
+    const set<uint64_t> & getId() const { return m_id; }
+
+    // FIXME: should be private
+    bool m_accept;
+    int id_static;
+    State * m_next[MAX_TRANSITION_LETTERS];
+
+private:
+    set<uint64_t> m_id;
+
+    void init()
+    {
+        m_accept = false;
+        id_static = 0;
+        memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
+        DMSG(printf("** state %s creation\n", idToString(m_id).c_str()));
+    }
+};

 /* ************************************************** *
   Helper class, allowing to build a NFA, then a DFA
@ -64,10 +97,10 @@ typedef struct automaton_state_t *astate;
 class AutomatonHelper
 {
 public:
-    AutomatonHelper(astate iInitState);
+    AutomatonHelper(State * iInitState);
    ~AutomatonHelper();

-    astate getInitState() const { return m_initState; }
+    State * getInitState() const { return m_initState; }
 #ifdef DEBUG_AUTOMATON
    void dump(const string &iFileName) const;
 #endif
@ -77,38 +110,21 @@ public:
                                    struct search_RegE_list_t *iList);

    /// List of states
-    list<astate> m_states;
+    list<State *> m_states;

 private:
    /// Initial state of the automaton
-    astate m_initState;
+    State * m_initState;

-    void addState(astate s);
-    astate getState(const set<uint64_t> &iId) const;
+    void addState(State * s);
+    State * getState(const set<uint64_t> &iId) const;
    void printNodes(FILE* f) const;
    void printEdges(FILE* f) const;
-    void setAccept(astate s) const;
+    void setAccept(State * s) const;
    set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct search_RegE_list_t *iList) const;
 };


-/* ************************************************** *
-   State handling
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id);
-static string   s_state_id_to_str(const set<uint64_t> &iId);
-static astate   s_state_create   (const set<uint64_t> &iId);
-
-struct automaton_state_t
-{
-    set<uint64_t> id;
-    bool accept;
-    int      id_static;
-    astate   next[MAX_TRANSITION_LETTERS];
-};
-
-
 /* ************************************************** *
   Definition of the Automaton class
 * ************************************************** */
@ -125,7 +141,7 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_

    finalize(*dfa);
    DMSG(printf("\n final automaton OK \n\n"));
-    DMSG(automaton_dump("auto_fin"));
+    DMSG(dump("auto_fin"));

    delete nfa;
    delete dfa;
@ -157,7 +173,7 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
    }

    /* Create new id for states */
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    int i;
    for (i = 1, it = iHelper.m_states.begin();
         it != iHelper.m_states.end(); it++, i++)
@ -168,18 +184,18 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
    /* Build new automaton */
    for (it = iHelper.m_states.begin(); it != iHelper.m_states.end(); it++)
    {
-        astate s = *it;
+        State * s = *it;
        int i = s->id_static;

        if (s == iHelper.getInitState())
            m_init = i;
-        if (s->accept)
+        if (s->m_accept)
            m_acceptors[i] = true;

        for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
        {
-            if (s->next[l])
-                m_transitions[i][l] = s->next[l]->id_static;
+            if (s->m_next[l])
+                m_transitions[i][l] = s->m_next[l]->id_static;
        }
    }
 }
@ -231,49 +247,11 @@ void Automaton::dump(const string &iFileName) const
 }


-/* ************************************************** *
-   Definition of the state handling methods
- * ************************************************** */
-
-static set<uint64_t> s_state_id_create(uint64_t id)
-{
-    set<uint64_t> l;
-    l.insert(id);
-    return l;
-}
-
-
-static string s_state_id_to_str(const set<uint64_t> &iId)
-{
-    string s;
-    set<uint64_t>::const_iterator it;
-    for (it = iId.begin(); it != iId.end(); it++)
-    {
-        char tmp[50];
-        sprintf(tmp, "%llu ", *it);
-        s += tmp;
-    }
-    return s;
-}
-
-
-static astate s_state_create(const set<uint64_t> &iId)
-{
-    astate s = new automaton_state_t();
-    // TODO: use copy constructor
-    s->id     = iId;
-    s->accept = false;
-    memset(s->next, 0, sizeof(astate)*MAX_TRANSITION_LETTERS);
-    DMSG(printf("** state %s creation\n", s_state_id_to_str(iId).c_str()));
-    return s;
-}
-
-
 /* ************************************************** *
   Definition of the AutomatonHelper class
 * ************************************************** */

-AutomatonHelper::AutomatonHelper(astate iInitState)
+AutomatonHelper::AutomatonHelper(State * iInitState)
    : m_initState(iInitState)
 {
 }
@ -281,7 +259,7 @@ AutomatonHelper::AutomatonHelper(astate iInitState)

 AutomatonHelper::~AutomatonHelper()
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = m_states.begin(); it != m_states.end(); it++)
    {
        delete *it;
@ -289,22 +267,22 @@ AutomatonHelper::~AutomatonHelper()
 }


-void AutomatonHelper::addState(astate s)
+void AutomatonHelper::addState(State * s)
 {
    m_states.push_front(s);
-    DMSG(printf("** state %s added to automaton\n", s_state_id_to_str(s->id).c_str()));
+    DMSG(printf("** state %s added to automaton\n", idToString(s->getId()).c_str()));
 }


-astate AutomatonHelper::getState(const set<uint64_t> &iId) const
+State * AutomatonHelper::getState(const set<uint64_t> &iId) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = m_states.begin(); it != m_states.end(); it++)
    {
-        astate s = *it;
-        if (s->id == iId)
+        State * s = *it;
+        if (s->getId() == iId)
        {
-            //DMSG(printf("** get state %s ok\n", s_state_id_to_str(s->id).c_str()));
+            //DMSG(printf("** get state %s ok\n", idToString(s->getId()).c_str()));
            return s;
        }
    }
@ -318,66 +296,62 @@ astate AutomatonHelper::getState(const set<uint64_t> &iId) const
 AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint64_t *PS)
 {
    uint64_t maxpos = PS[0];
-    astate current_state;
-    char used_letter[MAX_TRANSITION_LETTERS];
+    State * current_state;
+    bool used_letter[MAX_TRANSITION_LETTERS];


    /* 1: init_state = root->PP */
-    set<uint64_t> temp_id0 = s_state_id_create(init_state_id);
-    astate temp_state = s_state_create(temp_id0);
+    State * temp_state = new State(init_state_id);
    AutomatonHelper *nfa = new AutomatonHelper(temp_state);
    nfa->addState(temp_state);
-    list<astate> L;
+    list<State *> L;
    L.push_front(temp_state);
    /* 2: while \exist state \in state_list */
    while (! L.empty())
    {
        current_state = L.front();
        L.pop_front();
-        DMSG(printf("** current state = %s\n", s_state_id_to_str(current_state->id).c_str()));
+        DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
        memset(used_letter, 0, sizeof(used_letter));
        /* 3: \foreach l in \sigma | l \neq # */
        for (uint32_t p = 1; p < maxpos; p++)
        {
            int current_letter = ptl[p];
-            if (used_letter[current_letter] == 0)
+            if (used_letter[current_letter] == false)
            {
                /* 4: int set = \cup { PS(pos) | pos \in state \wedge pos == l } */
                uint64_t ens = 0;
                for (uint32_t pos = 1; pos <= maxpos; pos++)
                {
                    if (ptl[pos] == current_letter &&
-                        (unsigned int)*(current_state->id.begin()) & (1 << (pos - 1)))
+                        (unsigned int)*(current_state->getId().begin()) & (1 << (pos - 1)))
                        ens |= PS[pos];
                }
                /* 5: transition from current_state to temp_state */
                if (ens)
                {
-                    set<uint64_t> temp_id = s_state_id_create(ens);
+                    set<uint64_t> temp_id;
+                    temp_id.insert(ens);
                    temp_state = nfa->getState(temp_id);
                    if (temp_state == NULL)
                    {
-                        temp_state = s_state_create(temp_id);
+                        temp_state = new State(temp_id);
                        nfa->addState(temp_state);
-                        current_state->next[current_letter] = temp_state;
                        L.push_front(temp_state);
                    }
-                    else
-                    {
-                        current_state->next[current_letter] = temp_state;
-                    }
+                    current_state->m_next[current_letter] = temp_state;
                }
-                used_letter[current_letter] = 1;
+                used_letter[current_letter] = true;
            }
        }
    }

-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = nfa->m_states.begin(); it != nfa->m_states.end(); it++)
    {
-        astate s = *it;
-        if (*(s->id.begin()) & (1 << (maxpos - 1)))
-            s->accept = true;
+        State * s = *it;
+        if (*(s->getId().begin()) & (1 << (maxpos - 1)))
+            s->m_accept = true;
    }

    return nfa;
@ -395,24 +369,26 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
    set<uint64_t>::const_iterator it;
    for (it = S.begin(); it != S.end(); it++)                /* \forall y \in S */
    {
-        astate y, z;

-        set<uint64_t> t = s_state_id_create(*it);
-        assert(y = getState(t));
+        set<uint64_t> t;
+        t.insert(*it);
+        State *y = getState(t);
+        assert(y != NULL);

        set<uint64_t> Ry;                                        /* Ry = \empty             */

-        if ((z = y->next[letter]) != NULL)                   /* \delta (y,z) = l        */
+        State *z;
+        if ((z = y->m_next[letter]) != NULL)                   /* \delta (y,z) = l        */
        {
-            r = getSuccessor(z->id, RE_EPSILON, iList);
+            r = getSuccessor(z->getId(), RE_EPSILON, iList);
            Ry.insert(r.begin(), r.end());
-            Ry.insert(z->id.begin(), z->id.end()); /* Ry = Ry \cup succ(z)    */
+            Ry.insert(z->getId().begin(), z->getId().end()); /* Ry = Ry \cup succ(z)    */
        }

        /* \epsilon transition from start node */
-        if ((z = y->next[RE_EPSILON]) != NULL)               /* \delta (y,z) = \epsilon */
+        if ((z = y->m_next[RE_EPSILON]) != NULL)               /* \delta (y,z) = \epsilon */
        {
-            r = getSuccessor(z->id, letter, iList);
+            r = getSuccessor(z->getId(), letter, iList);
            Ry.insert(r.begin(), r.end());       /* Ry = Ry \cup succ(z)    */
        }

@ -422,26 +398,21 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
            {
                if (iList->valid[i])
                {
-                    if (iList->letters[i][letter] && (z = y->next[(int)iList->symbl[i]]) != NULL)
+                    if (iList->letters[i][letter] && (z = y->m_next[(int)iList->symbl[i]]) != NULL)
                    {
                        DMSG(printf("*** letter "));
                        DMSG(regexp_print_letter(stdout, letter));
                        DMSG(printf("is in "));
                        DMSG(regexp_print_letter(stdout, i));

-                        r = getSuccessor(z->id, RE_EPSILON, iList);
+                        r = getSuccessor(z->getId(), RE_EPSILON, iList);
                        Ry.insert(r.begin(), r.end());
-                        Ry.insert(z->id.begin(), z->id.end());
+                        Ry.insert(z->getId().begin(), z->getId().end());
                    }
                }
            }
        }

-#if 0
-        if (alist_is_empty(Ry))                              /* Ry = \empty             */
-            return Ry;
-#endif
-
        R.insert(Ry.begin(), Ry.end());                      /* R = R \cup Ry           */
    }

@ -449,19 +420,19 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
 }


-void AutomatonHelper::setAccept(astate s) const
+void AutomatonHelper::setAccept(State * s) const
 {
-    DMSG(printf("=== setting accept for node (%s) :", s_state_id_to_str(s->id).c_str()));
-    list<astate>::const_iterator it;
+    DMSG(printf("=== setting accept for node (%s) :", idToString(s->getId()).c_str()));
+    list<State *>::const_iterator it;
    for (it = m_states.begin(); it != m_states.end(); it++)
    {
-        astate ns = *it;
-        int idx = *(ns->id.begin());
-        DMSG(printf("%s ", s_state_id_to_str(ns->id).c_str()));
-        if (ns->accept && (std::find(s->id.begin(), s->id.end(), idx) != s->id.end()))
+        State * ns = *it;
+        uint64_t idx = *(ns->getId().begin());
+        DMSG(printf("%s ", idToString(ns->getId()).c_str()));
+        if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end()))
        {
            DMSG(printf("(ok) "));
-            s->accept = true;
+            s->m_accept = true;
        }
    }
    DMSG(printf("\n"));
@ -471,13 +442,12 @@ void AutomatonHelper::setAccept(astate s) const
 AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
                                          struct search_RegE_list_t *iList)
 {
-    astate current_state;
+    State * current_state;

-    list<astate> L;
+    list<State *> L;

    // Clone the list
-    set<uint64_t> temp_id0 = iNfa.m_initState->id;
-    astate temp_state = s_state_create(temp_id0);
+    State * temp_state = new State(iNfa.m_initState->getId());
    AutomatonHelper *dfa = new AutomatonHelper(temp_state);
    dfa->addState(temp_state);
    L.push_front(temp_state);
@ -485,40 +455,35 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
    {
        current_state = L.front();
        L.pop_front();
-        DMSG(printf("** current state = %s\n", s_state_id_to_str(current_state->id).c_str()));
+        DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
        for (int letter = 1; letter < DIC_LETTERS; letter++)
        {
-            // DMSG(printf("*** start successor of %s\n", s_state_id_to_str(current_state->id).c_str()));
+            // DMSG(printf("*** start successor of %s\n", idToString(current_state->getId()).c_str()));

-            set<uint64_t> temp_id = iNfa.getSuccessor(current_state->id, letter, iList);
+            set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList);

            if (! temp_id.empty())
            {
-
-                DMSG(printf("*** successor of %s for ", s_state_id_to_str(current_state->id).c_str()));
+                DMSG(printf("*** successor of %s for ", idToString(current_state->getId()).c_str()));
                DMSG(regexp_print_letter(stdout, letter));
-                DMSG(printf(" = %s\n", s_state_id_to_str(temp_id).c_str()));
+                DMSG(printf(" = %s\n", idToString(temp_id).c_str()));

                temp_state = dfa->getState(temp_id);

-                // DMSG(printf("*** automaton get state -%s- ok\n", s_state_id_to_str(temp_id).c_str()));
+                // DMSG(printf("*** automaton get state -%s- ok\n", idToString(temp_id).c_str()));

                if (temp_state == NULL)
                {
-                    temp_state = s_state_create(temp_id);
+                    temp_state = new State(temp_id);
                    dfa->addState(temp_state);
-                    current_state->next[letter] = temp_state;
                    L.push_front(temp_state);
                }
-                else
-                {
-                    current_state->next[letter] = temp_state;
-                }
+                current_state->m_next[letter] = temp_state;
            }
        }
    }

-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = dfa->m_states.begin(); it != dfa->m_states.end(); it++)
    {
        iNfa.setAccept(*it);
@ -531,19 +496,33 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
 * ************************************************** *
 * ************************************************** */

+static string idToString(const set<uint64_t> &iId)
+{
+    string s;
+    set<uint64_t>::const_iterator it;
+    for (it = iId.begin(); it != iId.end(); it++)
+    {
+        char tmp[50];
+        sprintf(tmp, "%llu ", *it);
+        s += tmp;
+    }
+    return s;
+}
+
+
 void AutomatonHelper::printNodes(FILE* f) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = m_states.begin(); it != m_states.end(); it++)
    {
-        astate s = *it;
-        string sid = s_state_id_to_str(s->id);
+        State * s = *it;
+        string sid = idToString(s->getId());
        fprintf(f, "\t\"%s\" [label = \"%s\"", sid.c_str(), sid.c_str());
        if (s == m_initState)
        {
            fprintf(f, ", style = filled, color=lightgrey");
        }
-        if (s->accept)
+        if (s->m_accept)
        {
            fprintf(f, ", shape = doublecircle");
        }
@ -555,18 +534,16 @@ void AutomatonHelper::printNodes(FILE* f) const

 void AutomatonHelper::printEdges(FILE* f) const
 {
-    list<astate>::const_iterator it;
+    list<State *>::const_iterator it;
    for (it = m_states.begin(); it != m_states.end(); it++)
    {
-        astate s = *it;
+        State * s = *it;
        for (int letter = 0; letter < 255; letter++)
        {
-            if (s->next[letter])
+            if (s->m_next[letter])
            {
-                string sid = s_state_id_to_str(s->id);
-                fprintf(f, "\t\"%s\" -> ", sid.c_str());
-                sid = s_state_id_to_str(s->next[letter]->id);
-                fprintf(f, "\"%s\" [label = \"", sid.c_str());
+                fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
+                fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str());
                regexp_print_letter(f, letter);
                fprintf(f, "\"];\n");
            }
--- a/dic/dic.cpp
+++ b/dic/dic.cpp
@ -161,7 +161,7 @@ bool Dictionary::validateLetters(const wstring &iLetters,
 }


-const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
+dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
 {
     if (!isLast(e))
         return e + 1;
@ -169,7 +169,7 @@ const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
 }


-const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
+dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
 {
    if (m_header->getVersion() == 0)
        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
@ -178,13 +178,13 @@ const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
 }


-const dic_elt_t Dictionary::getRoot() const
+dic_elt_t Dictionary::getRoot() const
 {
    return m_header->getRoot();
 }


-const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
+dic_code_t Dictionary::getCode(const dic_elt_t &e) const
 {
    if (m_header->getVersion() == 0)
        return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
--- a/dic/dic.h
+++ b/dic/dic.h
@ -103,7 +103,7 @@ public:
     * codes may range from 0 to 63. 0 is the null character.
     * @returns code for the encoded character
     */
-    const dic_code_t getCode(const dic_elt_t &elt) const;
+    dic_code_t getCode(const dic_elt_t &elt) const;

    /**
     * Returns the wide character associated with an element.
@ -129,13 +129,13 @@ public:
     * Returns the root of the dictionary
     * @returns root element
     */
-    const dic_elt_t getRoot() const;
+    dic_elt_t getRoot() const;

    /**
     * Returns the next available neighbor (see isLast())
     * @returns next dictionary element at the same depth
     */
-    const dic_elt_t getNext(const dic_elt_t &elt) const;
+    dic_elt_t getNext(const dic_elt_t &elt) const;

    /**
     * Returns the first element available at the next depth
@ -143,7 +143,7 @@ public:
     * @params elt : current dictionary element
     * @returns next element (successor)
     */
-    const dic_elt_t getSucc(const dic_elt_t &elt) const;
+    dic_elt_t getSucc(const dic_elt_t &elt) const;

    /**
     * Find the dictionary element matching the pattern starting
@ -226,7 +226,8 @@ public:
     */
    void searchRegExp(const wstring &iRegexp,
                      vector<wstring> &oWordList,
-                      struct search_RegE_list_t *iList,
+                      unsigned int iMinLength,
+                      unsigned int iMaxLength,
                      unsigned int iMaxResults = 0) const;


--- a/dic/dic_search.cpp
+++ b/dic/dic_search.cpp
@ -453,7 +453,6 @@ struct params_regexp_t
    int minlength;
    int maxlength;
    Automaton *automaton_field;
-    struct search_RegE_list_t *charlist;
    wchar_t word[DIC_WORD_MAX];
    int  wordlen;
 };
@ -500,9 +499,40 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
 }


+static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t &iList)
+{
+    memset(&iList, 0, sizeof(iList));
+    // Prepare the space for 5 items
+    iList.symbl.assign(5, 0);
+
+    iList.valid[0] = true; // all letters
+    iList.symbl[0] = RE_ALL_MATCH;
+    iList.valid[1] = true; // vowels
+    iList.symbl[1] = RE_VOWL_MATCH;
+    iList.valid[2] = true; // consonants
+    iList.symbl[2] = RE_CONS_MATCH;
+    iList.letters[0][0] = false;
+    iList.letters[1][0] = false;
+    iList.letters[2][0] = false;
+    const wstring &allLetters = iDic.getHeader().getLetters();
+    for (size_t i = 1; i <= allLetters.size(); ++i)
+    {
+        iList.letters[0][i] = true;
+        iList.letters[1][i] = iDic.getHeader().isVowel(i);
+        iList.letters[2][i] = iDic.getHeader().isConsonant(i);
+    }
+
+    iList.valid[3] = false; // user defined list 1
+    iList.symbl[3] = RE_USR1_MATCH;
+    iList.valid[4] = false; // user defined list 2
+    iList.symbl[4] = RE_USR2_MATCH;
+}
+
+
 void Dictionary::searchRegExp(const wstring &iRegexp,
                              vector<wstring> &oWordList,
-                              struct search_RegE_list_t *iList,
+                              unsigned int iMinLength,
+                              unsigned int iMaxLength,
                              unsigned int iMaxResults) const
 {
    if (iRegexp == L"")
@ -514,27 +544,21 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
    else
        oWordList.reserve(DEFAULT_VECT_ALLOC);

-    struct regexp_error_report_t report;
-    report.pos1 = 0;
-    report.pos2 = 0;
-    report.msg[0] = '\0';
-
-    /* parsing */
+    // Parsing
    Node *root = NULL;
-    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, iList);
+    struct search_RegE_list_t llist;
+    init_letter_lists(*this, llist);
+    bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, &llist);

    if (!parsingOk)
    {
-#if 0
-        fprintf(stderr, "parser error at pos %d - %d: %s\n",
-                report.pos1, report.pos2, report.msg);
-#endif
+        // TODO
        delete root;
        return;
    }

    int ptl[REGEXP_MAX+1];
-    uint64_t PS [REGEXP_MAX+1];
+    uint64_t PS[REGEXP_MAX+1];

    for (int i = 0; i < REGEXP_MAX; i++)
    {
@ -550,14 +574,13 @@ void Dictionary::searchRegExp(const wstring &iRegexp,

    root->nextPos(PS);

-    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, iList);
+    Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
    if (a)
    {
        struct params_regexp_t params;
-        params.minlength = iList->minlength;
-        params.maxlength = iList->maxlength;
+        params.minlength = iMinLength;
+        params.maxlength = iMaxLength;
        params.automaton_field = a;
-        params.charlist = iList;
        memset(params.word, L'\0', sizeof(params.word));
        params.wordlen = 0;
        if (getHeader().getVersion() == 0)
--- a/dic/grammar.cpp
+++ b/dic/grammar.cpp
@ -104,7 +104,7 @@ struct RegexpGrammar : grammar<RegexpGrammar>
                ;

            alphavar
-                = chset<>(self.m_allLetters.c_str())
+                = chset<wchar_t>(self.m_allLetters.c_str())
                ;
        }

@ -137,30 +137,6 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
    }
    else if (i->value.id() == RegexpGrammar::choiceId)
    {
-#if 0
-        assert(i->children.size() == 0);
-
-        string choiceLetters(i->value.begin(), i->value.end());
-        int j;
-        for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; j++)
-        {
-            if (!iList->valid[j])
-            {
-                iList->valid[j] = true;
-                iList->symbl[j] = RE_ALL_MATCH + j;
-                iList->letters[j][0] = false;
-                for (int k = 1; k < DIC_LETTERS; k++)
-                {
-                    bool contains = (choiceLetters.find(k + L'a' - 1) != string::npos);
-                    iList->letters[j][k] = (contains ? !negate : negate);
-                }
-                break;
-            }
-        }
-        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
-        evalStack.push(node);
-#endif
-#if 1
        assert(i->children.size() == 0);

        wstring choiceLetters(i->value.begin(), i->value.end());
@ -176,7 +152,7 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
            if (!iList->valid[j])
            {
                iList->valid[j] = true;
-                iList->symbl[j] = RE_ALL_MATCH + j;
+                iList->symbl.push_back(RE_ALL_MATCH + j);
                iList->letters[j][0] = false;
                for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
                {
@ -189,7 +165,6 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
        }
        Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
        evalStack.push(node);
-#endif
    }
    else if (i->value.id() == RegexpGrammar::varId)
    {
--- a/dic/regexp.h
+++ b/dic/regexp.h
@ -139,18 +139,14 @@ private:
 #define DIC_SEARCH_REGE_LIST (REGEXP_MAX)

 /**
- * Structure used for Dic_search_RegE \n
- * this structure is used to explicit letters list that will be matched
+ * Structure used for dic.searchRegExp
+ * This structure is used to explicit letters list that will be matched
 * against special tokens in the regular expression search
 */
 struct search_RegE_list_t
 {
-    /** maximum length for results */
-    int minlength;
-    /** maximum length for results */
-    int maxlength;
    /** special symbol associated with the list */
-    char symbl[DIC_SEARCH_REGE_LIST];
+    vector<char> symbl;
    /** 0 or 1 if list is valid */
    bool valid[DIC_SEARCH_REGE_LIST];
    /** 0 or 1 if letter is present in the list */
@ -163,21 +159,12 @@ struct search_RegE_list_t
 #define RE_LIST_USER_BEGIN 3
 #define RE_LIST_USER_END   4

-#define MAX_REGEXP_ERROR_LENGTH 500
-
-struct regexp_error_report_t
-{
-    int pos1;
-    int pos2;
-    char msg[MAX_REGEXP_ERROR_LENGTH];
-};
-
 #include <cstdio>

-void  regexp_print_letter(FILE* f, char l);
-void  regexp_print_letter2(FILE* f, char l);
-void  regexp_print_PS(int PS[]);
-void  regexp_print_ptl(int ptl[]);
+void regexp_print_letter(FILE* f, char l);
+void regexp_print_letter2(FILE* f, char l);
+void regexp_print_PS(int PS[]);
+void regexp_print_ptl(int ptl[]);

 #endif /* _REGEXP_H_ */

--- a/dic/regexpmain.cpp
+++ b/dic/regexpmain.cpp
@ -1,7 +1,8 @@
 /*****************************************************************************
 * Eliot
- * Copyright (C) 2005-2007 Antoine Fraboulet
+ * Copyright (C) 2005-2008 Antoine Fraboulet & Olivier Teulière
 * Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
+ *          Olivier Teulière  <ipkiss @@ gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@ -40,39 +41,9 @@

 #include "dic.h"
 #include "header.h"
-#include "regexp.h"
 #include "encoding.h"


-void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t *iList)
-{
-    memset(iList, 0, sizeof(*iList));
-    iList->minlength = 1;
-    iList->maxlength = 15;
-    iList->valid[0] = true; // all letters
-    iList->symbl[0] = RE_ALL_MATCH;
-    iList->valid[1] = true; // vowels
-    iList->symbl[1] = RE_VOWL_MATCH;
-    iList->valid[2] = true; // consonants
-    iList->symbl[2] = RE_CONS_MATCH;
-    iList->letters[0][0] = false;
-    iList->letters[1][0] = false;
-    iList->letters[2][0] = false;
-    const wstring &allLetters = iDic.getHeader().getLetters();
-    for (size_t i = 1; i <= allLetters.size(); ++i)
-    {
-        iList->letters[0][i] = true;
-        iList->letters[1][i] = iDic.getHeader().isVowel(i);
-        iList->letters[2][i] = iDic.getHeader().isConsonant(i);
-    }
-
-    iList->valid[3] = false; // user defined list 1
-    iList->symbl[3] = RE_USR1_MATCH;
-    iList->valid[4] = false; // user defined list 2
-    iList->symbl[4] = RE_USR2_MATCH;
-}
-
-
 void usage(const char *iBinaryName)
 {
    cerr << _("usage: %s dictionary") << iBinaryName << endl;
@ -103,20 +74,18 @@ int main(int argc, char* argv[])
    {
        Dictionary dic(argv[1]);

-        struct search_RegE_list_t regList;
        string line;
        cout << "**************************************************************" << endl;
        cout << "**************************************************************" << endl;
-        cout << _("enter a regular expression:") << endl;
+        cout << _("Enter a regular expression:") << endl;
        while (getline(cin, line))
        {
            if (line == "")
                break;

            /* Automaton */
-            init_letter_lists(dic, &regList);
            vector<wstring> wordList;
-            dic.searchRegExp(convertToWc(line), wordList, &regList);
+            dic.searchRegExp(convertToWc(line), wordList, 1, 15);

            cout << _("result:") << endl;
            vector<wstring>::const_iterator it;
@ -126,7 +95,7 @@ int main(int argc, char* argv[])
            }
            cout << "**************************************************************" << endl;
            cout << "**************************************************************" << endl;
-            cout << _("enter a regular expression:") << endl;
+            cout << _("Enter a regular expression:") << endl;
        }

        return 0;
--- a/utils/eliottxt.cpp
+++ b/utils/eliottxt.cpp
@ -35,7 +35,6 @@
 #endif

 #include "dic.h"
-#include "regexp.h"
 #include "game_io.h"
 #include "game_factory.h"
 #include "training.h"
@ -786,53 +785,6 @@ void loop_duplicate(Duplicate &iGame)
 }


-void eliot_regexp_build_default_llist(const Dictionary &iDic,
-                                      struct search_RegE_list_t &llist)
-{
-    memset(&llist, 0, sizeof(llist));
-
-    llist.minlength = 1;
-    llist.maxlength = 15;
-
-    llist.symbl[0] = RE_ALL_MATCH;
-    llist.symbl[1] = RE_VOWL_MATCH;
-    llist.symbl[2] = RE_CONS_MATCH;
-    llist.symbl[3] = RE_USR1_MATCH;
-    llist.symbl[5] = RE_USR2_MATCH;
-
-    llist.valid[0] = true; // all letters
-    llist.valid[1] = true; // vowels
-    llist.valid[2] = true; // consonants
-    llist.valid[3] = false; // user defined list 1
-    llist.valid[4] = false; // user defined list 2
-
-    for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
-    {
-        memset(llist.letters[i], 0, sizeof(llist.letters[i]));
-    }
-
-    const vector<Tile>& allTiles = iDic.getAllTiles();
-    vector<Tile>::const_iterator it;
-    for (it = allTiles.begin(); it != allTiles.end(); it++)
-    {
-        if (! it->isJoker() && ! it->isEmpty())
-        {
-            // all tiles
-            llist.letters[0][it->toCode()] = 1;
-            // vowels
-            if (it->isVowel())
-            {
-                llist.letters[1][it->toCode()] = 1;
-            }
-            // consonants
-            if (it->isConsonant())
-            {
-                llist.letters[2][it->toCode()] = 1;
-            }
-        }
-    }
-}
-
 void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
                  const wchar_t *delim, wchar_t **state)
 {
@ -844,11 +796,6 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
    printf("          {3} longueur maximum d'un mot\n");
    */

-#define DIC_RE_MAX (3*DIC_WORD_MAX) // yes, it's 3
-
-    struct search_RegE_list_t llist;
-    eliot_regexp_build_default_llist(iDic, llist);
-
    wchar_t *regexp = _wcstok(NULL, delim, state);
    wchar_t *cnres = _wcstok(NULL, delim, state);
    wchar_t *clmin = _wcstok(NULL, delim, state);
@ -858,16 +805,11 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
    {
        return;
    }
-    int nres = cnres ? _wtoi(cnres) : 50;
-    int lmin = clmin ? _wtoi(clmin) : 1;
-    int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
+    unsigned int nres = cnres ? _wtoi(cnres) : 50;
+    unsigned int lmin = clmin ? _wtoi(clmin) : 1;
+    unsigned int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;

-    if (lmax <= (DIC_WORD_MAX - 1) && lmin >= 1 && lmin <= lmax)
-    {
-        llist.minlength = lmin;
-        llist.maxlength = lmax;
-    }
-    else
+    if (lmax > (DIC_WORD_MAX - 1) || lmin < 1 || lmin > lmax)
    {
        printf("bad length -%s,%s-\n", (const char*)clmin, (const char*)clmax);
        return;
@ -877,16 +819,14 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
           nres, lmin, lmax);

    vector<wstring> wordList;
-    iDic.searchRegExp(regexp, wordList, &llist);
+    iDic.searchRegExp(regexp, wordList, lmin, lmax, nres);

-    int nresult = 0;
    vector<wstring>::const_iterator it;
-    for (it = wordList.begin(); it != wordList.end() && nresult < nres; it++)
+    for (it = wordList.begin(); it != wordList.end(); it++)
    {
        printf("%s\n", convertToMb(*it).c_str());
-        nresult++;
    }
-    printf("%d printed results\n", nresult);
+    printf("%d printed results\n", wordList.size());
 }