mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2024-11-17 07:48:27 +01:00
- Fixed a bug in the parser that prevented some regular expressions containing wide characters from being parsed correctly. Regular expressions now seem to work fine with a dictionary of Polish words.
- More C++ in the automaton code - Simplified the code in several places
This commit is contained in:
parent
7ea51d2f23
commit
90badeca21
8 changed files with 200 additions and 328 deletions
|
@ -54,8 +54,41 @@ using namespace std;
|
|||
|
||||
#define MAX_TRANSITION_LETTERS 256
|
||||
|
||||
typedef struct automaton_state_t *astate;
|
||||
|
||||
/* ************************************************** *
|
||||
Definition of the automaton state
|
||||
* ************************************************** */
|
||||
|
||||
static string idToString(const set<uint64_t> &iId);
|
||||
|
||||
class State
|
||||
{
|
||||
public:
|
||||
State(const set<uint64_t> iId) : m_id(iId) { init(); }
|
||||
State(uint64_t iId)
|
||||
{
|
||||
m_id.insert(iId);
|
||||
init();
|
||||
}
|
||||
|
||||
const set<uint64_t> & getId() const { return m_id; }
|
||||
|
||||
// FIXME: should be private
|
||||
bool m_accept;
|
||||
int id_static;
|
||||
State * m_next[MAX_TRANSITION_LETTERS];
|
||||
|
||||
private:
|
||||
set<uint64_t> m_id;
|
||||
|
||||
void init()
|
||||
{
|
||||
m_accept = false;
|
||||
id_static = 0;
|
||||
memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
|
||||
DMSG(printf("** state %s creation\n", idToString(m_id).c_str()));
|
||||
}
|
||||
};
|
||||
|
||||
/* ************************************************** *
|
||||
Helper class, allowing to build a NFA, then a DFA
|
||||
|
@ -64,10 +97,10 @@ typedef struct automaton_state_t *astate;
|
|||
class AutomatonHelper
|
||||
{
|
||||
public:
|
||||
AutomatonHelper(astate iInitState);
|
||||
AutomatonHelper(State * iInitState);
|
||||
~AutomatonHelper();
|
||||
|
||||
astate getInitState() const { return m_initState; }
|
||||
State * getInitState() const { return m_initState; }
|
||||
#ifdef DEBUG_AUTOMATON
|
||||
void dump(const string &iFileName) const;
|
||||
#endif
|
||||
|
@ -77,38 +110,21 @@ public:
|
|||
struct search_RegE_list_t *iList);
|
||||
|
||||
/// List of states
|
||||
list<astate> m_states;
|
||||
list<State *> m_states;
|
||||
|
||||
private:
|
||||
/// Initial state of the automaton
|
||||
astate m_initState;
|
||||
State * m_initState;
|
||||
|
||||
void addState(astate s);
|
||||
astate getState(const set<uint64_t> &iId) const;
|
||||
void addState(State * s);
|
||||
State * getState(const set<uint64_t> &iId) const;
|
||||
void printNodes(FILE* f) const;
|
||||
void printEdges(FILE* f) const;
|
||||
void setAccept(astate s) const;
|
||||
void setAccept(State * s) const;
|
||||
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct search_RegE_list_t *iList) const;
|
||||
};
|
||||
|
||||
|
||||
/* ************************************************** *
|
||||
State handling
|
||||
* ************************************************** */
|
||||
|
||||
static set<uint64_t> s_state_id_create(uint64_t id);
|
||||
static string s_state_id_to_str(const set<uint64_t> &iId);
|
||||
static astate s_state_create (const set<uint64_t> &iId);
|
||||
|
||||
struct automaton_state_t
|
||||
{
|
||||
set<uint64_t> id;
|
||||
bool accept;
|
||||
int id_static;
|
||||
astate next[MAX_TRANSITION_LETTERS];
|
||||
};
|
||||
|
||||
|
||||
/* ************************************************** *
|
||||
Definition of the Automaton class
|
||||
* ************************************************** */
|
||||
|
@ -125,7 +141,7 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_
|
|||
|
||||
finalize(*dfa);
|
||||
DMSG(printf("\n final automaton OK \n\n"));
|
||||
DMSG(automaton_dump("auto_fin"));
|
||||
DMSG(dump("auto_fin"));
|
||||
|
||||
delete nfa;
|
||||
delete dfa;
|
||||
|
@ -157,7 +173,7 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
|
|||
}
|
||||
|
||||
/* Create new id for states */
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
int i;
|
||||
for (i = 1, it = iHelper.m_states.begin();
|
||||
it != iHelper.m_states.end(); it++, i++)
|
||||
|
@ -168,18 +184,18 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
|
|||
/* Build new automaton */
|
||||
for (it = iHelper.m_states.begin(); it != iHelper.m_states.end(); it++)
|
||||
{
|
||||
astate s = *it;
|
||||
State * s = *it;
|
||||
int i = s->id_static;
|
||||
|
||||
if (s == iHelper.getInitState())
|
||||
m_init = i;
|
||||
if (s->accept)
|
||||
if (s->m_accept)
|
||||
m_acceptors[i] = true;
|
||||
|
||||
for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
|
||||
{
|
||||
if (s->next[l])
|
||||
m_transitions[i][l] = s->next[l]->id_static;
|
||||
if (s->m_next[l])
|
||||
m_transitions[i][l] = s->m_next[l]->id_static;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -231,49 +247,11 @@ void Automaton::dump(const string &iFileName) const
|
|||
}
|
||||
|
||||
|
||||
/* ************************************************** *
|
||||
Definition of the state handling methods
|
||||
* ************************************************** */
|
||||
|
||||
static set<uint64_t> s_state_id_create(uint64_t id)
|
||||
{
|
||||
set<uint64_t> l;
|
||||
l.insert(id);
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
static string s_state_id_to_str(const set<uint64_t> &iId)
|
||||
{
|
||||
string s;
|
||||
set<uint64_t>::const_iterator it;
|
||||
for (it = iId.begin(); it != iId.end(); it++)
|
||||
{
|
||||
char tmp[50];
|
||||
sprintf(tmp, "%llu ", *it);
|
||||
s += tmp;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
static astate s_state_create(const set<uint64_t> &iId)
|
||||
{
|
||||
astate s = new automaton_state_t();
|
||||
// TODO: use copy constructor
|
||||
s->id = iId;
|
||||
s->accept = false;
|
||||
memset(s->next, 0, sizeof(astate)*MAX_TRANSITION_LETTERS);
|
||||
DMSG(printf("** state %s creation\n", s_state_id_to_str(iId).c_str()));
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
/* ************************************************** *
|
||||
Definition of the AutomatonHelper class
|
||||
* ************************************************** */
|
||||
|
||||
AutomatonHelper::AutomatonHelper(astate iInitState)
|
||||
AutomatonHelper::AutomatonHelper(State * iInitState)
|
||||
: m_initState(iInitState)
|
||||
{
|
||||
}
|
||||
|
@ -281,7 +259,7 @@ AutomatonHelper::AutomatonHelper(astate iInitState)
|
|||
|
||||
AutomatonHelper::~AutomatonHelper()
|
||||
{
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||
{
|
||||
delete *it;
|
||||
|
@ -289,22 +267,22 @@ AutomatonHelper::~AutomatonHelper()
|
|||
}
|
||||
|
||||
|
||||
void AutomatonHelper::addState(astate s)
|
||||
void AutomatonHelper::addState(State * s)
|
||||
{
|
||||
m_states.push_front(s);
|
||||
DMSG(printf("** state %s added to automaton\n", s_state_id_to_str(s->id).c_str()));
|
||||
DMSG(printf("** state %s added to automaton\n", idToString(s->getId()).c_str()));
|
||||
}
|
||||
|
||||
|
||||
astate AutomatonHelper::getState(const set<uint64_t> &iId) const
|
||||
State * AutomatonHelper::getState(const set<uint64_t> &iId) const
|
||||
{
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||
{
|
||||
astate s = *it;
|
||||
if (s->id == iId)
|
||||
State * s = *it;
|
||||
if (s->getId() == iId)
|
||||
{
|
||||
//DMSG(printf("** get state %s ok\n", s_state_id_to_str(s->id).c_str()));
|
||||
//DMSG(printf("** get state %s ok\n", idToString(s->getId()).c_str()));
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
@ -318,66 +296,62 @@ astate AutomatonHelper::getState(const set<uint64_t> &iId) const
|
|||
AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint64_t *PS)
|
||||
{
|
||||
uint64_t maxpos = PS[0];
|
||||
astate current_state;
|
||||
char used_letter[MAX_TRANSITION_LETTERS];
|
||||
State * current_state;
|
||||
bool used_letter[MAX_TRANSITION_LETTERS];
|
||||
|
||||
|
||||
/* 1: init_state = root->PP */
|
||||
set<uint64_t> temp_id0 = s_state_id_create(init_state_id);
|
||||
astate temp_state = s_state_create(temp_id0);
|
||||
State * temp_state = new State(init_state_id);
|
||||
AutomatonHelper *nfa = new AutomatonHelper(temp_state);
|
||||
nfa->addState(temp_state);
|
||||
list<astate> L;
|
||||
list<State *> L;
|
||||
L.push_front(temp_state);
|
||||
/* 2: while \exist state \in state_list */
|
||||
while (! L.empty())
|
||||
{
|
||||
current_state = L.front();
|
||||
L.pop_front();
|
||||
DMSG(printf("** current state = %s\n", s_state_id_to_str(current_state->id).c_str()));
|
||||
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
|
||||
memset(used_letter, 0, sizeof(used_letter));
|
||||
/* 3: \foreach l in \sigma | l \neq # */
|
||||
for (uint32_t p = 1; p < maxpos; p++)
|
||||
{
|
||||
int current_letter = ptl[p];
|
||||
if (used_letter[current_letter] == 0)
|
||||
if (used_letter[current_letter] == false)
|
||||
{
|
||||
/* 4: int set = \cup { PS(pos) | pos \in state \wedge pos == l } */
|
||||
uint64_t ens = 0;
|
||||
for (uint32_t pos = 1; pos <= maxpos; pos++)
|
||||
{
|
||||
if (ptl[pos] == current_letter &&
|
||||
(unsigned int)*(current_state->id.begin()) & (1 << (pos - 1)))
|
||||
(unsigned int)*(current_state->getId().begin()) & (1 << (pos - 1)))
|
||||
ens |= PS[pos];
|
||||
}
|
||||
/* 5: transition from current_state to temp_state */
|
||||
if (ens)
|
||||
{
|
||||
set<uint64_t> temp_id = s_state_id_create(ens);
|
||||
set<uint64_t> temp_id;
|
||||
temp_id.insert(ens);
|
||||
temp_state = nfa->getState(temp_id);
|
||||
if (temp_state == NULL)
|
||||
{
|
||||
temp_state = s_state_create(temp_id);
|
||||
temp_state = new State(temp_id);
|
||||
nfa->addState(temp_state);
|
||||
current_state->next[current_letter] = temp_state;
|
||||
L.push_front(temp_state);
|
||||
}
|
||||
else
|
||||
{
|
||||
current_state->next[current_letter] = temp_state;
|
||||
}
|
||||
current_state->m_next[current_letter] = temp_state;
|
||||
}
|
||||
used_letter[current_letter] = 1;
|
||||
used_letter[current_letter] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = nfa->m_states.begin(); it != nfa->m_states.end(); it++)
|
||||
{
|
||||
astate s = *it;
|
||||
if (*(s->id.begin()) & (1 << (maxpos - 1)))
|
||||
s->accept = true;
|
||||
State * s = *it;
|
||||
if (*(s->getId().begin()) & (1 << (maxpos - 1)))
|
||||
s->m_accept = true;
|
||||
}
|
||||
|
||||
return nfa;
|
||||
|
@ -395,24 +369,26 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
|||
set<uint64_t>::const_iterator it;
|
||||
for (it = S.begin(); it != S.end(); it++) /* \forall y \in S */
|
||||
{
|
||||
astate y, z;
|
||||
|
||||
set<uint64_t> t = s_state_id_create(*it);
|
||||
assert(y = getState(t));
|
||||
set<uint64_t> t;
|
||||
t.insert(*it);
|
||||
State *y = getState(t);
|
||||
assert(y != NULL);
|
||||
|
||||
set<uint64_t> Ry; /* Ry = \empty */
|
||||
|
||||
if ((z = y->next[letter]) != NULL) /* \delta (y,z) = l */
|
||||
State *z;
|
||||
if ((z = y->m_next[letter]) != NULL) /* \delta (y,z) = l */
|
||||
{
|
||||
r = getSuccessor(z->id, RE_EPSILON, iList);
|
||||
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
||||
Ry.insert(r.begin(), r.end());
|
||||
Ry.insert(z->id.begin(), z->id.end()); /* Ry = Ry \cup succ(z) */
|
||||
Ry.insert(z->getId().begin(), z->getId().end()); /* Ry = Ry \cup succ(z) */
|
||||
}
|
||||
|
||||
/* \epsilon transition from start node */
|
||||
if ((z = y->next[RE_EPSILON]) != NULL) /* \delta (y,z) = \epsilon */
|
||||
if ((z = y->m_next[RE_EPSILON]) != NULL) /* \delta (y,z) = \epsilon */
|
||||
{
|
||||
r = getSuccessor(z->id, letter, iList);
|
||||
r = getSuccessor(z->getId(), letter, iList);
|
||||
Ry.insert(r.begin(), r.end()); /* Ry = Ry \cup succ(z) */
|
||||
}
|
||||
|
||||
|
@ -422,26 +398,21 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
|||
{
|
||||
if (iList->valid[i])
|
||||
{
|
||||
if (iList->letters[i][letter] && (z = y->next[(int)iList->symbl[i]]) != NULL)
|
||||
if (iList->letters[i][letter] && (z = y->m_next[(int)iList->symbl[i]]) != NULL)
|
||||
{
|
||||
DMSG(printf("*** letter "));
|
||||
DMSG(regexp_print_letter(stdout, letter));
|
||||
DMSG(printf("is in "));
|
||||
DMSG(regexp_print_letter(stdout, i));
|
||||
|
||||
r = getSuccessor(z->id, RE_EPSILON, iList);
|
||||
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
||||
Ry.insert(r.begin(), r.end());
|
||||
Ry.insert(z->id.begin(), z->id.end());
|
||||
Ry.insert(z->getId().begin(), z->getId().end());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (alist_is_empty(Ry)) /* Ry = \empty */
|
||||
return Ry;
|
||||
#endif
|
||||
|
||||
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
|
||||
}
|
||||
|
||||
|
@ -449,19 +420,19 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
|||
}
|
||||
|
||||
|
||||
void AutomatonHelper::setAccept(astate s) const
|
||||
void AutomatonHelper::setAccept(State * s) const
|
||||
{
|
||||
DMSG(printf("=== setting accept for node (%s) :", s_state_id_to_str(s->id).c_str()));
|
||||
list<astate>::const_iterator it;
|
||||
DMSG(printf("=== setting accept for node (%s) :", idToString(s->getId()).c_str()));
|
||||
list<State *>::const_iterator it;
|
||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||
{
|
||||
astate ns = *it;
|
||||
int idx = *(ns->id.begin());
|
||||
DMSG(printf("%s ", s_state_id_to_str(ns->id).c_str()));
|
||||
if (ns->accept && (std::find(s->id.begin(), s->id.end(), idx) != s->id.end()))
|
||||
State * ns = *it;
|
||||
uint64_t idx = *(ns->getId().begin());
|
||||
DMSG(printf("%s ", idToString(ns->getId()).c_str()));
|
||||
if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end()))
|
||||
{
|
||||
DMSG(printf("(ok) "));
|
||||
s->accept = true;
|
||||
s->m_accept = true;
|
||||
}
|
||||
}
|
||||
DMSG(printf("\n"));
|
||||
|
@ -471,13 +442,12 @@ void AutomatonHelper::setAccept(astate s) const
|
|||
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
||||
struct search_RegE_list_t *iList)
|
||||
{
|
||||
astate current_state;
|
||||
State * current_state;
|
||||
|
||||
list<astate> L;
|
||||
list<State *> L;
|
||||
|
||||
// Clone the list
|
||||
set<uint64_t> temp_id0 = iNfa.m_initState->id;
|
||||
astate temp_state = s_state_create(temp_id0);
|
||||
State * temp_state = new State(iNfa.m_initState->getId());
|
||||
AutomatonHelper *dfa = new AutomatonHelper(temp_state);
|
||||
dfa->addState(temp_state);
|
||||
L.push_front(temp_state);
|
||||
|
@ -485,40 +455,35 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
|||
{
|
||||
current_state = L.front();
|
||||
L.pop_front();
|
||||
DMSG(printf("** current state = %s\n", s_state_id_to_str(current_state->id).c_str()));
|
||||
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
|
||||
for (int letter = 1; letter < DIC_LETTERS; letter++)
|
||||
{
|
||||
// DMSG(printf("*** start successor of %s\n", s_state_id_to_str(current_state->id).c_str()));
|
||||
// DMSG(printf("*** start successor of %s\n", idToString(current_state->getId()).c_str()));
|
||||
|
||||
set<uint64_t> temp_id = iNfa.getSuccessor(current_state->id, letter, iList);
|
||||
set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList);
|
||||
|
||||
if (! temp_id.empty())
|
||||
{
|
||||
|
||||
DMSG(printf("*** successor of %s for ", s_state_id_to_str(current_state->id).c_str()));
|
||||
DMSG(printf("*** successor of %s for ", idToString(current_state->getId()).c_str()));
|
||||
DMSG(regexp_print_letter(stdout, letter));
|
||||
DMSG(printf(" = %s\n", s_state_id_to_str(temp_id).c_str()));
|
||||
DMSG(printf(" = %s\n", idToString(temp_id).c_str()));
|
||||
|
||||
temp_state = dfa->getState(temp_id);
|
||||
|
||||
// DMSG(printf("*** automaton get state -%s- ok\n", s_state_id_to_str(temp_id).c_str()));
|
||||
// DMSG(printf("*** automaton get state -%s- ok\n", idToString(temp_id).c_str()));
|
||||
|
||||
if (temp_state == NULL)
|
||||
{
|
||||
temp_state = s_state_create(temp_id);
|
||||
temp_state = new State(temp_id);
|
||||
dfa->addState(temp_state);
|
||||
current_state->next[letter] = temp_state;
|
||||
L.push_front(temp_state);
|
||||
}
|
||||
else
|
||||
{
|
||||
current_state->next[letter] = temp_state;
|
||||
}
|
||||
current_state->m_next[letter] = temp_state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = dfa->m_states.begin(); it != dfa->m_states.end(); it++)
|
||||
{
|
||||
iNfa.setAccept(*it);
|
||||
|
@ -531,19 +496,33 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
|||
* ************************************************** *
|
||||
* ************************************************** */
|
||||
|
||||
/**
 * Build a printable form of a state identifier.
 *
 * Every id of the set is written in decimal and followed by one space,
 * in the iteration order of the set. An empty set gives an empty string.
 *
 * @param iId : identifier to print
 * @returns the textual representation
 */
static std::string idToString(const std::set<uint64_t> &iId)
{
    std::string s;
    std::set<uint64_t>::const_iterator it;
    for (it = iId.begin(); it != iId.end(); ++it)
    {
        char tmp[50];
        // uint64_t is not 'unsigned long long' on every platform:
        // cast explicitly so that the argument matches %llu
        snprintf(tmp, sizeof(tmp), "%llu ",
                 static_cast<unsigned long long>(*it));
        s += tmp;
    }
    return s;
}
|
||||
|
||||
|
||||
void AutomatonHelper::printNodes(FILE* f) const
|
||||
{
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||
{
|
||||
astate s = *it;
|
||||
string sid = s_state_id_to_str(s->id);
|
||||
State * s = *it;
|
||||
string sid = idToString(s->getId());
|
||||
fprintf(f, "\t\"%s\" [label = \"%s\"", sid.c_str(), sid.c_str());
|
||||
if (s == m_initState)
|
||||
{
|
||||
fprintf(f, ", style = filled, color=lightgrey");
|
||||
}
|
||||
if (s->accept)
|
||||
if (s->m_accept)
|
||||
{
|
||||
fprintf(f, ", shape = doublecircle");
|
||||
}
|
||||
|
@ -555,18 +534,16 @@ void AutomatonHelper::printNodes(FILE* f) const
|
|||
|
||||
void AutomatonHelper::printEdges(FILE* f) const
|
||||
{
|
||||
list<astate>::const_iterator it;
|
||||
list<State *>::const_iterator it;
|
||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||
{
|
||||
astate s = *it;
|
||||
State * s = *it;
|
||||
for (int letter = 0; letter < 255; letter++)
|
||||
{
|
||||
if (s->next[letter])
|
||||
if (s->m_next[letter])
|
||||
{
|
||||
string sid = s_state_id_to_str(s->id);
|
||||
fprintf(f, "\t\"%s\" -> ", sid.c_str());
|
||||
sid = s_state_id_to_str(s->next[letter]->id);
|
||||
fprintf(f, "\"%s\" [label = \"", sid.c_str());
|
||||
fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
|
||||
fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str());
|
||||
regexp_print_letter(f, letter);
|
||||
fprintf(f, "\"];\n");
|
||||
}
|
||||
|
|
|
@ -161,7 +161,7 @@ bool Dictionary::validateLetters(const wstring &iLetters,
|
|||
}
|
||||
|
||||
|
||||
const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
|
||||
dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
|
||||
{
|
||||
if (!isLast(e))
|
||||
return e + 1;
|
||||
|
@ -169,7 +169,7 @@ const dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
|
|||
}
|
||||
|
||||
|
||||
const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
|
||||
dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
|
||||
|
@ -178,13 +178,13 @@ const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
|
|||
}
|
||||
|
||||
|
||||
const dic_elt_t Dictionary::getRoot() const
|
||||
dic_elt_t Dictionary::getRoot() const
|
||||
{
|
||||
return m_header->getRoot();
|
||||
}
|
||||
|
||||
|
||||
const dic_code_t Dictionary::getCode(const dic_elt_t &e) const
|
||||
dic_code_t Dictionary::getCode(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
|
||||
|
|
11
dic/dic.h
11
dic/dic.h
|
@ -103,7 +103,7 @@ public:
|
|||
* codes may range from 0 to 63. 0 is the null character.
|
||||
* @returns code for the encoded character
|
||||
*/
|
||||
const dic_code_t getCode(const dic_elt_t &elt) const;
|
||||
dic_code_t getCode(const dic_elt_t &elt) const;
|
||||
|
||||
/**
|
||||
* Returns the wide character associated with an element.
|
||||
|
@ -129,13 +129,13 @@ public:
|
|||
* Returns the root of the dictionary
|
||||
* @returns root element
|
||||
*/
|
||||
const dic_elt_t getRoot() const;
|
||||
dic_elt_t getRoot() const;
|
||||
|
||||
/**
|
||||
* Returns the next available neighbor (see isLast())
|
||||
* @returns next dictionary element at the same depth
|
||||
*/
|
||||
const dic_elt_t getNext(const dic_elt_t &elt) const;
|
||||
dic_elt_t getNext(const dic_elt_t &elt) const;
|
||||
|
||||
/**
|
||||
* Returns the first element available at the next depth
|
||||
|
@ -143,7 +143,7 @@ public:
|
|||
* @params elt : current dictionary element
|
||||
* @returns next element (successor)
|
||||
*/
|
||||
const dic_elt_t getSucc(const dic_elt_t &elt) const;
|
||||
dic_elt_t getSucc(const dic_elt_t &elt) const;
|
||||
|
||||
/**
|
||||
* Find the dictionary element matching the pattern starting
|
||||
|
@ -226,7 +226,8 @@ public:
|
|||
*/
|
||||
void searchRegExp(const wstring &iRegexp,
|
||||
vector<wstring> &oWordList,
|
||||
struct search_RegE_list_t *iList,
|
||||
unsigned int iMinLength,
|
||||
unsigned int iMaxLength,
|
||||
unsigned int iMaxResults = 0) const;
|
||||
|
||||
|
||||
|
|
|
@ -453,7 +453,6 @@ struct params_regexp_t
|
|||
int minlength;
|
||||
int maxlength;
|
||||
Automaton *automaton_field;
|
||||
struct search_RegE_list_t *charlist;
|
||||
wchar_t word[DIC_WORD_MAX];
|
||||
int wordlen;
|
||||
};
|
||||
|
@ -500,9 +499,40 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
|
|||
}
|
||||
|
||||
|
||||
static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t &iList)
|
||||
{
|
||||
memset(&iList, 0, sizeof(iList));
|
||||
// Prepare the space for 5 items
|
||||
iList.symbl.assign(5, 0);
|
||||
|
||||
iList.valid[0] = true; // all letters
|
||||
iList.symbl[0] = RE_ALL_MATCH;
|
||||
iList.valid[1] = true; // vowels
|
||||
iList.symbl[1] = RE_VOWL_MATCH;
|
||||
iList.valid[2] = true; // consonants
|
||||
iList.symbl[2] = RE_CONS_MATCH;
|
||||
iList.letters[0][0] = false;
|
||||
iList.letters[1][0] = false;
|
||||
iList.letters[2][0] = false;
|
||||
const wstring &allLetters = iDic.getHeader().getLetters();
|
||||
for (size_t i = 1; i <= allLetters.size(); ++i)
|
||||
{
|
||||
iList.letters[0][i] = true;
|
||||
iList.letters[1][i] = iDic.getHeader().isVowel(i);
|
||||
iList.letters[2][i] = iDic.getHeader().isConsonant(i);
|
||||
}
|
||||
|
||||
iList.valid[3] = false; // user defined list 1
|
||||
iList.symbl[3] = RE_USR1_MATCH;
|
||||
iList.valid[4] = false; // user defined list 2
|
||||
iList.symbl[4] = RE_USR2_MATCH;
|
||||
}
|
||||
|
||||
|
||||
void Dictionary::searchRegExp(const wstring &iRegexp,
|
||||
vector<wstring> &oWordList,
|
||||
struct search_RegE_list_t *iList,
|
||||
unsigned int iMinLength,
|
||||
unsigned int iMaxLength,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (iRegexp == L"")
|
||||
|
@ -514,27 +544,21 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
|
|||
else
|
||||
oWordList.reserve(DEFAULT_VECT_ALLOC);
|
||||
|
||||
struct regexp_error_report_t report;
|
||||
report.pos1 = 0;
|
||||
report.pos2 = 0;
|
||||
report.msg[0] = '\0';
|
||||
|
||||
/* parsing */
|
||||
// Parsing
|
||||
Node *root = NULL;
|
||||
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, iList);
|
||||
struct search_RegE_list_t llist;
|
||||
init_letter_lists(*this, llist);
|
||||
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, &llist);
|
||||
|
||||
if (!parsingOk)
|
||||
{
|
||||
#if 0
|
||||
fprintf(stderr, "parser error at pos %d - %d: %s\n",
|
||||
report.pos1, report.pos2, report.msg);
|
||||
#endif
|
||||
// TODO
|
||||
delete root;
|
||||
return;
|
||||
}
|
||||
|
||||
int ptl[REGEXP_MAX+1];
|
||||
uint64_t PS [REGEXP_MAX+1];
|
||||
uint64_t PS[REGEXP_MAX+1];
|
||||
|
||||
for (int i = 0; i < REGEXP_MAX; i++)
|
||||
{
|
||||
|
@ -550,14 +574,13 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
|
|||
|
||||
root->nextPos(PS);
|
||||
|
||||
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, iList);
|
||||
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
|
||||
if (a)
|
||||
{
|
||||
struct params_regexp_t params;
|
||||
params.minlength = iList->minlength;
|
||||
params.maxlength = iList->maxlength;
|
||||
params.minlength = iMinLength;
|
||||
params.maxlength = iMaxLength;
|
||||
params.automaton_field = a;
|
||||
params.charlist = iList;
|
||||
memset(params.word, L'\0', sizeof(params.word));
|
||||
params.wordlen = 0;
|
||||
if (getHeader().getVersion() == 0)
|
||||
|
|
|
@ -104,7 +104,7 @@ struct RegexpGrammar : grammar<RegexpGrammar>
|
|||
;
|
||||
|
||||
alphavar
|
||||
= chset<>(self.m_allLetters.c_str())
|
||||
= chset<wchar_t>(self.m_allLetters.c_str())
|
||||
;
|
||||
}
|
||||
|
||||
|
@ -137,30 +137,6 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
|||
}
|
||||
else if (i->value.id() == RegexpGrammar::choiceId)
|
||||
{
|
||||
#if 0
|
||||
assert(i->children.size() == 0);
|
||||
|
||||
string choiceLetters(i->value.begin(), i->value.end());
|
||||
int j;
|
||||
for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; j++)
|
||||
{
|
||||
if (!iList->valid[j])
|
||||
{
|
||||
iList->valid[j] = true;
|
||||
iList->symbl[j] = RE_ALL_MATCH + j;
|
||||
iList->letters[j][0] = false;
|
||||
for (int k = 1; k < DIC_LETTERS; k++)
|
||||
{
|
||||
bool contains = (choiceLetters.find(k + L'a' - 1) != string::npos);
|
||||
iList->letters[j][k] = (contains ? !negate : negate);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
|
||||
evalStack.push(node);
|
||||
#endif
|
||||
#if 1
|
||||
assert(i->children.size() == 0);
|
||||
|
||||
wstring choiceLetters(i->value.begin(), i->value.end());
|
||||
|
@ -176,7 +152,7 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
|||
if (!iList->valid[j])
|
||||
{
|
||||
iList->valid[j] = true;
|
||||
iList->symbl[j] = RE_ALL_MATCH + j;
|
||||
iList->symbl.push_back(RE_ALL_MATCH + j);
|
||||
iList->letters[j][0] = false;
|
||||
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
|
||||
{
|
||||
|
@ -189,7 +165,6 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
|||
}
|
||||
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
|
||||
evalStack.push(node);
|
||||
#endif
|
||||
}
|
||||
else if (i->value.id() == RegexpGrammar::varId)
|
||||
{
|
||||
|
|
27
dic/regexp.h
27
dic/regexp.h
|
@ -139,18 +139,14 @@ private:
|
|||
#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
|
||||
|
||||
/**
|
||||
* Structure used for Dic_search_RegE \n
|
||||
* this structure is used to explicit letters list that will be matched
|
||||
* Structure used for dic.searchRegExp
|
||||
* This structure is used to explicit letters list that will be matched
|
||||
* against special tokens in the regular expression search
|
||||
*/
|
||||
struct search_RegE_list_t
|
||||
{
|
||||
/** maximum length for results */
|
||||
int minlength;
|
||||
/** maximum length for results */
|
||||
int maxlength;
|
||||
/** special symbol associated with the list */
|
||||
char symbl[DIC_SEARCH_REGE_LIST];
|
||||
vector<char> symbl;
|
||||
/** 0 or 1 if list is valid */
|
||||
bool valid[DIC_SEARCH_REGE_LIST];
|
||||
/** 0 or 1 if letter is present in the list */
|
||||
|
@ -163,21 +159,12 @@ struct search_RegE_list_t
|
|||
#define RE_LIST_USER_BEGIN 3
|
||||
#define RE_LIST_USER_END 4
|
||||
|
||||
#define MAX_REGEXP_ERROR_LENGTH 500
|
||||
|
||||
struct regexp_error_report_t
|
||||
{
|
||||
int pos1;
|
||||
int pos2;
|
||||
char msg[MAX_REGEXP_ERROR_LENGTH];
|
||||
};
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
void regexp_print_letter(FILE* f, char l);
|
||||
void regexp_print_letter2(FILE* f, char l);
|
||||
void regexp_print_PS(int PS[]);
|
||||
void regexp_print_ptl(int ptl[]);
|
||||
void regexp_print_letter(FILE* f, char l);
|
||||
void regexp_print_letter2(FILE* f, char l);
|
||||
void regexp_print_PS(int PS[]);
|
||||
void regexp_print_ptl(int ptl[]);
|
||||
|
||||
#endif /* _REGEXP_H_ */
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
/*****************************************************************************
|
||||
* Eliot
|
||||
* Copyright (C) 2005-2007 Antoine Fraboulet
|
||||
* Copyright (C) 2005-2008 Antoine Fraboulet & Olivier Teulière
|
||||
* Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
|
||||
* Olivier Teulière <ipkiss @@ gmail.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -40,39 +41,9 @@
|
|||
|
||||
#include "dic.h"
|
||||
#include "header.h"
|
||||
#include "regexp.h"
|
||||
#include "encoding.h"
|
||||
|
||||
|
||||
void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t *iList)
|
||||
{
|
||||
memset(iList, 0, sizeof(*iList));
|
||||
iList->minlength = 1;
|
||||
iList->maxlength = 15;
|
||||
iList->valid[0] = true; // all letters
|
||||
iList->symbl[0] = RE_ALL_MATCH;
|
||||
iList->valid[1] = true; // vowels
|
||||
iList->symbl[1] = RE_VOWL_MATCH;
|
||||
iList->valid[2] = true; // consonants
|
||||
iList->symbl[2] = RE_CONS_MATCH;
|
||||
iList->letters[0][0] = false;
|
||||
iList->letters[1][0] = false;
|
||||
iList->letters[2][0] = false;
|
||||
const wstring &allLetters = iDic.getHeader().getLetters();
|
||||
for (size_t i = 1; i <= allLetters.size(); ++i)
|
||||
{
|
||||
iList->letters[0][i] = true;
|
||||
iList->letters[1][i] = iDic.getHeader().isVowel(i);
|
||||
iList->letters[2][i] = iDic.getHeader().isConsonant(i);
|
||||
}
|
||||
|
||||
iList->valid[3] = false; // user defined list 1
|
||||
iList->symbl[3] = RE_USR1_MATCH;
|
||||
iList->valid[4] = false; // user defined list 2
|
||||
iList->symbl[4] = RE_USR2_MATCH;
|
||||
}
|
||||
|
||||
|
||||
void usage(const char *iBinaryName)
|
||||
{
|
||||
cerr << _("usage: %s dictionary") << iBinaryName << endl;
|
||||
|
@ -103,20 +74,18 @@ int main(int argc, char* argv[])
|
|||
{
|
||||
Dictionary dic(argv[1]);
|
||||
|
||||
struct search_RegE_list_t regList;
|
||||
string line;
|
||||
cout << "**************************************************************" << endl;
|
||||
cout << "**************************************************************" << endl;
|
||||
cout << _("enter a regular expression:") << endl;
|
||||
cout << _("Enter a regular expression:") << endl;
|
||||
while (getline(cin, line))
|
||||
{
|
||||
if (line == "")
|
||||
break;
|
||||
|
||||
/* Automaton */
|
||||
init_letter_lists(dic, ®List);
|
||||
vector<wstring> wordList;
|
||||
dic.searchRegExp(convertToWc(line), wordList, ®List);
|
||||
dic.searchRegExp(convertToWc(line), wordList, 1, 15);
|
||||
|
||||
cout << _("result:") << endl;
|
||||
vector<wstring>::const_iterator it;
|
||||
|
@ -126,7 +95,7 @@ int main(int argc, char* argv[])
|
|||
}
|
||||
cout << "**************************************************************" << endl;
|
||||
cout << "**************************************************************" << endl;
|
||||
cout << _("enter a regular expression:") << endl;
|
||||
cout << _("Enter a regular expression:") << endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -35,7 +35,6 @@
|
|||
#endif
|
||||
|
||||
#include "dic.h"
|
||||
#include "regexp.h"
|
||||
#include "game_io.h"
|
||||
#include "game_factory.h"
|
||||
#include "training.h"
|
||||
|
@ -786,53 +785,6 @@ void loop_duplicate(Duplicate &iGame)
|
|||
}
|
||||
|
||||
|
||||
void eliot_regexp_build_default_llist(const Dictionary &iDic,
|
||||
struct search_RegE_list_t &llist)
|
||||
{
|
||||
memset(&llist, 0, sizeof(llist));
|
||||
|
||||
llist.minlength = 1;
|
||||
llist.maxlength = 15;
|
||||
|
||||
llist.symbl[0] = RE_ALL_MATCH;
|
||||
llist.symbl[1] = RE_VOWL_MATCH;
|
||||
llist.symbl[2] = RE_CONS_MATCH;
|
||||
llist.symbl[3] = RE_USR1_MATCH;
|
||||
llist.symbl[5] = RE_USR2_MATCH;
|
||||
|
||||
llist.valid[0] = true; // all letters
|
||||
llist.valid[1] = true; // vowels
|
||||
llist.valid[2] = true; // consonants
|
||||
llist.valid[3] = false; // user defined list 1
|
||||
llist.valid[4] = false; // user defined list 2
|
||||
|
||||
for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
|
||||
{
|
||||
memset(llist.letters[i], 0, sizeof(llist.letters[i]));
|
||||
}
|
||||
|
||||
const vector<Tile>& allTiles = iDic.getAllTiles();
|
||||
vector<Tile>::const_iterator it;
|
||||
for (it = allTiles.begin(); it != allTiles.end(); it++)
|
||||
{
|
||||
if (! it->isJoker() && ! it->isEmpty())
|
||||
{
|
||||
// all tiles
|
||||
llist.letters[0][it->toCode()] = 1;
|
||||
// vowels
|
||||
if (it->isVowel())
|
||||
{
|
||||
llist.letters[1][it->toCode()] = 1;
|
||||
}
|
||||
// consonants
|
||||
if (it->isConsonant())
|
||||
{
|
||||
llist.letters[2][it->toCode()] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
|
||||
const wchar_t *delim, wchar_t **state)
|
||||
{
|
||||
|
@ -844,11 +796,6 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
|
|||
printf(" {3} longueur maximum d'un mot\n");
|
||||
*/
|
||||
|
||||
#define DIC_RE_MAX (3*DIC_WORD_MAX) // yes, it's 3
|
||||
|
||||
struct search_RegE_list_t llist;
|
||||
eliot_regexp_build_default_llist(iDic, llist);
|
||||
|
||||
wchar_t *regexp = _wcstok(NULL, delim, state);
|
||||
wchar_t *cnres = _wcstok(NULL, delim, state);
|
||||
wchar_t *clmin = _wcstok(NULL, delim, state);
|
||||
|
@ -858,16 +805,11 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
|
|||
{
|
||||
return;
|
||||
}
|
||||
int nres = cnres ? _wtoi(cnres) : 50;
|
||||
int lmin = clmin ? _wtoi(clmin) : 1;
|
||||
int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
|
||||
unsigned int nres = cnres ? _wtoi(cnres) : 50;
|
||||
unsigned int lmin = clmin ? _wtoi(clmin) : 1;
|
||||
unsigned int lmax = clmax ? _wtoi(clmax) : DIC_WORD_MAX - 1;
|
||||
|
||||
if (lmax <= (DIC_WORD_MAX - 1) && lmin >= 1 && lmin <= lmax)
|
||||
{
|
||||
llist.minlength = lmin;
|
||||
llist.maxlength = lmax;
|
||||
}
|
||||
else
|
||||
if (lmax > (DIC_WORD_MAX - 1) || lmin < 1 || lmin > lmax)
|
||||
{
|
||||
printf("bad length -%s,%s-\n", (const char*)clmin, (const char*)clmax);
|
||||
return;
|
||||
|
@ -877,16 +819,14 @@ void eliot_regexp(const Dictionary& iDic, wchar_t __attribute__((unused)) *cmd,
|
|||
nres, lmin, lmax);
|
||||
|
||||
vector<wstring> wordList;
|
||||
iDic.searchRegExp(regexp, wordList, &llist);
|
||||
iDic.searchRegExp(regexp, wordList, lmin, lmax, nres);
|
||||
|
||||
int nresult = 0;
|
||||
vector<wstring>::const_iterator it;
|
||||
for (it = wordList.begin(); it != wordList.end() && nresult < nres; it++)
|
||||
for (it = wordList.begin(); it != wordList.end(); it++)
|
||||
{
|
||||
printf("%s\n", convertToMb(*it).c_str());
|
||||
nresult++;
|
||||
}
|
||||
printf("%d printed results\n", nresult);
|
||||
printf("%d printed results\n", wordList.size());
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue