mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2025-01-13 20:03:23 +01:00
Automaton: use the logging macros for debug messages, and clean up a little
This commit is contained in:
parent
8511c855f5
commit
418bf51781
3 changed files with 57 additions and 83 deletions
|
@ -23,19 +23,18 @@
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <sstream>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#ifdef HAVE_SYS_WAIT_H
|
#ifdef HAVE_SYS_WAIT_H
|
||||||
# include <sys/wait.h>
|
# include <sys/wait.h>
|
||||||
#endif
|
#endif
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
#include "regexp.h"
|
#include "regexp.h"
|
||||||
#include "automaton.h"
|
#include "automaton.h"
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -44,7 +43,7 @@ INIT_LOGGER(dic, Automaton);
|
||||||
|
|
||||||
|
|
||||||
#ifdef DEBUG_AUTOMATON
|
#ifdef DEBUG_AUTOMATON
|
||||||
# define DMSG(a) (a)
|
# define DMSG(a) LOG_DEBUG(a)
|
||||||
#else
|
#else
|
||||||
# define DMSG(a)
|
# define DMSG(a)
|
||||||
#endif
|
#endif
|
||||||
|
@ -60,6 +59,7 @@ static string idToString(const set<uint64_t> &iId);
|
||||||
|
|
||||||
class State
|
class State
|
||||||
{
|
{
|
||||||
|
DEFINE_LOGGER();
|
||||||
public:
|
public:
|
||||||
State(const set<uint64_t> iId) : m_id(iId) { init(); }
|
State(const set<uint64_t> iId) : m_id(iId) { init(); }
|
||||||
State(uint64_t iId)
|
State(uint64_t iId)
|
||||||
|
@ -88,16 +88,19 @@ private:
|
||||||
m_accept = false;
|
m_accept = false;
|
||||||
id_static = 0;
|
id_static = 0;
|
||||||
memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
|
memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
|
||||||
DMSG(printf("** state %s creation\n", idToString(m_id).c_str()));
|
DMSG("** state " << idToString(m_id) << " creation");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
INIT_LOGGER(dic, State);
|
||||||
|
|
||||||
/* ************************************************** *
|
/* ************************************************** *
|
||||||
Helper class, allowing to build a NFA, then a DFA
|
Helper class, allowing to build a NFA, then a DFA
|
||||||
* ************************************************** */
|
* ************************************************** */
|
||||||
|
|
||||||
class AutomatonHelper
|
class AutomatonHelper
|
||||||
{
|
{
|
||||||
|
DEFINE_LOGGER();
|
||||||
public:
|
public:
|
||||||
AutomatonHelper(State * iInitState);
|
AutomatonHelper(State * iInitState);
|
||||||
~AutomatonHelper();
|
~AutomatonHelper();
|
||||||
|
@ -127,6 +130,7 @@ private:
|
||||||
const searchRegExpLists &iList) const;
|
const searchRegExpLists &iList) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
INIT_LOGGER(dic, AutomatonHelper);
|
||||||
|
|
||||||
/* ************************************************** *
|
/* ************************************************** *
|
||||||
Definition of the Automaton class
|
Definition of the Automaton class
|
||||||
|
@ -136,16 +140,22 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
|
||||||
const searchRegExpLists &iList)
|
const searchRegExpLists &iList)
|
||||||
{
|
{
|
||||||
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
|
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
|
||||||
DMSG(printf("\n non deterministic automaton OK \n\n"));
|
DMSG("Non deterministic automaton OK");
|
||||||
DMSG(nfa->dump("auto_nfa"));
|
#ifdef DEBUG_AUTOMATON
|
||||||
|
nfa->dump("auto_nfa");
|
||||||
|
#endif
|
||||||
|
|
||||||
AutomatonHelper *dfa = AutomatonHelper::nfa2dfa(*nfa, iList);
|
AutomatonHelper *dfa = AutomatonHelper::nfa2dfa(*nfa, iList);
|
||||||
DMSG(printf("\n deterministic automaton OK \n\n"));
|
DMSG("Deterministic automaton OK");
|
||||||
DMSG(dfa->dump("auto_dfa"));
|
#ifdef DEBUG_AUTOMATON
|
||||||
|
dfa->dump("auto_dfa");
|
||||||
|
#endif
|
||||||
|
|
||||||
finalize(*dfa);
|
finalize(*dfa);
|
||||||
DMSG(printf("\n final automaton OK \n\n"));
|
DMSG("Final automaton OK");
|
||||||
DMSG(dump("auto_fin"));
|
#ifdef DEBUG_AUTOMATON
|
||||||
|
dump("auto_fin");
|
||||||
|
#endif
|
||||||
|
|
||||||
delete nfa;
|
delete nfa;
|
||||||
delete dfa;
|
delete dfa;
|
||||||
|
@ -226,7 +236,7 @@ void Automaton::dump(const string &iFileName) const
|
||||||
if (m_transitions[i][l])
|
if (m_transitions[i][l])
|
||||||
{
|
{
|
||||||
fprintf(f, "\t%d -> %d [label = \"", i, m_transitions[i][l]);
|
fprintf(f, "\t%d -> %d [label = \"", i, m_transitions[i][l]);
|
||||||
regexp_print_letter(f, l);
|
fprintf(f, "%s", regexpPrintLetter(l).c_str());
|
||||||
fprintf(f, "\"];\n");
|
fprintf(f, "\"];\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -274,7 +284,7 @@ AutomatonHelper::~AutomatonHelper()
|
||||||
void AutomatonHelper::addState(State * s)
|
void AutomatonHelper::addState(State * s)
|
||||||
{
|
{
|
||||||
m_states.push_front(s);
|
m_states.push_front(s);
|
||||||
DMSG(printf("** state %s added to automaton\n", idToString(s->getId()).c_str()));
|
DMSG("** state " << idToString(s->getId()) << " added to automaton");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -286,7 +296,6 @@ State * AutomatonHelper::getState(const set<uint64_t> &iId) const
|
||||||
State * s = *it;
|
State * s = *it;
|
||||||
if (s->getId() == iId)
|
if (s->getId() == iId)
|
||||||
{
|
{
|
||||||
//DMSG(printf("** get state %s ok\n", idToString(s->getId()).c_str()));
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -315,7 +324,7 @@ AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint6
|
||||||
{
|
{
|
||||||
current_state = L.front();
|
current_state = L.front();
|
||||||
L.pop_front();
|
L.pop_front();
|
||||||
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
|
DMSG("** current state = " << idToString(current_state->getId()));
|
||||||
memset(used_letter, 0, sizeof(used_letter));
|
memset(used_letter, 0, sizeof(used_letter));
|
||||||
/* 3: \foreach l in \sigma | l \neq # */
|
/* 3: \foreach l in \sigma | l \neq # */
|
||||||
for (uint32_t p = 1; p < maxpos; p++)
|
for (uint32_t p = 1; p < maxpos; p++)
|
||||||
|
@ -377,7 +386,7 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
||||||
set<uint64_t> t;
|
set<uint64_t> t;
|
||||||
t.insert(*it);
|
t.insert(*it);
|
||||||
State *y = getState(t);
|
State *y = getState(t);
|
||||||
assert(y != NULL);
|
ASSERT(y != NULL, "Invalid state");
|
||||||
|
|
||||||
set<uint64_t> Ry; /* Ry = \empty */
|
set<uint64_t> Ry; /* Ry = \empty */
|
||||||
|
|
||||||
|
@ -402,10 +411,8 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
||||||
{
|
{
|
||||||
if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
|
if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
|
||||||
{
|
{
|
||||||
DMSG(printf("*** letter "));
|
DMSG("*** letter " << regexpPrintLetter(letter)
|
||||||
DMSG(regexp_print_letter(stdout, letter));
|
<< " is in " << regexpPrintLetter(i));
|
||||||
DMSG(printf("is in "));
|
|
||||||
DMSG(regexp_print_letter(stdout, i));
|
|
||||||
|
|
||||||
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
||||||
Ry.insert(r.begin(), r.end());
|
Ry.insert(r.begin(), r.end());
|
||||||
|
@ -423,28 +430,25 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
||||||
|
|
||||||
void AutomatonHelper::setAccept(State * s) const
|
void AutomatonHelper::setAccept(State * s) const
|
||||||
{
|
{
|
||||||
DMSG(printf("=== setting accept for node (%s) :", idToString(s->getId()).c_str()));
|
DMSG("=== setting accept for node (" << idToString(s->getId()) << ")");
|
||||||
list<State *>::const_iterator it;
|
list<State *>::const_iterator it;
|
||||||
for (it = m_states.begin(); it != m_states.end(); it++)
|
for (it = m_states.begin(); it != m_states.end(); it++)
|
||||||
{
|
{
|
||||||
State * ns = *it;
|
const State * ns = *it;
|
||||||
uint64_t idx = *(ns->getId().begin());
|
uint64_t idx = *(ns->getId().begin());
|
||||||
DMSG(printf("%s ", idToString(ns->getId()).c_str()));
|
|
||||||
if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end()))
|
if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end()))
|
||||||
{
|
{
|
||||||
DMSG(printf("(ok) "));
|
DMSG(" --> " << idToString(ns->getId()));
|
||||||
s->m_accept = true;
|
s->m_accept = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DMSG(printf("\n"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
||||||
const searchRegExpLists &iList)
|
const searchRegExpLists &iList)
|
||||||
{
|
{
|
||||||
State * current_state;
|
|
||||||
|
|
||||||
list<State *> L;
|
list<State *> L;
|
||||||
|
|
||||||
// Clone the list
|
// Clone the list
|
||||||
|
@ -454,25 +458,20 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
||||||
L.push_front(temp_state);
|
L.push_front(temp_state);
|
||||||
while (! L.empty())
|
while (! L.empty())
|
||||||
{
|
{
|
||||||
current_state = L.front();
|
State * current_state = L.front();
|
||||||
L.pop_front();
|
L.pop_front();
|
||||||
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str()));
|
DMSG("** current state = " << idToString(current_state->getId()));
|
||||||
for (int letter = 1; letter < DIC_LETTERS; letter++)
|
for (int letter = 1; letter < DIC_LETTERS; letter++)
|
||||||
{
|
{
|
||||||
// DMSG(printf("*** start successor of %s\n", idToString(current_state->getId()).c_str()));
|
|
||||||
|
|
||||||
set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList);
|
set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList);
|
||||||
|
|
||||||
if (! temp_id.empty())
|
if (! temp_id.empty())
|
||||||
{
|
{
|
||||||
DMSG(printf("*** successor of %s for ", idToString(current_state->getId()).c_str()));
|
DMSG("*** successor of " << idToString(current_state->getId())
|
||||||
DMSG(regexp_print_letter(stdout, letter));
|
<< " for " << regexpPrintLetter(letter) << " = " << idToString(temp_id));
|
||||||
DMSG(printf(" = %s\n", idToString(temp_id).c_str()));
|
|
||||||
|
|
||||||
temp_state = dfa->getState(temp_id);
|
temp_state = dfa->getState(temp_id);
|
||||||
|
|
||||||
// DMSG(printf("*** automaton get state -%s- ok\n", idToString(temp_id).c_str()));
|
|
||||||
|
|
||||||
if (temp_state == NULL)
|
if (temp_state == NULL)
|
||||||
{
|
{
|
||||||
temp_state = new State(temp_id);
|
temp_state = new State(temp_id);
|
||||||
|
@ -499,15 +498,13 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
||||||
|
|
||||||
static string idToString(const set<uint64_t> &iId)
|
static string idToString(const set<uint64_t> &iId)
|
||||||
{
|
{
|
||||||
string s;
|
ostringstream oss;
|
||||||
set<uint64_t>::const_iterator it;
|
set<uint64_t>::const_iterator it;
|
||||||
for (it = iId.begin(); it != iId.end(); it++)
|
for (it = iId.begin(); it != iId.end(); it++)
|
||||||
{
|
{
|
||||||
char tmp[50];
|
oss << *it << ' ';
|
||||||
sprintf(tmp, "%llu ", *it);
|
|
||||||
s += tmp;
|
|
||||||
}
|
}
|
||||||
return s;
|
return oss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -545,7 +542,7 @@ void AutomatonHelper::printEdges(FILE* f) const
|
||||||
{
|
{
|
||||||
fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
|
fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
|
||||||
fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str());
|
fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str());
|
||||||
regexp_print_letter(f, letter);
|
fprintf(f, "%s", regexpPrintLetter(letter).c_str());
|
||||||
fprintf(f, "\"];\n");
|
fprintf(f, "\"];\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
|
#include <boost/format.hpp>
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
@ -31,6 +33,8 @@
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
#include "regexp.h"
|
#include "regexp.h"
|
||||||
|
|
||||||
|
using boost::format;
|
||||||
|
|
||||||
|
|
||||||
Node::Node(int type, char v, Node *fg, Node *fd)
|
Node::Node(int type, char v, Node *fg, Node *fd)
|
||||||
: m_type(type), m_var(v), m_fg(fg), m_fd(fd), m_number(0), m_position(0),
|
: m_type(type), m_var(v), m_fg(fg), m_fd(fd), m_number(0), m_position(0),
|
||||||
|
@ -169,45 +173,19 @@ void regexp_print_ptl(int ptl[])
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
void regexp_print_letter(FILE* f, char l)
|
string regexpPrintLetter(char l)
|
||||||
{
|
{
|
||||||
switch (l)
|
if (l == RE_EPSILON) return (format("( & [%1%])") % l).str();
|
||||||
{
|
if (l == RE_FINAL_TOK) return (format("( # [%1%])") % l).str();
|
||||||
case RE_EPSILON: fprintf(f, "( & [%d])", l); break;
|
if (l == RE_ALL_MATCH) return (format("( . [%1%])") % l).str();
|
||||||
case RE_FINAL_TOK: fprintf(f, "( # [%d])", l); break;
|
if (l == RE_VOWL_MATCH) return (format("(:v: [%1%])") % l).str();
|
||||||
case RE_ALL_MATCH: fprintf(f, "( . [%d])", l); break;
|
if (l == RE_CONS_MATCH) return (format("(:c: [%1%])") % l).str();
|
||||||
case RE_VOWL_MATCH: fprintf(f, "(:v: [%d])", l); break;
|
if (l == RE_USR1_MATCH) return (format("(:1: [%1%])") % l).str();
|
||||||
case RE_CONS_MATCH: fprintf(f, "(:c: [%d])", l); break;
|
if (l == RE_USR2_MATCH) return (format("(:2: [%1%])") % l).str();
|
||||||
case RE_USR1_MATCH: fprintf(f, "(:1: [%d])", l); break;
|
if (l < RE_FINAL_TOK)
|
||||||
case RE_USR2_MATCH: fprintf(f, "(:2: [%d])", l); break;
|
return (format("(%1% [%2%])") % (char)(l + 'a' - 1) % (int)l).str();
|
||||||
default:
|
else
|
||||||
if (l < RE_FINAL_TOK)
|
return (format("(liste %1%)") % (l - RE_LIST_USER_END)).str();
|
||||||
fprintf(f, " (%c [%d]) ", l + 'a' - 1, l);
|
|
||||||
else
|
|
||||||
fprintf(f, " (liste %d)", l - RE_LIST_USER_END);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void regexp_print_letter2(FILE* f, char l)
|
|
||||||
{
|
|
||||||
switch (l)
|
|
||||||
{
|
|
||||||
case RE_EPSILON: fprintf(f, "&"); break;
|
|
||||||
case RE_FINAL_TOK: fprintf(f, "#"); break;
|
|
||||||
case RE_ALL_MATCH: fprintf(f, "."); break;
|
|
||||||
case RE_VOWL_MATCH: fprintf(f, ":v:"); break;
|
|
||||||
case RE_CONS_MATCH: fprintf(f, ":c:"); break;
|
|
||||||
case RE_USR1_MATCH: fprintf(f, ":1:"); break;
|
|
||||||
case RE_USR2_MATCH: fprintf(f, ":2:"); break;
|
|
||||||
default:
|
|
||||||
if (l < RE_FINAL_TOK)
|
|
||||||
fprintf(f, "%c", l + 'a' - 1);
|
|
||||||
else
|
|
||||||
fprintf(f, "l%d", l - RE_LIST_USER_END);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -217,7 +195,7 @@ void Node::printNode(FILE* f, int detail) const
|
||||||
switch (m_type)
|
switch (m_type)
|
||||||
{
|
{
|
||||||
case NODE_VAR:
|
case NODE_VAR:
|
||||||
regexp_print_letter(f, m_var);
|
fprintf(f, "%s", regexpPrintLetter(m_var).c_str());
|
||||||
break;
|
break;
|
||||||
case NODE_OR:
|
case NODE_OR:
|
||||||
fprintf(f, "OR");
|
fprintf(f, "OR");
|
||||||
|
|
|
@ -144,8 +144,7 @@ struct searchRegExpLists
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
void regexp_print_letter(FILE* f, char l);
|
string regexpPrintLetter(char l);
|
||||||
void regexp_print_letter2(FILE* f, char l);
|
|
||||||
void regexp_print_PS(int PS[]);
|
void regexp_print_PS(int PS[]);
|
||||||
void regexp_print_ptl(int ptl[]);
|
void regexp_print_ptl(int ptl[]);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue