Automaton: use the logging macros for debug messages, and clean up a little

This commit is contained in:
Olivier Teulière 2012-05-16 20:43:26 +02:00
parent 8511c855f5
commit 418bf51781
3 changed files with 57 additions and 83 deletions

View file

@ -23,19 +23,18 @@
#include <set> #include <set>
#include <list> #include <list>
#include <algorithm> #include <algorithm>
#include <cassert> #include <sstream>
#include <cstring> #include <cstring>
#include <cstdlib>
#include <cstdio> #include <cstdio>
#include <sys/types.h> #include <sys/types.h>
#ifdef HAVE_SYS_WAIT_H #ifdef HAVE_SYS_WAIT_H
# include <sys/wait.h> # include <sys/wait.h>
#endif #endif
#include <unistd.h>
#include "dic.h" #include "dic.h"
#include "regexp.h" #include "regexp.h"
#include "automaton.h" #include "automaton.h"
#include "debug.h"
using namespace std; using namespace std;
@ -44,7 +43,7 @@ INIT_LOGGER(dic, Automaton);
#ifdef DEBUG_AUTOMATON #ifdef DEBUG_AUTOMATON
# define DMSG(a) (a) # define DMSG(a) LOG_DEBUG(a)
#else #else
# define DMSG(a) # define DMSG(a)
#endif #endif
@ -60,6 +59,7 @@ static string idToString(const set<uint64_t> &iId);
class State class State
{ {
DEFINE_LOGGER();
public: public:
State(const set<uint64_t> iId) : m_id(iId) { init(); } State(const set<uint64_t> iId) : m_id(iId) { init(); }
State(uint64_t iId) State(uint64_t iId)
@ -88,16 +88,19 @@ private:
m_accept = false; m_accept = false;
id_static = 0; id_static = 0;
memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS); memset(m_next, 0, sizeof(State*) * MAX_TRANSITION_LETTERS);
DMSG(printf("** state %s creation\n", idToString(m_id).c_str())); DMSG("** state " << idToString(m_id) << " creation");
} }
}; };
INIT_LOGGER(dic, State);
/* ************************************************** * /* ************************************************** *
Helper class, allowing to build a NFA, then a DFA Helper class, allowing to build a NFA, then a DFA
* ************************************************** */ * ************************************************** */
class AutomatonHelper class AutomatonHelper
{ {
DEFINE_LOGGER();
public: public:
AutomatonHelper(State * iInitState); AutomatonHelper(State * iInitState);
~AutomatonHelper(); ~AutomatonHelper();
@ -127,6 +130,7 @@ private:
const searchRegExpLists &iList) const; const searchRegExpLists &iList) const;
}; };
INIT_LOGGER(dic, AutomatonHelper);
/* ************************************************** * /* ************************************************** *
Definition of the Automaton class Definition of the Automaton class
@ -136,16 +140,22 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
const searchRegExpLists &iList) const searchRegExpLists &iList)
{ {
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS); AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
DMSG(printf("\n non deterministic automaton OK \n\n")); DMSG("Non deterministic automaton OK");
DMSG(nfa->dump("auto_nfa")); #ifdef DEBUG_AUTOMATON
nfa->dump("auto_nfa");
#endif
AutomatonHelper *dfa = AutomatonHelper::nfa2dfa(*nfa, iList); AutomatonHelper *dfa = AutomatonHelper::nfa2dfa(*nfa, iList);
DMSG(printf("\n deterministic automaton OK \n\n")); DMSG("Deterministic automaton OK");
DMSG(dfa->dump("auto_dfa")); #ifdef DEBUG_AUTOMATON
dfa->dump("auto_dfa");
#endif
finalize(*dfa); finalize(*dfa);
DMSG(printf("\n final automaton OK \n\n")); DMSG("Final automaton OK");
DMSG(dump("auto_fin")); #ifdef DEBUG_AUTOMATON
dump("auto_fin");
#endif
delete nfa; delete nfa;
delete dfa; delete dfa;
@ -226,7 +236,7 @@ void Automaton::dump(const string &iFileName) const
if (m_transitions[i][l]) if (m_transitions[i][l])
{ {
fprintf(f, "\t%d -> %d [label = \"", i, m_transitions[i][l]); fprintf(f, "\t%d -> %d [label = \"", i, m_transitions[i][l]);
regexp_print_letter(f, l); fprintf(f, "%s", regexpPrintLetter(l).c_str());
fprintf(f, "\"];\n"); fprintf(f, "\"];\n");
} }
} }
@ -274,7 +284,7 @@ AutomatonHelper::~AutomatonHelper()
void AutomatonHelper::addState(State * s) void AutomatonHelper::addState(State * s)
{ {
m_states.push_front(s); m_states.push_front(s);
DMSG(printf("** state %s added to automaton\n", idToString(s->getId()).c_str())); DMSG("** state " << idToString(s->getId()) << " added to automaton");
} }
@ -286,7 +296,6 @@ State * AutomatonHelper::getState(const set<uint64_t> &iId) const
State * s = *it; State * s = *it;
if (s->getId() == iId) if (s->getId() == iId)
{ {
//DMSG(printf("** get state %s ok\n", idToString(s->getId()).c_str()));
return s; return s;
} }
} }
@ -315,7 +324,7 @@ AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint6
{ {
current_state = L.front(); current_state = L.front();
L.pop_front(); L.pop_front();
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str())); DMSG("** current state = " << idToString(current_state->getId()));
memset(used_letter, 0, sizeof(used_letter)); memset(used_letter, 0, sizeof(used_letter));
/* 3: \foreach l in \sigma | l \neq # */ /* 3: \foreach l in \sigma | l \neq # */
for (uint32_t p = 1; p < maxpos; p++) for (uint32_t p = 1; p < maxpos; p++)
@ -377,7 +386,7 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
set<uint64_t> t; set<uint64_t> t;
t.insert(*it); t.insert(*it);
State *y = getState(t); State *y = getState(t);
assert(y != NULL); ASSERT(y != NULL, "Invalid state");
set<uint64_t> Ry; /* Ry = \empty */ set<uint64_t> Ry; /* Ry = \empty */
@ -402,10 +411,8 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
{ {
if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL) if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
{ {
DMSG(printf("*** letter ")); DMSG("*** letter " << regexpPrintLetter(letter)
DMSG(regexp_print_letter(stdout, letter)); << " is in " << regexpPrintLetter(i));
DMSG(printf("is in "));
DMSG(regexp_print_letter(stdout, i));
r = getSuccessor(z->getId(), RE_EPSILON, iList); r = getSuccessor(z->getId(), RE_EPSILON, iList);
Ry.insert(r.begin(), r.end()); Ry.insert(r.begin(), r.end());
@ -423,28 +430,25 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
void AutomatonHelper::setAccept(State * s) const void AutomatonHelper::setAccept(State * s) const
{ {
DMSG(printf("=== setting accept for node (%s) :", idToString(s->getId()).c_str())); DMSG("=== setting accept for node (" << idToString(s->getId()) << ")");
list<State *>::const_iterator it; list<State *>::const_iterator it;
for (it = m_states.begin(); it != m_states.end(); it++) for (it = m_states.begin(); it != m_states.end(); it++)
{ {
State * ns = *it; const State * ns = *it;
uint64_t idx = *(ns->getId().begin()); uint64_t idx = *(ns->getId().begin());
DMSG(printf("%s ", idToString(ns->getId()).c_str()));
if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end())) if (ns->m_accept && (std::find(s->getId().begin(), s->getId().end(), idx) != s->getId().end()))
{ {
DMSG(printf("(ok) ")); DMSG(" --> " << idToString(ns->getId()));
s->m_accept = true; s->m_accept = true;
break;
} }
} }
DMSG(printf("\n"));
} }
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa, AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
const searchRegExpLists &iList) const searchRegExpLists &iList)
{ {
State * current_state;
list<State *> L; list<State *> L;
// Clone the list // Clone the list
@ -454,25 +458,20 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
L.push_front(temp_state); L.push_front(temp_state);
while (! L.empty()) while (! L.empty())
{ {
current_state = L.front(); State * current_state = L.front();
L.pop_front(); L.pop_front();
DMSG(printf("** current state = %s\n", idToString(current_state->getId()).c_str())); DMSG("** current state = " << idToString(current_state->getId()));
for (int letter = 1; letter < DIC_LETTERS; letter++) for (int letter = 1; letter < DIC_LETTERS; letter++)
{ {
// DMSG(printf("*** start successor of %s\n", idToString(current_state->getId()).c_str()));
set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList); set<uint64_t> temp_id = iNfa.getSuccessor(current_state->getId(), letter, iList);
if (! temp_id.empty()) if (! temp_id.empty())
{ {
DMSG(printf("*** successor of %s for ", idToString(current_state->getId()).c_str())); DMSG("*** successor of " << idToString(current_state->getId())
DMSG(regexp_print_letter(stdout, letter)); << " for " << regexpPrintLetter(letter) << " = " << idToString(temp_id));
DMSG(printf(" = %s\n", idToString(temp_id).c_str()));
temp_state = dfa->getState(temp_id); temp_state = dfa->getState(temp_id);
// DMSG(printf("*** automaton get state -%s- ok\n", idToString(temp_id).c_str()));
if (temp_state == NULL) if (temp_state == NULL)
{ {
temp_state = new State(temp_id); temp_state = new State(temp_id);
@ -499,15 +498,13 @@ AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
static string idToString(const set<uint64_t> &iId) static string idToString(const set<uint64_t> &iId)
{ {
string s; ostringstream oss;
set<uint64_t>::const_iterator it; set<uint64_t>::const_iterator it;
for (it = iId.begin(); it != iId.end(); it++) for (it = iId.begin(); it != iId.end(); it++)
{ {
char tmp[50]; oss << *it << ' ';
sprintf(tmp, "%llu ", *it);
s += tmp;
} }
return s; return oss.str();
} }
@ -545,7 +542,7 @@ void AutomatonHelper::printEdges(FILE* f) const
{ {
fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str()); fprintf(f, "\t\"%s\" -> ", idToString(s->getId()).c_str());
fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str()); fprintf(f, "\"%s\" [label = \"", idToString(s->m_next[letter]->getId()).c_str());
regexp_print_letter(f, letter); fprintf(f, "%s", regexpPrintLetter(letter).c_str());
fprintf(f, "\"];\n"); fprintf(f, "\"];\n");
} }
} }

View file

@ -20,6 +20,8 @@
#include "config.h" #include "config.h"
#include <boost/format.hpp>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
@ -31,6 +33,8 @@
#include "dic.h" #include "dic.h"
#include "regexp.h" #include "regexp.h"
using boost::format;
Node::Node(int type, char v, Node *fg, Node *fd) Node::Node(int type, char v, Node *fg, Node *fd)
: m_type(type), m_var(v), m_fg(fg), m_fd(fd), m_number(0), m_position(0), : m_type(type), m_var(v), m_fg(fg), m_fd(fd), m_number(0), m_position(0),
@ -169,45 +173,19 @@ void regexp_print_ptl(int ptl[])
#endif #endif
void regexp_print_letter(FILE* f, char l) string regexpPrintLetter(char l)
{ {
switch (l) if (l == RE_EPSILON) return (format("( & [%1%])") % l).str();
{ if (l == RE_FINAL_TOK) return (format("( # [%1%])") % l).str();
case RE_EPSILON: fprintf(f, "( & [%d])", l); break; if (l == RE_ALL_MATCH) return (format("( . [%1%])") % l).str();
case RE_FINAL_TOK: fprintf(f, "( # [%d])", l); break; if (l == RE_VOWL_MATCH) return (format("(:v: [%1%])") % l).str();
case RE_ALL_MATCH: fprintf(f, "( . [%d])", l); break; if (l == RE_CONS_MATCH) return (format("(:c: [%1%])") % l).str();
case RE_VOWL_MATCH: fprintf(f, "(:v: [%d])", l); break; if (l == RE_USR1_MATCH) return (format("(:1: [%1%])") % l).str();
case RE_CONS_MATCH: fprintf(f, "(:c: [%d])", l); break; if (l == RE_USR2_MATCH) return (format("(:2: [%1%])") % l).str();
case RE_USR1_MATCH: fprintf(f, "(:1: [%d])", l); break;
case RE_USR2_MATCH: fprintf(f, "(:2: [%d])", l); break;
default:
if (l < RE_FINAL_TOK) if (l < RE_FINAL_TOK)
fprintf(f, " (%c [%d]) ", l + 'a' - 1, l); return (format("(%1% [%2%])") % (char)(l + 'a' - 1) % (int)l).str();
else else
fprintf(f, " (liste %d)", l - RE_LIST_USER_END); return (format("(liste %1%)") % (l - RE_LIST_USER_END)).str();
break;
}
}
void regexp_print_letter2(FILE* f, char l)
{
switch (l)
{
case RE_EPSILON: fprintf(f, "&"); break;
case RE_FINAL_TOK: fprintf(f, "#"); break;
case RE_ALL_MATCH: fprintf(f, "."); break;
case RE_VOWL_MATCH: fprintf(f, ":v:"); break;
case RE_CONS_MATCH: fprintf(f, ":c:"); break;
case RE_USR1_MATCH: fprintf(f, ":1:"); break;
case RE_USR2_MATCH: fprintf(f, ":2:"); break;
default:
if (l < RE_FINAL_TOK)
fprintf(f, "%c", l + 'a' - 1);
else
fprintf(f, "l%d", l - RE_LIST_USER_END);
break;
}
} }
@ -217,7 +195,7 @@ void Node::printNode(FILE* f, int detail) const
switch (m_type) switch (m_type)
{ {
case NODE_VAR: case NODE_VAR:
regexp_print_letter(f, m_var); fprintf(f, "%s", regexpPrintLetter(m_var).c_str());
break; break;
case NODE_OR: case NODE_OR:
fprintf(f, "OR"); fprintf(f, "OR");

View file

@ -144,8 +144,7 @@ struct searchRegExpLists
#include <cstdio> #include <cstdio>
void regexp_print_letter(FILE* f, char l); string regexpPrintLetter(char l);
void regexp_print_letter2(FILE* f, char l);
void regexp_print_PS(int PS[]); void regexp_print_PS(int PS[]);
void regexp_print_ptl(int ptl[]); void regexp_print_ptl(int ptl[]);