Added consts, renamed a class, and simplified the code using vectors

This commit is contained in:
Olivier Teulière 2008-07-27 15:28:50 +00:00
parent fa493d4135
commit 2b161abd2c
6 changed files with 77 additions and 90 deletions

View file

@ -18,13 +18,6 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/ *****************************************************************************/
/**
* \file automaton.c
* \brief (Non)Deterministic Finite AutomatonHelper for Regexp
* \author Antoine Fraboulet
* \date 2005
*/
#include "config.h" #include "config.h"
#include <set> #include <set>
@ -79,6 +72,11 @@ public:
State * m_next[MAX_TRANSITION_LETTERS]; State * m_next[MAX_TRANSITION_LETTERS];
private: private:
/**
* Id of the state. For the first automaton, each ID contains only 1
* integer, but the ID of the deterministic automaton will contain
* several integers, according to the usual "determinization" algorithm.
*/
set<uint64_t> m_id; set<uint64_t> m_id;
void init() void init()
@ -107,7 +105,7 @@ public:
static AutomatonHelper *ps2nfa(uint64_t iInitState, int *ptl, uint64_t *PS); static AutomatonHelper *ps2nfa(uint64_t iInitState, int *ptl, uint64_t *PS);
static AutomatonHelper *nfa2dfa(const AutomatonHelper &iNfa, static AutomatonHelper *nfa2dfa(const AutomatonHelper &iNfa,
struct search_RegE_list_t *iList); const searchRegExpLists &iList);
/// List of states /// List of states
list<State *> m_states; list<State *> m_states;
@ -121,7 +119,8 @@ private:
void printNodes(FILE* f) const; void printNodes(FILE* f) const;
void printEdges(FILE* f) const; void printEdges(FILE* f) const;
void setAccept(State * s) const; void setAccept(State * s) const;
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct search_RegE_list_t *iList) const; set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter,
const searchRegExpLists &iList) const;
}; };
@ -129,7 +128,8 @@ private:
Definition of the Automaton class Definition of the Automaton class
* ************************************************** */ * ************************************************** */
Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList) Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
const searchRegExpLists &iList)
{ {
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS); AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
DMSG(printf("\n non deterministic automaton OK \n\n")); DMSG(printf("\n non deterministic automaton OK \n\n"));
@ -151,7 +151,7 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_
Automaton::~Automaton() Automaton::~Automaton()
{ {
delete[] m_acceptors; delete[] m_acceptors;
for (int i = 0; i <= m_nbStates; i++) for (unsigned int i = 0; i <= m_nbStates; i++)
{ {
delete[] m_transitions[i]; delete[] m_transitions[i];
} }
@ -166,7 +166,7 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
m_acceptors = new bool[m_nbStates + 1]; m_acceptors = new bool[m_nbStates + 1];
memset(m_acceptors, 0, (m_nbStates + 1) * sizeof(bool)); memset(m_acceptors, 0, (m_nbStates + 1) * sizeof(bool));
m_transitions = new int*[m_nbStates + 1]; m_transitions = new int*[m_nbStates + 1];
for (int i = 0; i <= m_nbStates; i++) for (unsigned int i = 0; i <= m_nbStates; i++)
{ {
m_transitions[i] = new int[MAX_TRANSITION_LETTERS]; m_transitions[i] = new int[MAX_TRANSITION_LETTERS];
memset(m_transitions[i], 0, MAX_TRANSITION_LETTERS * sizeof(int)); memset(m_transitions[i], 0, MAX_TRANSITION_LETTERS * sizeof(int));
@ -205,7 +205,7 @@ void Automaton::dump(const string &iFileName) const
{ {
FILE *f = fopen(iFileName.c_str(), "w"); FILE *f = fopen(iFileName.c_str(), "w");
fprintf(f, "digraph automaton {\n"); fprintf(f, "digraph automaton {\n");
for (int i = 1; i <= m_nbStates; i++) for (unsigned int i = 1; i <= m_nbStates; i++)
{ {
fprintf(f, "\t%d [label = \"%d\"", i, i); fprintf(f, "\t%d [label = \"%d\"", i, i);
if (i == m_init) if (i == m_init)
@ -215,7 +215,7 @@ void Automaton::dump(const string &iFileName) const
fprintf(f, "];\n"); fprintf(f, "];\n");
} }
fprintf(f, "\n"); fprintf(f, "\n");
for (int i = 1; i <= m_nbStates; i++) for (unsigned int i = 1; i <= m_nbStates; i++)
{ {
for (int l = 0; l < MAX_TRANSITION_LETTERS; l++) for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
{ {
@ -363,7 +363,7 @@ AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint6
set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S, set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
int letter, int letter,
struct search_RegE_list_t *iList) const const searchRegExpLists &iList) const
{ {
set<uint64_t> R, r; set<uint64_t> R, r;
set<uint64_t>::const_iterator it; set<uint64_t>::const_iterator it;
@ -394,11 +394,9 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
if (letter < RE_FINAL_TOK) if (letter < RE_FINAL_TOK)
{ {
for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++) for (unsigned int i = 0; i < iList.symbl.size(); i++)
{ {
if (iList->valid[i]) if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
{
if (iList->letters[i][letter] && (z = y->m_next[(int)iList->symbl[i]]) != NULL)
{ {
DMSG(printf("*** letter ")); DMSG(printf("*** letter "));
DMSG(regexp_print_letter(stdout, letter)); DMSG(regexp_print_letter(stdout, letter));
@ -411,7 +409,6 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
} }
} }
} }
}
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */ R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
} }
@ -440,7 +437,7 @@ void AutomatonHelper::setAccept(State * s) const
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa, AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
struct search_RegE_list_t *iList) const searchRegExpLists &iList)
{ {
State * current_state; State * current_state;

View file

@ -18,17 +18,11 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/ *****************************************************************************/
/**
* \file automaton.h
* \brief (Non)Deterministic Finite Automaton for Regexp
* \author Antoine Fraboulet
* \date 2005
*/
#ifndef _DIC_AUTOMATON_H_ #ifndef _DIC_AUTOMATON_H_
#define _DIC_AUTOMATON_H_ #define _DIC_AUTOMATON_H_
class AutomatonHelper; class AutomatonHelper;
struct searchRegExpLists;
class Automaton class Automaton
{ {
@ -38,7 +32,8 @@ public:
* Build a static deterministic finite automaton from * Build a static deterministic finite automaton from
* "init_state", "ptl" and "PS" given by the parser * "init_state", "ptl" and "PS" given by the parser
*/ */
Automaton(uint64_t init_state, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList); Automaton(uint64_t init_state, int *ptl, uint64_t *PS,
const searchRegExpLists &iList);
/// Destructor /// Destructor
~Automaton(); ~Automaton();
@ -77,10 +72,10 @@ public:
private: private:
/// Number of states /// Number of states
int m_nbStates; unsigned int m_nbStates;
/// ID of the init state /// ID of the init state
int m_init; uint64_t m_init;
/// Array of booleans, one for each state /// Array of booleans, one for each state
bool *m_acceptors; bool *m_acceptors;

View file

@ -499,18 +499,27 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
} }
static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t &iList) /**
* Initialize the lists of letters with pre-defined lists
* 0: all tiles
* 1: vowels
* 2: consonants
* 3: user defined 1
* 4: user defined 2
* x: lists used during parsing
*/
static void initLetterLists(const Dictionary &iDic,
searchRegExpLists &iList)
{ {
memset(&iList, 0, sizeof(iList)); memset(&iList, 0, sizeof(iList));
// Prepare the space for 5 items // Prepare the space for 5 items
iList.symbl.assign(5, 0); iList.symbl.assign(5, 0);
iList.letters.assign(5, vector<bool>(DIC_LETTERS, false));
iList.valid[0] = true; // all letters iList.symbl[0] = RE_ALL_MATCH; // All letters
iList.symbl[0] = RE_ALL_MATCH; iList.symbl[1] = RE_VOWL_MATCH; // Vowels
iList.valid[1] = true; // vowels iList.symbl[2] = RE_CONS_MATCH; // Consonants
iList.symbl[1] = RE_VOWL_MATCH;
iList.valid[2] = true; // consonants
iList.symbl[2] = RE_CONS_MATCH;
iList.letters[0][0] = false; iList.letters[0][0] = false;
iList.letters[1][0] = false; iList.letters[1][0] = false;
iList.letters[2][0] = false; iList.letters[2][0] = false;
@ -522,10 +531,8 @@ static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t
iList.letters[2][i] = iDic.getHeader().isConsonant(i); iList.letters[2][i] = iDic.getHeader().isConsonant(i);
} }
iList.valid[3] = false; // user defined list 1 iList.symbl[3] = RE_USR1_MATCH; // User defined list 1
iList.symbl[3] = RE_USR1_MATCH; iList.symbl[4] = RE_USR2_MATCH; // User defined list 2
iList.valid[4] = false; // user defined list 2
iList.symbl[4] = RE_USR2_MATCH;
} }
@ -546,9 +553,10 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
// Parsing // Parsing
Node *root = NULL; Node *root = NULL;
struct search_RegE_list_t llist; searchRegExpLists llist;
init_letter_lists(*this, llist); // Initialize the lists of letters
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, &llist); initLetterLists(*this, llist);
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, llist);
if (!parsingOk) if (!parsingOk)
{ {
@ -574,7 +582,7 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
root->nextPos(PS); root->nextPos(PS);
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist); Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, llist);
if (a) if (a)
{ {
struct params_regexp_t params; struct params_regexp_t params;

View file

@ -124,7 +124,7 @@ struct RegexpGrammar : grammar<RegexpGrammar>
void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack, void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
struct search_RegE_list_t *iList, bool negate = false) searchRegExpLists &iList, bool negate = false)
{ {
if (i->value.id() == RegexpGrammar::alphavarId) if (i->value.id() == RegexpGrammar::alphavarId)
{ {
@ -146,24 +146,17 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
// The dictionary letters are already in upper case // The dictionary letters are already in upper case
const wstring &letters = iHeader.getLetters(); const wstring &letters = iHeader.getLetters();
wstring::const_iterator itLetter; wstring::const_iterator itLetter;
int j; // j is the index of the new list we create
for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; ++j) size_t j = iList.symbl.size();
{ iList.symbl.push_back(RE_ALL_MATCH + j);
if (!iList->valid[j]) iList.letters.push_back(vector<bool>(DIC_LETTERS, false));
{
iList->valid[j] = true;
iList->symbl.push_back(RE_ALL_MATCH + j);
iList->letters[j][0] = false;
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter) for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
{ {
bool contains = (choiceLetters.find(*itLetter) != string::npos); bool contains = (choiceLetters.find(*itLetter) != string::npos);
iList->letters[j][iHeader.getCodeFromChar(*itLetter)] = iList.letters[j][iHeader.getCodeFromChar(*itLetter)] =
(contains ? !negate : negate); (contains ? !negate : negate);
} }
break; Node *node = new Node(NODE_VAR, iList.symbl[j], NULL, NULL);
}
}
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
evalStack.push(node); evalStack.push(node);
} }
else if (i->value.id() == RegexpGrammar::varId) else if (i->value.id() == RegexpGrammar::varId)
@ -279,7 +272,8 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
} }
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList) bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root,
searchRegExpLists &iList)
{ {
// Create a grammar object // Create a grammar object
RegexpGrammar g(iDic.getHeader().getLetters()); RegexpGrammar g(iDic.getHeader().getLetters());

View file

@ -23,9 +23,12 @@
class Dictionary; class Dictionary;
class Node; class Node;
struct search_RegE_list_t; class searchRegExpLists;
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList); bool parseRegexp(const Dictionary &iDic,
const wchar_t *input,
Node **root,
searchRegExpLists &iList);
#endif #endif

View file

@ -127,30 +127,20 @@ private:
#define RE_USR1_MATCH (DIC_LETTERS + 5) #define RE_USR1_MATCH (DIC_LETTERS + 5)
#define RE_USR2_MATCH (DIC_LETTERS + 6) #define RE_USR2_MATCH (DIC_LETTERS + 6)
/**
* number of lists for regexp letter match \n
* 0 : all tiles \n
* 1 : vowels \n
* 2 : consonants \n
* 3 : user defined 1 \n
* 4 : user defined 2 \n
* x : lists used during parsing \n
*/
#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
/** /**
* Structure used for dic.searchRegExp * Structure used for dic.searchRegExp
* This structure makes explicit the lists of letters that will be matched * This structure makes explicit the lists of letters that will be matched
* against special tokens in the regular expression search * against special tokens in the regular expression search
*/ */
struct search_RegE_list_t struct searchRegExpLists
{ {
/** special symbol associated with the list */ /** special symbol associated with the list */
vector<char> symbl; vector<char> symbl;
/** 0 or 1 if list is valid */ /**
bool valid[DIC_SEARCH_REGE_LIST]; * 0 or 1 if letter is present in the list.
/** 0 or 1 if letter is present in the list */ * The inner vector should have a length of DIC_LETTERS (it is a bitmask)
bool letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS]; */
vector<vector<bool> > letters;
}; };
#define RE_LIST_ALL_MATCH 0 #define RE_LIST_ALL_MATCH 0