Added consts, renamed a class, and simplified the code using vectors

This commit is contained in:
Olivier Teulière 2008-07-27 15:28:50 +00:00
parent fa493d4135
commit 2b161abd2c
6 changed files with 77 additions and 90 deletions

View file

@ -18,13 +18,6 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/**
* \file automaton.c
* \brief (Non)Deterministic Finite Automaton helper for Regexp
* \author Antoine Fraboulet
* \date 2005
*/
#include "config.h"
#include <set>
@ -79,6 +72,11 @@ public:
State * m_next[MAX_TRANSITION_LETTERS];
private:
/**
* Id of the state. For the first automaton, each ID contains only 1
* integer, but the ID of the deterministic automaton will contain
* several integers, according to the usual "determinization" algorithm.
*/
set<uint64_t> m_id;
void init()
@ -107,7 +105,7 @@ public:
static AutomatonHelper *ps2nfa(uint64_t iInitState, int *ptl, uint64_t *PS);
static AutomatonHelper *nfa2dfa(const AutomatonHelper &iNfa,
struct search_RegE_list_t *iList);
const searchRegExpLists &iList);
/// List of states
list<State *> m_states;
@ -121,7 +119,8 @@ private:
void printNodes(FILE* f) const;
void printEdges(FILE* f) const;
void setAccept(State * s) const;
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct search_RegE_list_t *iList) const;
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter,
const searchRegExpLists &iList) const;
};
@ -129,7 +128,8 @@ private:
Definition of the Automaton class
* ************************************************** */
Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList)
Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
const searchRegExpLists &iList)
{
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
DMSG(printf("\n non deterministic automaton OK \n\n"));
@ -151,7 +151,7 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_
Automaton::~Automaton()
{
delete[] m_acceptors;
for (int i = 0; i <= m_nbStates; i++)
for (unsigned int i = 0; i <= m_nbStates; i++)
{
delete[] m_transitions[i];
}
@ -166,7 +166,7 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
m_acceptors = new bool[m_nbStates + 1];
memset(m_acceptors, 0, (m_nbStates + 1) * sizeof(bool));
m_transitions = new int*[m_nbStates + 1];
for (int i = 0; i <= m_nbStates; i++)
for (unsigned int i = 0; i <= m_nbStates; i++)
{
m_transitions[i] = new int[MAX_TRANSITION_LETTERS];
memset(m_transitions[i], 0, MAX_TRANSITION_LETTERS * sizeof(int));
@ -205,7 +205,7 @@ void Automaton::dump(const string &iFileName) const
{
FILE *f = fopen(iFileName.c_str(), "w");
fprintf(f, "digraph automaton {\n");
for (int i = 1; i <= m_nbStates; i++)
for (unsigned int i = 1; i <= m_nbStates; i++)
{
fprintf(f, "\t%d [label = \"%d\"", i, i);
if (i == m_init)
@ -215,7 +215,7 @@ void Automaton::dump(const string &iFileName) const
fprintf(f, "];\n");
}
fprintf(f, "\n");
for (int i = 1; i <= m_nbStates; i++)
for (unsigned int i = 1; i <= m_nbStates; i++)
{
for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
{
@ -363,7 +363,7 @@ AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint6
set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
int letter,
struct search_RegE_list_t *iList) const
const searchRegExpLists &iList) const
{
set<uint64_t> R, r;
set<uint64_t>::const_iterator it;
@ -394,26 +394,23 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
if (letter < RE_FINAL_TOK)
{
for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
for (unsigned int i = 0; i < iList.symbl.size(); i++)
{
if (iList->valid[i])
if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
{
if (iList->letters[i][letter] && (z = y->m_next[(int)iList->symbl[i]]) != NULL)
{
DMSG(printf("*** letter "));
DMSG(regexp_print_letter(stdout, letter));
DMSG(printf("is in "));
DMSG(regexp_print_letter(stdout, i));
DMSG(printf("*** letter "));
DMSG(regexp_print_letter(stdout, letter));
DMSG(printf("is in "));
DMSG(regexp_print_letter(stdout, i));
r = getSuccessor(z->getId(), RE_EPSILON, iList);
Ry.insert(r.begin(), r.end());
Ry.insert(z->getId().begin(), z->getId().end());
}
r = getSuccessor(z->getId(), RE_EPSILON, iList);
Ry.insert(r.begin(), r.end());
Ry.insert(z->getId().begin(), z->getId().end());
}
}
}
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
}
return R;
@ -440,7 +437,7 @@ void AutomatonHelper::setAccept(State * s) const
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
struct search_RegE_list_t *iList)
const searchRegExpLists &iList)
{
State * current_state;

View file

@ -18,17 +18,11 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/**
* \file automaton.h
* \brief (Non)Deterministic Finite Automaton for Regexp
* \author Antoine Fraboulet
* \date 2005
*/
#ifndef _DIC_AUTOMATON_H_
#define _DIC_AUTOMATON_H_
class AutomatonHelper;
struct searchRegExpLists;
class Automaton
{
@ -38,7 +32,8 @@ public:
* Build a static deterministic finite automaton from
* "init_state", "ptl" and "PS" given by the parser
*/
Automaton(uint64_t init_state, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList);
Automaton(uint64_t init_state, int *ptl, uint64_t *PS,
const searchRegExpLists &iList);
/// Destructor
~Automaton();
@ -77,10 +72,10 @@ public:
private:
/// Number of states
int m_nbStates;
unsigned int m_nbStates;
/// ID of the init state
int m_init;
uint64_t m_init;
/// Array of booleans, one for each state
bool *m_acceptors;

View file

@ -499,18 +499,27 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
}
static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t &iList)
/**
* Initialize the lists of letters with pre-defined lists
* 0: all tiles
* 1: vowels
* 2: consonants
* 3: user defined 1
* 4: user defined 2
* x: lists used during parsing
*/
static void initLetterLists(const Dictionary &iDic,
searchRegExpLists &iList)
{
memset(&iList, 0, sizeof(iList));
// Prepare the space for 5 items
iList.symbl.assign(5, 0);
iList.letters.assign(5, vector<bool>(DIC_LETTERS, false));
iList.valid[0] = true; // all letters
iList.symbl[0] = RE_ALL_MATCH;
iList.valid[1] = true; // vowels
iList.symbl[1] = RE_VOWL_MATCH;
iList.valid[2] = true; // consonants
iList.symbl[2] = RE_CONS_MATCH;
iList.symbl[0] = RE_ALL_MATCH; // All letters
iList.symbl[1] = RE_VOWL_MATCH; // Vowels
iList.symbl[2] = RE_CONS_MATCH; // Consonants
iList.letters[0][0] = false;
iList.letters[1][0] = false;
iList.letters[2][0] = false;
@ -522,10 +531,8 @@ static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t
iList.letters[2][i] = iDic.getHeader().isConsonant(i);
}
iList.valid[3] = false; // user defined list 1
iList.symbl[3] = RE_USR1_MATCH;
iList.valid[4] = false; // user defined list 2
iList.symbl[4] = RE_USR2_MATCH;
iList.symbl[3] = RE_USR1_MATCH; // User defined list 1
iList.symbl[4] = RE_USR2_MATCH; // User defined list 2
}
@ -546,9 +553,10 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
// Parsing
Node *root = NULL;
struct search_RegE_list_t llist;
init_letter_lists(*this, llist);
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, &llist);
searchRegExpLists llist;
// Initialize the lists of letters
initLetterLists(*this, llist);
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, llist);
if (!parsingOk)
{
@ -574,7 +582,7 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
root->nextPos(PS);
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, llist);
if (a)
{
struct params_regexp_t params;

View file

@ -124,7 +124,7 @@ struct RegexpGrammar : grammar<RegexpGrammar>
void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
struct search_RegE_list_t *iList, bool negate = false)
searchRegExpLists &iList, bool negate = false)
{
if (i->value.id() == RegexpGrammar::alphavarId)
{
@ -146,24 +146,17 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
// The dictionary letters are already in upper case
const wstring &letters = iHeader.getLetters();
wstring::const_iterator itLetter;
int j;
for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; ++j)
// j is the index of the new list we create
size_t j = iList.symbl.size();
iList.symbl.push_back(RE_ALL_MATCH + j);
iList.letters.push_back(vector<bool>(DIC_LETTERS, false));
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
{
if (!iList->valid[j])
{
iList->valid[j] = true;
iList->symbl.push_back(RE_ALL_MATCH + j);
iList->letters[j][0] = false;
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
{
bool contains = (choiceLetters.find(*itLetter) != string::npos);
iList->letters[j][iHeader.getCodeFromChar(*itLetter)] =
(contains ? !negate : negate);
}
break;
}
bool contains = (choiceLetters.find(*itLetter) != string::npos);
iList.letters[j][iHeader.getCodeFromChar(*itLetter)] =
(contains ? !negate : negate);
}
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
Node *node = new Node(NODE_VAR, iList.symbl[j], NULL, NULL);
evalStack.push(node);
}
else if (i->value.id() == RegexpGrammar::varId)
@ -279,7 +272,8 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
}
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList)
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root,
searchRegExpLists &iList)
{
// Create a grammar object
RegexpGrammar g(iDic.getHeader().getLetters());

View file

@ -23,9 +23,12 @@
class Dictionary;
class Node;
struct search_RegE_list_t;
// Forward declaration; uses the same class-key ("struct") as the definition
// of searchRegExpLists, to avoid mismatched-tag warnings.
struct searchRegExpLists;
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList);
bool parseRegexp(const Dictionary &iDic,
const wchar_t *input,
Node **root,
searchRegExpLists &iList);
#endif

View file

@ -127,30 +127,20 @@ private:
#define RE_USR1_MATCH (DIC_LETTERS + 5)
#define RE_USR2_MATCH (DIC_LETTERS + 6)
/**
* number of lists for regexp letter match \n
* 0 : all tiles \n
* 1 : vowels \n
* 2 : consonants \n
* 3 : user defined 1 \n
* 4 : user defined 2 \n
* x : lists used during parsing \n
*/
#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
/**
* Structure used by dic.searchRegExp.
* It holds the explicit lists of letters that will be matched
* against special tokens in the regular expression search.
*/
struct search_RegE_list_t
struct searchRegExpLists
{
/** special symbol associated with the list */
vector<char> symbl;
/** 0 or 1 if list is valid */
bool valid[DIC_SEARCH_REGE_LIST];
/** 0 or 1 if letter is present in the list */
bool letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS];
/**
* For each list, true if the letter is present in the list, false otherwise.
* Each inner vector should have a length of DIC_LETTERS (it is a bitmask)
*/
vector<vector<bool> > letters;
};
#define RE_LIST_ALL_MATCH 0