mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2024-12-25 21:59:30 +01:00
Added consts, renamed a class, and simplified the code using vectors
This commit is contained in:
parent
fa493d4135
commit
2b161abd2c
6 changed files with 77 additions and 90 deletions
|
@ -18,13 +18,6 @@
|
|||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*****************************************************************************/
|
||||
|
||||
/**
|
||||
* \file automaton.c
|
||||
* \brief (Non)Deterministic Finite AutomatonHelper for Regexp
|
||||
* \author Antoine Fraboulet
|
||||
* \date 2005
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <set>
|
||||
|
@ -79,6 +72,11 @@ public:
|
|||
State * m_next[MAX_TRANSITION_LETTERS];
|
||||
|
||||
private:
|
||||
/**
|
||||
* Id of the state. For the first automaton, each ID contains only 1
|
||||
* integer, but the ID of the deterministic automaton will contain
|
||||
* several integers, according to the usual "determinization" algorithm.
|
||||
*/
|
||||
set<uint64_t> m_id;
|
||||
|
||||
void init()
|
||||
|
@ -107,7 +105,7 @@ public:
|
|||
|
||||
static AutomatonHelper *ps2nfa(uint64_t iInitState, int *ptl, uint64_t *PS);
|
||||
static AutomatonHelper *nfa2dfa(const AutomatonHelper &iNfa,
|
||||
struct search_RegE_list_t *iList);
|
||||
const searchRegExpLists &iList);
|
||||
|
||||
/// List of states
|
||||
list<State *> m_states;
|
||||
|
@ -121,7 +119,8 @@ private:
|
|||
void printNodes(FILE* f) const;
|
||||
void printEdges(FILE* f) const;
|
||||
void setAccept(State * s) const;
|
||||
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter, struct search_RegE_list_t *iList) const;
|
||||
set<uint64_t> getSuccessor(const set<uint64_t> &S, int letter,
|
||||
const searchRegExpLists &iList) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -129,7 +128,8 @@ private:
|
|||
Definition of the Automaton class
|
||||
* ************************************************** */
|
||||
|
||||
Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList)
|
||||
Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS,
|
||||
const searchRegExpLists &iList)
|
||||
{
|
||||
AutomatonHelper *nfa = AutomatonHelper::ps2nfa(iInitState, ptl, PS);
|
||||
DMSG(printf("\n non deterministic automaton OK \n\n"));
|
||||
|
@ -151,7 +151,7 @@ Automaton::Automaton(uint64_t iInitState, int *ptl, uint64_t *PS, struct search_
|
|||
Automaton::~Automaton()
|
||||
{
|
||||
delete[] m_acceptors;
|
||||
for (int i = 0; i <= m_nbStates; i++)
|
||||
for (unsigned int i = 0; i <= m_nbStates; i++)
|
||||
{
|
||||
delete[] m_transitions[i];
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ void Automaton::finalize(const AutomatonHelper &iHelper)
|
|||
m_acceptors = new bool[m_nbStates + 1];
|
||||
memset(m_acceptors, 0, (m_nbStates + 1) * sizeof(bool));
|
||||
m_transitions = new int*[m_nbStates + 1];
|
||||
for (int i = 0; i <= m_nbStates; i++)
|
||||
for (unsigned int i = 0; i <= m_nbStates; i++)
|
||||
{
|
||||
m_transitions[i] = new int[MAX_TRANSITION_LETTERS];
|
||||
memset(m_transitions[i], 0, MAX_TRANSITION_LETTERS * sizeof(int));
|
||||
|
@ -205,7 +205,7 @@ void Automaton::dump(const string &iFileName) const
|
|||
{
|
||||
FILE *f = fopen(iFileName.c_str(), "w");
|
||||
fprintf(f, "digraph automaton {\n");
|
||||
for (int i = 1; i <= m_nbStates; i++)
|
||||
for (unsigned int i = 1; i <= m_nbStates; i++)
|
||||
{
|
||||
fprintf(f, "\t%d [label = \"%d\"", i, i);
|
||||
if (i == m_init)
|
||||
|
@ -215,7 +215,7 @@ void Automaton::dump(const string &iFileName) const
|
|||
fprintf(f, "];\n");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
for (int i = 1; i <= m_nbStates; i++)
|
||||
for (unsigned int i = 1; i <= m_nbStates; i++)
|
||||
{
|
||||
for (int l = 0; l < MAX_TRANSITION_LETTERS; l++)
|
||||
{
|
||||
|
@ -363,7 +363,7 @@ AutomatonHelper *AutomatonHelper::ps2nfa(uint64_t init_state_id, int *ptl, uint6
|
|||
|
||||
set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
||||
int letter,
|
||||
struct search_RegE_list_t *iList) const
|
||||
const searchRegExpLists &iList) const
|
||||
{
|
||||
set<uint64_t> R, r;
|
||||
set<uint64_t>::const_iterator it;
|
||||
|
@ -394,26 +394,23 @@ set<uint64_t> AutomatonHelper::getSuccessor(const set<uint64_t> &S,
|
|||
|
||||
if (letter < RE_FINAL_TOK)
|
||||
{
|
||||
for (int i = 0; i < DIC_SEARCH_REGE_LIST; i++)
|
||||
for (unsigned int i = 0; i < iList.symbl.size(); i++)
|
||||
{
|
||||
if (iList->valid[i])
|
||||
if (iList.letters[i][letter] && (z = y->m_next[(int)iList.symbl[i]]) != NULL)
|
||||
{
|
||||
if (iList->letters[i][letter] && (z = y->m_next[(int)iList->symbl[i]]) != NULL)
|
||||
{
|
||||
DMSG(printf("*** letter "));
|
||||
DMSG(regexp_print_letter(stdout, letter));
|
||||
DMSG(printf("is in "));
|
||||
DMSG(regexp_print_letter(stdout, i));
|
||||
DMSG(printf("*** letter "));
|
||||
DMSG(regexp_print_letter(stdout, letter));
|
||||
DMSG(printf("is in "));
|
||||
DMSG(regexp_print_letter(stdout, i));
|
||||
|
||||
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
||||
Ry.insert(r.begin(), r.end());
|
||||
Ry.insert(z->getId().begin(), z->getId().end());
|
||||
}
|
||||
r = getSuccessor(z->getId(), RE_EPSILON, iList);
|
||||
Ry.insert(r.begin(), r.end());
|
||||
Ry.insert(z->getId().begin(), z->getId().end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
|
||||
R.insert(Ry.begin(), Ry.end()); /* R = R \cup Ry */
|
||||
}
|
||||
|
||||
return R;
|
||||
|
@ -440,7 +437,7 @@ void AutomatonHelper::setAccept(State * s) const
|
|||
|
||||
|
||||
AutomatonHelper *AutomatonHelper::nfa2dfa(const AutomatonHelper &iNfa,
|
||||
struct search_RegE_list_t *iList)
|
||||
const searchRegExpLists &iList)
|
||||
{
|
||||
State * current_state;
|
||||
|
||||
|
|
|
@ -18,17 +18,11 @@
|
|||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*****************************************************************************/
|
||||
|
||||
/**
|
||||
* \file automaton.h
|
||||
* \brief (Non)Deterministic Finite Automaton for Regexp
|
||||
* \author Antoine Fraboulet
|
||||
* \date 2005
|
||||
*/
|
||||
|
||||
#ifndef _DIC_AUTOMATON_H_
|
||||
#define _DIC_AUTOMATON_H_
|
||||
|
||||
class AutomatonHelper;
|
||||
struct searchRegExpLists;
|
||||
|
||||
class Automaton
|
||||
{
|
||||
|
@ -38,7 +32,8 @@ public:
|
|||
* Build a static deterministic finite automaton from
|
||||
* "init_state", "ptl" and "PS" given by the parser
|
||||
*/
|
||||
Automaton(uint64_t init_state, int *ptl, uint64_t *PS, struct search_RegE_list_t *iList);
|
||||
Automaton(uint64_t init_state, int *ptl, uint64_t *PS,
|
||||
const searchRegExpLists &iList);
|
||||
|
||||
/// Destructor
|
||||
~Automaton();
|
||||
|
@ -77,10 +72,10 @@ public:
|
|||
|
||||
private:
|
||||
/// Number of states
|
||||
int m_nbStates;
|
||||
unsigned int m_nbStates;
|
||||
|
||||
/// ID of the init state
|
||||
int m_init;
|
||||
uint64_t m_init;
|
||||
|
||||
/// Array of booleans, one for each state
|
||||
bool *m_acceptors;
|
||||
|
|
|
@ -499,18 +499,27 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
|
|||
}
|
||||
|
||||
|
||||
static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t &iList)
|
||||
/**
|
||||
* Initialize the lists of letters with pre-defined lists
|
||||
|
||||
* 0: all tiles
|
||||
* 1: vowels
|
||||
* 2: consonants
|
||||
* 3: user defined 1
|
||||
* 4: user defined 2
|
||||
* x: lists used during parsing
|
||||
*/
|
||||
static void initLetterLists(const Dictionary &iDic,
|
||||
searchRegExpLists &iList)
|
||||
{
|
||||
memset(&iList, 0, sizeof(iList));
|
||||
// Prepare the space for 5 items
|
||||
iList.symbl.assign(5, 0);
|
||||
iList.letters.assign(5, vector<bool>(DIC_LETTERS, false));
|
||||
|
||||
iList.valid[0] = true; // all letters
|
||||
iList.symbl[0] = RE_ALL_MATCH;
|
||||
iList.valid[1] = true; // vowels
|
||||
iList.symbl[1] = RE_VOWL_MATCH;
|
||||
iList.valid[2] = true; // consonants
|
||||
iList.symbl[2] = RE_CONS_MATCH;
|
||||
iList.symbl[0] = RE_ALL_MATCH; // All letters
|
||||
iList.symbl[1] = RE_VOWL_MATCH; // Vowels
|
||||
iList.symbl[2] = RE_CONS_MATCH; // Consonants
|
||||
iList.letters[0][0] = false;
|
||||
iList.letters[1][0] = false;
|
||||
iList.letters[2][0] = false;
|
||||
|
@ -522,10 +531,8 @@ static void init_letter_lists(const Dictionary &iDic, struct search_RegE_list_t
|
|||
iList.letters[2][i] = iDic.getHeader().isConsonant(i);
|
||||
}
|
||||
|
||||
iList.valid[3] = false; // user defined list 1
|
||||
iList.symbl[3] = RE_USR1_MATCH;
|
||||
iList.valid[4] = false; // user defined list 2
|
||||
iList.symbl[4] = RE_USR2_MATCH;
|
||||
iList.symbl[3] = RE_USR1_MATCH; // User defined list 1
|
||||
iList.symbl[4] = RE_USR2_MATCH; // User defined list 2
|
||||
}
|
||||
|
||||
|
||||
|
@ -546,9 +553,10 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
|
|||
|
||||
// Parsing
|
||||
Node *root = NULL;
|
||||
struct search_RegE_list_t llist;
|
||||
init_letter_lists(*this, llist);
|
||||
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, &llist);
|
||||
searchRegExpLists llist;
|
||||
// Initialize the lists of letters
|
||||
initLetterLists(*this, llist);
|
||||
bool parsingOk = parseRegexp(*this, (iRegexp + L"#").c_str(), &root, llist);
|
||||
|
||||
if (!parsingOk)
|
||||
{
|
||||
|
@ -574,7 +582,7 @@ void Dictionary::searchRegExp(const wstring &iRegexp,
|
|||
|
||||
root->nextPos(PS);
|
||||
|
||||
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, &llist);
|
||||
Automaton *a = new Automaton(root->getFirstPos(), ptl, PS, llist);
|
||||
if (a)
|
||||
{
|
||||
struct params_regexp_t params;
|
||||
|
|
|
@ -124,7 +124,7 @@ struct RegexpGrammar : grammar<RegexpGrammar>
|
|||
|
||||
|
||||
void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
||||
struct search_RegE_list_t *iList, bool negate = false)
|
||||
searchRegExpLists &iList, bool negate = false)
|
||||
{
|
||||
if (i->value.id() == RegexpGrammar::alphavarId)
|
||||
{
|
||||
|
@ -146,24 +146,17 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
|||
// The dictionary letters are already in upper case
|
||||
const wstring &letters = iHeader.getLetters();
|
||||
wstring::const_iterator itLetter;
|
||||
int j;
|
||||
for (j = RE_LIST_USER_END + 1; j < DIC_SEARCH_REGE_LIST; ++j)
|
||||
// j is the index of the new list we create
|
||||
size_t j = iList.symbl.size();
|
||||
iList.symbl.push_back(RE_ALL_MATCH + j);
|
||||
iList.letters.push_back(vector<bool>(DIC_LETTERS, false));
|
||||
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
|
||||
{
|
||||
if (!iList->valid[j])
|
||||
{
|
||||
iList->valid[j] = true;
|
||||
iList->symbl.push_back(RE_ALL_MATCH + j);
|
||||
iList->letters[j][0] = false;
|
||||
for (itLetter = letters.begin(); itLetter != letters.end(); ++itLetter)
|
||||
{
|
||||
bool contains = (choiceLetters.find(*itLetter) != string::npos);
|
||||
iList->letters[j][iHeader.getCodeFromChar(*itLetter)] =
|
||||
(contains ? !negate : negate);
|
||||
}
|
||||
break;
|
||||
}
|
||||
bool contains = (choiceLetters.find(*itLetter) != string::npos);
|
||||
iList.letters[j][iHeader.getCodeFromChar(*itLetter)] =
|
||||
(contains ? !negate : negate);
|
||||
}
|
||||
Node *node = new Node(NODE_VAR, iList->symbl[j], NULL, NULL);
|
||||
Node *node = new Node(NODE_VAR, iList.symbl[j], NULL, NULL);
|
||||
evalStack.push(node);
|
||||
}
|
||||
else if (i->value.id() == RegexpGrammar::varId)
|
||||
|
@ -279,7 +272,8 @@ void evaluate(const Header &iHeader, iter_t const& i, stack<Node*> &evalStack,
|
|||
}
|
||||
|
||||
|
||||
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList)
|
||||
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root,
|
||||
searchRegExpLists &iList)
|
||||
{
|
||||
// Create a grammar object
|
||||
RegexpGrammar g(iDic.getHeader().getLetters());
|
||||
|
|
|
@ -23,9 +23,12 @@
|
|||
|
||||
class Dictionary;
|
||||
class Node;
|
||||
struct search_RegE_list_t;
|
||||
class searchRegExpLists;
|
||||
|
||||
bool parseRegexp(const Dictionary &iDic, const wchar_t *input, Node **root, struct search_RegE_list_t *iList);
|
||||
bool parseRegexp(const Dictionary &iDic,
|
||||
const wchar_t *input,
|
||||
Node **root,
|
||||
searchRegExpLists &iList);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
22
dic/regexp.h
22
dic/regexp.h
|
@ -127,30 +127,20 @@ private:
|
|||
#define RE_USR1_MATCH (DIC_LETTERS + 5)
|
||||
#define RE_USR2_MATCH (DIC_LETTERS + 6)
|
||||
|
||||
/**
|
||||
* number of lists for regexp letter match \n
|
||||
* 0 : all tiles \n
|
||||
* 1 : vowels \n
|
||||
* 2 : consonants \n
|
||||
* 3 : user defined 1 \n
|
||||
* 4 : user defined 2 \n
|
||||
* x : lists used during parsing \n
|
||||
*/
|
||||
#define DIC_SEARCH_REGE_LIST (REGEXP_MAX)
|
||||
|
||||
/**
|
||||
* Structure used for dic.searchRegExp
|
||||
* This structure is used to explicit letters list that will be matched
|
||||
* against special tokens in the regular expression search
|
||||
*/
|
||||
struct search_RegE_list_t
|
||||
struct searchRegExpLists
|
||||
{
|
||||
/** special symbol associated with the list */
|
||||
vector<char> symbl;
|
||||
/** 0 or 1 if list is valid */
|
||||
bool valid[DIC_SEARCH_REGE_LIST];
|
||||
/** 0 or 1 if letter is present in the list */
|
||||
bool letters[DIC_SEARCH_REGE_LIST][DIC_LETTERS];
|
||||
/**
|
||||
* 0 or 1 if letter is present in the list.
|
||||
* The inner vector should have a length of DIC_LETTERS (it is a bitmask)
|
||||
*/
|
||||
vector<vector<bool> > letters;
|
||||
};
|
||||
|
||||
#define RE_LIST_ALL_MATCH 0
|
||||
|
|
Loading…
Reference in a new issue