mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2024-12-27 09:58:08 +01:00
Removed support for the old dictionary format. The code becomes simpler and (very slightly) faster.
This commit is contained in:
parent
70be50c64e
commit
66538b4806
8 changed files with 195 additions and 432 deletions
54
dic/dic.cpp
54
dic/dic.cpp
|
@ -50,24 +50,6 @@
|
|||
const Dictionary *Dictionary::m_dic = NULL;
|
||||
|
||||
|
||||
// Note: duplicated in header.cpp
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
static uint32_t swap4(uint32_t v)
|
||||
{
|
||||
uint32_t r;
|
||||
uint8_t *pv = (uint8_t*)&v;
|
||||
uint8_t *pr = (uint8_t*)&r;
|
||||
|
||||
pr[0] = pv[3];
|
||||
pr[1] = pv[2];
|
||||
pr[2] = pv[1];
|
||||
pr[3] = pv[0];
|
||||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Dictionary::Dictionary(const string &iPath)
|
||||
: m_dawg(NULL)
|
||||
{
|
||||
|
@ -112,21 +94,9 @@ Dictionary::~Dictionary()
|
|||
|
||||
void Dictionary::convertDataToArch()
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++)
|
||||
{
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++)
|
||||
{
|
||||
m_dawg[i] = swap4(m_dawg[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++)
|
||||
{
|
||||
m_dawg[i] = ntohl(m_dawg[i]);
|
||||
}
|
||||
m_dawg[i] = ntohl(m_dawg[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,10 +134,7 @@ dic_elt_t Dictionary::getNext(const dic_elt_t &e) const
|
|||
|
||||
dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->ptr;
|
||||
else
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->ptr;
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->ptr;
|
||||
}
|
||||
|
||||
|
||||
|
@ -179,10 +146,7 @@ dic_elt_t Dictionary::getRoot() const
|
|||
|
||||
dic_code_t Dictionary::getCode(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->chr;
|
||||
else
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->chr;
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->chr;
|
||||
}
|
||||
|
||||
|
||||
|
@ -194,19 +158,13 @@ wchar_t Dictionary::getChar(const dic_elt_t &e) const
|
|||
|
||||
bool Dictionary::isLast(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->last;
|
||||
else
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->last;
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->last;
|
||||
}
|
||||
|
||||
|
||||
bool Dictionary::isEndOfWord(const dic_elt_t &e) const
|
||||
{
|
||||
if (m_header->getVersion() == 0)
|
||||
return reinterpret_cast<const DicEdgeOld*>(m_dawg + e)->term;
|
||||
else
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->term;
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + e)->term;
|
||||
}
|
||||
|
||||
|
||||
|
|
56
dic/dic.h
56
dic/dic.h
|
@ -42,6 +42,7 @@ typedef unsigned char dic_code_t;
|
|||
struct params_cross_t;
|
||||
struct params_7plus1_t;
|
||||
struct params_regexp_t;
|
||||
class DicEdge;
|
||||
|
||||
class Dictionary
|
||||
{
|
||||
|
@ -162,7 +163,11 @@ public:
|
|||
unsigned int charLookup(const dic_elt_t &iRoot, const wchar_t *iPattern) const;
|
||||
|
||||
/// Getter for the edge at the given position
|
||||
const uint32_t *getEdgeAt(const dic_elt_t &iElt) const { return m_dawg + iElt; }
|
||||
const DicEdge * getEdgeAt(const dic_elt_t &iElt) const
|
||||
{
|
||||
return reinterpret_cast<const DicEdge*>(m_dawg + iElt);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Search for a word in the dictionary
|
||||
|
@ -245,56 +250,29 @@ private:
|
|||
void convertDataToArch();
|
||||
void initializeTiles();
|
||||
|
||||
/// Template getter for the edge at the given position
|
||||
template <typename DAWG_EDGE>
|
||||
const DAWG_EDGE * getEdgeAt(const dic_elt_t &iElt) const
|
||||
{
|
||||
return reinterpret_cast<const DAWG_EDGE*>(m_dawg + iElt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk the dictionary until the end of the word
|
||||
* @param s: current pointer to letters
|
||||
* @param eptr: current edge in the dawg
|
||||
*/
|
||||
template <typename DAWG_EDGE>
|
||||
const DAWG_EDGE * seekEdgePtr(const wchar_t *s, const DAWG_EDGE *eptr) const;
|
||||
|
||||
/// Helper for searchBenj()
|
||||
template <typename DAWG_EDGE>
|
||||
void searchBenjTempl(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const;
|
||||
|
||||
/// Helper for searchRacc()
|
||||
template <typename DAWG_EDGE>
|
||||
void searchRaccTempl(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const;
|
||||
const DicEdge * seekEdgePtr(const wchar_t *s, const DicEdge *eptr) const;
|
||||
|
||||
/// Helper for searchCross()
|
||||
template <typename DAWG_EDGE>
|
||||
void searchCrossRecTempl(struct params_cross_t *params,
|
||||
vector<wstring> &oWordList,
|
||||
const DAWG_EDGE *edgeptr,
|
||||
unsigned int iMaxResults) const;
|
||||
void searchCrossRec(struct params_cross_t *params,
|
||||
vector<wstring> &oWordList,
|
||||
const DicEdge *edgeptr,
|
||||
unsigned int iMaxResults) const;
|
||||
|
||||
/// Helper for search7pl1()
|
||||
template <typename DAWG_EDGE>
|
||||
void search7pl1Templ(const wstring &iRack,
|
||||
map<wchar_t, vector<wstring> > &oWordList,
|
||||
bool joker) const;
|
||||
|
||||
/// Second helper for search7pl1()
|
||||
template <typename DAWG_EDGE>
|
||||
void searchWordByLen(struct params_7plus1_t *params,
|
||||
int i, const DAWG_EDGE *edgeptr) const;
|
||||
int i, const DicEdge *edgeptr) const;
|
||||
|
||||
/// Helper for searchRegExp()
|
||||
template <typename DAWG_EDGE>
|
||||
void searchRegexpRecTempl(struct params_regexp_t *params,
|
||||
int state,
|
||||
const DAWG_EDGE *edgeptr,
|
||||
vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const;
|
||||
void searchRegexpRec(struct params_regexp_t *params,
|
||||
int state,
|
||||
const DicEdge *edgeptr,
|
||||
vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const;
|
||||
};
|
||||
|
||||
#endif /* _DIC_H_ */
|
||||
|
|
|
@ -38,22 +38,6 @@
|
|||
* ----------------
|
||||
*/
|
||||
|
||||
struct __attribute__ ((packed)) DicEdgeOld
|
||||
{
|
||||
public:
|
||||
uint32_t
|
||||
ptr : 24,
|
||||
term: 1,
|
||||
last: 1,
|
||||
fill: 1,
|
||||
chr : 5;
|
||||
bool operator==(const DicEdgeOld &iOther) const
|
||||
{
|
||||
return memcmp(this, &iOther, sizeof(*this)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct __attribute__ ((packed)) DicEdge
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -37,18 +37,17 @@
|
|||
static const unsigned int DEFAULT_VECT_ALLOC = 100;
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
const DAWG_EDGE* Dictionary::seekEdgePtr(const wchar_t* s, const DAWG_EDGE *eptr) const
|
||||
const DicEdge* Dictionary::seekEdgePtr(const wchar_t* s, const DicEdge *eptr) const
|
||||
{
|
||||
if (*s)
|
||||
{
|
||||
const DAWG_EDGE *p = getEdgeAt<DAWG_EDGE>(eptr->ptr);
|
||||
const DicEdge *p = getEdgeAt(eptr->ptr);
|
||||
do
|
||||
{
|
||||
if (p->chr == getHeader().getCodeFromChar(*s))
|
||||
return seekEdgePtr(s + 1, p);
|
||||
} while (!(*p++).last);
|
||||
return getEdgeAt<DAWG_EDGE>(0);
|
||||
return getEdgeAt(0);
|
||||
}
|
||||
else
|
||||
return eptr;
|
||||
|
@ -60,18 +59,8 @@ bool Dictionary::searchWord(const wstring &iWord) const
|
|||
if (!validateLetters(iWord))
|
||||
return false;
|
||||
|
||||
if (getHeader().getVersion() == 0)
|
||||
{
|
||||
const DicEdgeOld *e =
|
||||
seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdgeOld>(getRoot()));
|
||||
return e->term;
|
||||
}
|
||||
else
|
||||
{
|
||||
const DicEdge *e =
|
||||
seekEdgePtr(iWord.c_str(), getEdgeAt<DicEdge>(getRoot()));
|
||||
return e->term;
|
||||
}
|
||||
const DicEdge *e = seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot()));
|
||||
return e->term;
|
||||
}
|
||||
|
||||
|
||||
|
@ -93,9 +82,8 @@ struct params_7plus1_t
|
|||
char search_letters[63];
|
||||
};
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::searchWordByLen(struct params_7plus1_t *params,
|
||||
int i, const DAWG_EDGE *edgeptr) const
|
||||
int i, const DicEdge *edgeptr) const
|
||||
{
|
||||
/* depth first search in the dictionary */
|
||||
do
|
||||
|
@ -120,7 +108,7 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params,
|
|||
}
|
||||
else
|
||||
{
|
||||
searchWordByLen(params, i + 1, getEdgeAt<DAWG_EDGE>(edgeptr->ptr));
|
||||
searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr));
|
||||
}
|
||||
params->search_letters[edgeptr->chr] ++;
|
||||
params->search_wordtst[i] = L'\0';
|
||||
|
@ -143,7 +131,7 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params,
|
|||
}
|
||||
else
|
||||
{
|
||||
searchWordByLen(params, i + 1, getEdgeAt<DAWG_EDGE>(edgeptr->ptr));
|
||||
searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr));
|
||||
}
|
||||
params->search_letters[0] ++;
|
||||
params->search_wordtst[i] = L'\0';
|
||||
|
@ -153,10 +141,9 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params,
|
|||
}
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::search7pl1Templ(const wstring &iRack,
|
||||
map<wchar_t, vector<wstring> > &oWordList,
|
||||
bool joker) const
|
||||
void Dictionary::search7pl1(const wstring &iRack,
|
||||
map<wchar_t, vector<wstring> > &oWordList,
|
||||
bool joker) const
|
||||
{
|
||||
if (iRack == L"" || iRack.size() > DIC_WORD_MAX)
|
||||
return;
|
||||
|
@ -196,8 +183,8 @@ void Dictionary::search7pl1Templ(const wstring &iRack,
|
|||
if (wordlen < 1)
|
||||
return;
|
||||
|
||||
const DAWG_EDGE *root_edge = getEdgeAt<DAWG_EDGE>(getRoot());
|
||||
root_edge = getEdgeAt<DAWG_EDGE>(root_edge->ptr);
|
||||
const DicEdge *root_edge = getEdgeAt(getRoot());
|
||||
root_edge = getEdgeAt(root_edge->ptr);
|
||||
|
||||
params.results = &oWordList;
|
||||
|
||||
|
@ -223,23 +210,12 @@ void Dictionary::search7pl1Templ(const wstring &iRack,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void Dictionary::search7pl1(const wstring &iRack,
|
||||
map<wchar_t, vector<wstring> > &oWordList,
|
||||
bool joker) const
|
||||
{
|
||||
if (getHeader().getVersion() == 0)
|
||||
search7pl1Templ<DicEdgeOld>(iRack, oWordList, joker);
|
||||
else
|
||||
search7pl1Templ<DicEdge>(iRack, oWordList, joker);
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
/****************************************/
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::searchRaccTempl(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
void Dictionary::searchRacc(const wstring &iWord,
|
||||
vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (iWord == L"")
|
||||
return;
|
||||
|
@ -271,13 +247,13 @@ void Dictionary::searchRaccTempl(const wstring &iWord, vector<wstring> &oWordLis
|
|||
wordtst[i ] = '\0';
|
||||
wordtst[i+1] = '\0';
|
||||
|
||||
const DAWG_EDGE *edge_seek =
|
||||
seekEdgePtr(iWord.c_str(), getEdgeAt<DAWG_EDGE>(getRoot()));
|
||||
const DicEdge *edge_seek =
|
||||
seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot()));
|
||||
|
||||
/* points to what the next letter can be */
|
||||
const DAWG_EDGE *edge = getEdgeAt<DAWG_EDGE>(edge_seek->ptr);
|
||||
const DicEdge *edge = getEdgeAt(edge_seek->ptr);
|
||||
|
||||
if (edge != getEdgeAt<DAWG_EDGE>(0))
|
||||
if (edge != getEdgeAt(0))
|
||||
{
|
||||
do
|
||||
{
|
||||
|
@ -292,21 +268,11 @@ void Dictionary::searchRaccTempl(const wstring &iWord, vector<wstring> &oWordLis
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void Dictionary::searchRacc(const wstring &iWord, vector<wstring> &oWordList, unsigned int iMaxResults) const
|
||||
{
|
||||
if (getHeader().getVersion() == 0)
|
||||
searchRaccTempl<DicEdgeOld>(iWord, oWordList, iMaxResults);
|
||||
else
|
||||
searchRaccTempl<DicEdge>(iWord, oWordList, iMaxResults);
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
/****************************************/
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::searchBenjTempl(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
void Dictionary::searchBenj(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (iWord == L"")
|
||||
return;
|
||||
|
@ -319,17 +285,17 @@ void Dictionary::searchBenjTempl(const wstring &iWord, vector<wstring> &oWordLis
|
|||
|
||||
wchar_t wordtst[DIC_WORD_MAX];
|
||||
wcscpy(wordtst + 3, iWord.c_str());
|
||||
const DAWG_EDGE *edge0, *edge1, *edge2, *edgetst;
|
||||
edge0 = getEdgeAt<DAWG_EDGE>(getRoot());
|
||||
edge0 = getEdgeAt<DAWG_EDGE>(edge0->ptr);
|
||||
const DicEdge *edge0, *edge1, *edge2, *edgetst;
|
||||
edge0 = getEdgeAt(getRoot());
|
||||
edge0 = getEdgeAt(edge0->ptr);
|
||||
do
|
||||
{
|
||||
wordtst[0] = getHeader().getCharFromCode(edge0->chr);
|
||||
edge1 = getEdgeAt<DAWG_EDGE>(edge0->ptr);
|
||||
edge1 = getEdgeAt(edge0->ptr);
|
||||
do
|
||||
{
|
||||
wordtst[1] = getHeader().getCharFromCode(edge1->chr);
|
||||
edge2 = getEdgeAt<DAWG_EDGE>(edge1->ptr);
|
||||
edge2 = getEdgeAt(edge1->ptr);
|
||||
do
|
||||
{
|
||||
edgetst = seekEdgePtr(iWord.c_str(), edge2);
|
||||
|
@ -345,16 +311,6 @@ void Dictionary::searchBenjTempl(const wstring &iWord, vector<wstring> &oWordLis
|
|||
} while (!(*edge0++).last);
|
||||
}
|
||||
|
||||
|
||||
void Dictionary::searchBenj(const wstring &iWord, vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (getHeader().getVersion() == 0)
|
||||
searchBenjTempl<DicEdgeOld>(iWord, oWordList, iMaxResults);
|
||||
else
|
||||
searchBenjTempl<DicEdge>(iWord, oWordList, iMaxResults);
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
/****************************************/
|
||||
|
||||
|
@ -365,16 +321,15 @@ struct params_cross_t
|
|||
};
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::searchCrossRecTempl(struct params_cross_t *params,
|
||||
vector<wstring> &oWordList,
|
||||
const DAWG_EDGE *edgeptr,
|
||||
unsigned int iMaxResults) const
|
||||
void Dictionary::searchCrossRec(struct params_cross_t *params,
|
||||
vector<wstring> &oWordList,
|
||||
const DicEdge *edgeptr,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (iMaxResults && oWordList.size() >= iMaxResults)
|
||||
return;
|
||||
|
||||
const DAWG_EDGE *current = getEdgeAt<DAWG_EDGE>(edgeptr->ptr);
|
||||
const DicEdge *current = getEdgeAt(edgeptr->ptr);
|
||||
|
||||
if (params->mask[params->wordlen] == '\0')
|
||||
{
|
||||
|
@ -392,7 +347,7 @@ void Dictionary::searchCrossRecTempl(struct params_cross_t *params,
|
|||
{
|
||||
params->mask[params->wordlen] = getHeader().getCharFromCode(current->chr);
|
||||
params->wordlen ++;
|
||||
searchCrossRecTempl(params, oWordList, current, iMaxResults);
|
||||
searchCrossRec(params, oWordList, current, iMaxResults);
|
||||
params->wordlen --;
|
||||
params->mask[params->wordlen] = '.';
|
||||
}
|
||||
|
@ -405,7 +360,7 @@ void Dictionary::searchCrossRecTempl(struct params_cross_t *params,
|
|||
if (current->chr == getHeader().getCodeFromChar(params->mask[params->wordlen]))
|
||||
{
|
||||
params->wordlen ++;
|
||||
searchCrossRecTempl(params, oWordList, current, iMaxResults);
|
||||
searchCrossRec(params, oWordList, current, iMaxResults);
|
||||
params->wordlen --;
|
||||
break;
|
||||
}
|
||||
|
@ -440,16 +395,7 @@ void Dictionary::searchCross(const wstring &iMask, vector<wstring> &oWordList,
|
|||
params.mask[i] = '\0';
|
||||
|
||||
params.wordlen = 0;
|
||||
if (getHeader().getVersion() == 0)
|
||||
{
|
||||
searchCrossRecTempl(&params, oWordList,
|
||||
getEdgeAt<DicEdgeOld>(getRoot()), iMaxResults);
|
||||
}
|
||||
else
|
||||
{
|
||||
searchCrossRecTempl(&params, oWordList,
|
||||
getEdgeAt<DicEdge>(getRoot()), iMaxResults);
|
||||
}
|
||||
searchCrossRec(&params, oWordList, getEdgeAt(getRoot()), iMaxResults);
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
|
@ -461,16 +407,15 @@ struct params_regexp_t
|
|||
int maxlength;
|
||||
Automaton *automaton_field;
|
||||
wchar_t word[DIC_WORD_MAX];
|
||||
int wordlen;
|
||||
int wordlen;
|
||||
};
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
|
||||
int state,
|
||||
const DAWG_EDGE *edgeptr,
|
||||
vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
void Dictionary::searchRegexpRec(struct params_regexp_t *params,
|
||||
int state,
|
||||
const DicEdge *edgeptr,
|
||||
vector<wstring> &oWordList,
|
||||
unsigned int iMaxResults) const
|
||||
{
|
||||
if (iMaxResults && oWordList.size() >= iMaxResults)
|
||||
return;
|
||||
|
@ -487,7 +432,7 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
|
|||
}
|
||||
}
|
||||
/* we now drive the search by exploring the dictionary */
|
||||
const DAWG_EDGE *current = getEdgeAt<DAWG_EDGE>(edgeptr->ptr);
|
||||
const DicEdge *current = getEdgeAt(edgeptr->ptr);
|
||||
do
|
||||
{
|
||||
/* the current letter is current->chr */
|
||||
|
@ -498,7 +443,7 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params,
|
|||
params->word[params->wordlen] =
|
||||
getHeader().getCharFromCode(current->chr);
|
||||
params->wordlen ++;
|
||||
searchRegexpRecTempl(params, next_state, current, oWordList, iMaxResults);
|
||||
searchRegexpRec(params, next_state, current, oWordList, iMaxResults);
|
||||
params->wordlen --;
|
||||
params->word[params->wordlen] = L'\0';
|
||||
}
|
||||
|
@ -599,19 +544,9 @@ bool Dictionary::searchRegExp(const wstring &iRegexp,
|
|||
params.automaton_field = a;
|
||||
memset(params.word, L'\0', sizeof(params.word));
|
||||
params.wordlen = 0;
|
||||
if (getHeader().getVersion() == 0)
|
||||
{
|
||||
searchRegexpRecTempl(&params, a->getInitId(),
|
||||
getEdgeAt<DicEdgeOld>(getRoot()), oWordList,
|
||||
iMaxResults ? iMaxResults + 1 : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
searchRegexpRecTempl(&params, a->getInitId(),
|
||||
getEdgeAt<DicEdge>(getRoot()), oWordList,
|
||||
iMaxResults ? iMaxResults + 1 : 0);
|
||||
}
|
||||
|
||||
searchRegexpRec(&params, a->getInitId(),
|
||||
getEdgeAt(getRoot()), oWordList,
|
||||
iMaxResults ? iMaxResults + 1 : 0);
|
||||
delete a;
|
||||
}
|
||||
delete root;
|
||||
|
|
|
@ -36,9 +36,6 @@
|
|||
using namespace boost::spirit;
|
||||
using namespace std;
|
||||
|
||||
// TODO:
|
||||
// - error handling
|
||||
|
||||
// A few typedefs to simplify things
|
||||
typedef const wchar_t *iterator_t;
|
||||
typedef tree_match<iterator_t> parse_tree_match_t;
|
||||
|
|
297
dic/header.cpp
297
dic/header.cpp
|
@ -285,134 +285,78 @@ void Header::read(istream &iStream)
|
|||
|
||||
m_version = aHeader.version;
|
||||
|
||||
// Version 0 corresponds to the dictionary format in the first Eliot
|
||||
// versions, supported until Eliot 1.8 (excluded).
|
||||
// The new version (version 1) was introduced in Eliot 1.6.
|
||||
if (m_version == 0)
|
||||
{
|
||||
throw DicException(_("Too old dictionary format. This format is not "
|
||||
"supported anymore since Eliot 1.8. You can "
|
||||
"create dictionaries in the new format with the "
|
||||
"'compdic' tool provided with Eliot (since "
|
||||
"version 1.6)."));
|
||||
}
|
||||
|
||||
// Handle endianness
|
||||
if (m_version == 0)
|
||||
{
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
aHeader.root = swap4(aHeader.root);
|
||||
aHeader.nwords = swap4(aHeader.nwords);
|
||||
aHeader.nodesused = swap4(aHeader.nodesused);
|
||||
aHeader.edgesused = swap4(aHeader.edgesused);
|
||||
aHeader.nodessaved = swap4(aHeader.nodessaved);
|
||||
aHeader.edgessaved = swap4(aHeader.edgessaved);
|
||||
#endif
|
||||
m_root = aHeader.root;
|
||||
m_nbWords = aHeader.nwords;
|
||||
m_nodesUsed = aHeader.nodesused;
|
||||
m_edgesUsed = aHeader.edgesused;
|
||||
m_nodesSaved = aHeader.nodessaved;
|
||||
m_edgesSaved = aHeader.edgessaved;
|
||||
}
|
||||
m_root = ntohl(aHeader.root);
|
||||
m_nbWords = ntohl(aHeader.nwords);
|
||||
m_nodesUsed = ntohl(aHeader.nodesused);
|
||||
m_edgesUsed = ntohl(aHeader.edgesused);
|
||||
m_nodesSaved = ntohl(aHeader.nodessaved);
|
||||
m_edgesSaved = ntohl(aHeader.edgessaved);
|
||||
|
||||
// After reading the old header, we now read the extension
|
||||
Dict_header_ext aHeaderExt;
|
||||
iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext));
|
||||
if (iStream.gcount() != sizeof(Dict_header_ext))
|
||||
throw DicException("Header::read: expected to read more bytes");
|
||||
|
||||
// Handle endianness in the extension
|
||||
aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate);
|
||||
aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize);
|
||||
aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize);
|
||||
aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize);
|
||||
aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters);
|
||||
aHeaderExt.vowels = ntohll(aHeaderExt.vowels);
|
||||
aHeaderExt.consonants = ntohll(aHeaderExt.consonants);
|
||||
|
||||
m_compressDate = aHeaderExt.compressDate;
|
||||
|
||||
if (aHeaderExt.algorithm == kDAWG)
|
||||
m_type = kDAWG;
|
||||
else if (aHeaderExt.algorithm == kGADDAG)
|
||||
m_type = kGADDAG;
|
||||
else
|
||||
throw DicException("Header::read: unrecognized algorithm type");
|
||||
|
||||
m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize,
|
||||
"user and host information");
|
||||
|
||||
// Convert the dictionary letters from UTF-8 to wchar_t*
|
||||
m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize,
|
||||
"dictionary name");
|
||||
|
||||
// Convert the dictionary letters from UTF-8 to wchar_t*
|
||||
m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize,
|
||||
"dictionary letters");
|
||||
// Safety check: correct number of letters?
|
||||
if (m_letters.size() != aHeaderExt.nbLetters)
|
||||
{
|
||||
m_root = ntohl(aHeader.root);
|
||||
m_nbWords = ntohl(aHeader.nwords);
|
||||
m_nodesUsed = ntohl(aHeader.nodesused);
|
||||
m_edgesUsed = ntohl(aHeader.edgesused);
|
||||
m_nodesSaved = ntohl(aHeader.nodessaved);
|
||||
m_edgesSaved = ntohl(aHeader.edgessaved);
|
||||
throw DicException("Header::read: inconsistent header");
|
||||
}
|
||||
|
||||
if (m_version == 0)
|
||||
// Letters points and frequency
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
m_compressDate = 0;
|
||||
m_userHost = convertToWc(_("Unknown (old format)"));
|
||||
m_dicName = convertToWc(_("Unknown (old format)"));
|
||||
|
||||
// In version 0, the letters, points, frequency,
|
||||
// vowels and consonants were hard-coded...
|
||||
m_letters = convertToWc("ABCDEFGHIJKLMNOPQRSTUVWXYZ?");
|
||||
|
||||
static const uint8_t Frenchpoints[] =
|
||||
{
|
||||
// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?
|
||||
1,3,3,2, 1,4,2,4,1,8,10,1,2,1,1,3,8,1,1,1,1,4,10,10,10,10,0
|
||||
};
|
||||
|
||||
static const uint8_t FrenchFrequency[] =
|
||||
{
|
||||
// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?
|
||||
9,2,2,3,15,2,2,2,8,1, 1,5,3,6,6,2,1,6,6,6,6,2, 1, 1, 1, 1,2
|
||||
};
|
||||
|
||||
// The jokers and the 'Y' can be considered both as vowels or consonants
|
||||
static const uint8_t FrenchVowels[] =
|
||||
{
|
||||
// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?
|
||||
1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0,1
|
||||
};
|
||||
|
||||
static const uint8_t FrenchConsonants[] =
|
||||
{
|
||||
// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?
|
||||
0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1,1
|
||||
};
|
||||
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
m_points.push_back(Frenchpoints[i]);
|
||||
m_frequency.push_back(FrenchFrequency[i]);
|
||||
m_vowels.push_back(FrenchVowels[i]);
|
||||
m_consonants.push_back(FrenchConsonants[i]);
|
||||
}
|
||||
m_points.push_back(aHeaderExt.points[i]);
|
||||
m_frequency.push_back(aHeaderExt.frequency[i]);
|
||||
}
|
||||
else
|
||||
|
||||
// Vowels and consonants
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
// This header doesn't use the old serialization format, so read the
|
||||
// extension as well
|
||||
Dict_header_ext aHeaderExt;
|
||||
iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext));
|
||||
if (iStream.gcount() != sizeof(Dict_header_ext))
|
||||
throw DicException("Header::read: expected to read more bytes");
|
||||
|
||||
// Handle endianness in the extension
|
||||
aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate);
|
||||
aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize);
|
||||
aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize);
|
||||
aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize);
|
||||
aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters);
|
||||
aHeaderExt.vowels = ntohll(aHeaderExt.vowels);
|
||||
aHeaderExt.consonants = ntohll(aHeaderExt.consonants);
|
||||
|
||||
m_compressDate = aHeaderExt.compressDate;
|
||||
|
||||
if (aHeaderExt.algorithm == kDAWG)
|
||||
m_type = kDAWG;
|
||||
else if (aHeaderExt.algorithm == kGADDAG)
|
||||
m_type = kGADDAG;
|
||||
else
|
||||
throw DicException("Header::read: unrecognized algorithm type");
|
||||
|
||||
m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize,
|
||||
"user and host information");
|
||||
|
||||
// Convert the dictionary letters from UTF-8 to wchar_t*
|
||||
m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize,
|
||||
"dictionary name");
|
||||
|
||||
// Convert the dictionary letters from UTF-8 to wchar_t*
|
||||
m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize,
|
||||
"dictionary letters");
|
||||
// Safety check: correct number of letters?
|
||||
if (m_letters.size() != aHeaderExt.nbLetters)
|
||||
{
|
||||
throw DicException("Header::read: inconsistent header");
|
||||
}
|
||||
|
||||
// Letters points and frequency
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
m_points.push_back(aHeaderExt.points[i]);
|
||||
m_frequency.push_back(aHeaderExt.frequency[i]);
|
||||
}
|
||||
|
||||
// Vowels and consonants
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
m_vowels.push_back(aHeaderExt.vowels & (1 << i));
|
||||
m_consonants.push_back(aHeaderExt.consonants & (1 << i));
|
||||
}
|
||||
|
||||
m_vowels.push_back(aHeaderExt.vowels & (1 << i));
|
||||
m_consonants.push_back(aHeaderExt.consonants & (1 << i));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -434,81 +378,70 @@ void Header::write(ostream &oStream) const
|
|||
if (!oStream.good())
|
||||
throw DicException("Header::write: error when writing to file");
|
||||
|
||||
if (m_version != 0)
|
||||
Dict_header_ext aHeaderExt;
|
||||
aHeaderExt.compressDate = m_compressDate;
|
||||
aHeaderExt.userHostSize =
|
||||
writeInUTF8(m_userHost, aHeaderExt.userHost,
|
||||
_MAX_USER_HOST_, "user and host information");
|
||||
aHeaderExt.algorithm = m_type;
|
||||
|
||||
// Convert the dictionary name to UTF-8
|
||||
aHeaderExt.dicNameSize =
|
||||
writeInUTF8(m_dicName, aHeaderExt.dicName,
|
||||
_MAX_DIC_NAME_SIZE_, "dictionary name");
|
||||
|
||||
// Convert the dictionary letters to UTF-8
|
||||
aHeaderExt.lettersSize =
|
||||
writeInUTF8(m_letters, aHeaderExt.letters,
|
||||
_MAX_LETTERS_SIZE_, "dictionary letters");
|
||||
aHeaderExt.nbLetters = (uint32_t)m_letters.size();
|
||||
|
||||
// Letters points and frequency
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
Dict_header_ext aHeaderExt;
|
||||
aHeaderExt.compressDate = m_compressDate;
|
||||
aHeaderExt.userHostSize =
|
||||
writeInUTF8(m_userHost, aHeaderExt.userHost,
|
||||
_MAX_USER_HOST_, "user and host information");
|
||||
aHeaderExt.algorithm = m_type;
|
||||
|
||||
// Convert the dictionary name to UTF-8
|
||||
aHeaderExt.dicNameSize =
|
||||
writeInUTF8(m_dicName, aHeaderExt.dicName,
|
||||
_MAX_DIC_NAME_SIZE_, "dictionary name");
|
||||
|
||||
// Convert the dictionary letters to UTF-8
|
||||
aHeaderExt.lettersSize =
|
||||
writeInUTF8(m_letters, aHeaderExt.letters,
|
||||
_MAX_LETTERS_SIZE_, "dictionary letters");
|
||||
aHeaderExt.nbLetters = (uint32_t)m_letters.size();
|
||||
|
||||
// Letters points and frequency
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
aHeaderExt.points[i] = m_points[i];
|
||||
aHeaderExt.frequency[i] = m_frequency[i];
|
||||
}
|
||||
|
||||
// Vowels and consonants
|
||||
aHeaderExt.vowels = 0;
|
||||
aHeaderExt.consonants = 0;
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
if (m_vowels[i])
|
||||
aHeaderExt.vowels |= 1 << i;
|
||||
if (m_consonants[i])
|
||||
aHeaderExt.consonants |= 1 << i;
|
||||
}
|
||||
|
||||
// Handle endianness in the extension
|
||||
aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize);
|
||||
aHeaderExt.compressDate = htonll(aHeaderExt.compressDate);
|
||||
aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize);
|
||||
aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize);
|
||||
aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters);
|
||||
aHeaderExt.vowels = htonll(aHeaderExt.vowels);
|
||||
aHeaderExt.consonants = htonll(aHeaderExt.consonants);
|
||||
|
||||
// Write the extension
|
||||
oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext));
|
||||
if (!oStream.good())
|
||||
throw DicException("Header::write: error when writing to file");
|
||||
aHeaderExt.points[i] = m_points[i];
|
||||
aHeaderExt.frequency[i] = m_frequency[i];
|
||||
}
|
||||
|
||||
// Vowels and consonants
|
||||
aHeaderExt.vowels = 0;
|
||||
aHeaderExt.consonants = 0;
|
||||
for (unsigned int i = 0; i < m_letters.size(); ++i)
|
||||
{
|
||||
if (m_vowels[i])
|
||||
aHeaderExt.vowels |= 1 << i;
|
||||
if (m_consonants[i])
|
||||
aHeaderExt.consonants |= 1 << i;
|
||||
}
|
||||
|
||||
// Handle endianness in the extension
|
||||
aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize);
|
||||
aHeaderExt.compressDate = htonll(aHeaderExt.compressDate);
|
||||
aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize);
|
||||
aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize);
|
||||
aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters);
|
||||
aHeaderExt.vowels = htonll(aHeaderExt.vowels);
|
||||
aHeaderExt.consonants = htonll(aHeaderExt.consonants);
|
||||
|
||||
// Write the extension
|
||||
oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext));
|
||||
if (!oStream.good())
|
||||
throw DicException("Header::write: error when writing to file");
|
||||
}
|
||||
|
||||
|
||||
void Header::print() const
|
||||
{
|
||||
printf(_("dictionary name: %s\n"), convertToMb(m_dicName).c_str());
|
||||
if (m_version)
|
||||
{
|
||||
char buf[50];
|
||||
strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate));
|
||||
printf(_("compressed on: %s\n"), buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(_("compressed on: Unknown date (old format)\n"));
|
||||
}
|
||||
char buf[50];
|
||||
strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate));
|
||||
printf(_("compressed on: %s\n"), buf);
|
||||
printf(_("compressed using a binary compiled by: %s\n"), convertToMb(m_userHost).c_str());
|
||||
printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG");
|
||||
printf(_("letters: %s\n"), convertToMb(m_letters).c_str());
|
||||
printf(_("number of letters: %lu\n"), (long unsigned int)m_letters.size());
|
||||
printf(_("number of words: %d\n"), m_nbWords);
|
||||
long unsigned int size =
|
||||
sizeof(Dict_header_old) + (m_version ? sizeof(Dict_header_ext) : 0);
|
||||
long unsigned int size = sizeof(Dict_header_old) + sizeof(Dict_header_ext);
|
||||
printf(_("header size: %lu bytes\n"), size);
|
||||
printf(_("root: %d (edge)\n"), m_root);
|
||||
printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);
|
||||
|
|
|
@ -46,8 +46,7 @@
|
|||
using namespace std;
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wchar_t *s, DAWG_EDGE i)
|
||||
static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wchar_t *s, DicEdge i)
|
||||
{
|
||||
if (i.term) /* edge points at a complete word */
|
||||
{
|
||||
|
@ -56,7 +55,7 @@ static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wc
|
|||
}
|
||||
if (i.ptr)
|
||||
{ /* Compute index: is it non-zero ? */
|
||||
const DAWG_EDGE *p = reinterpret_cast<const DAWG_EDGE*>(iDic.getEdgeAt(i.ptr));
|
||||
const DicEdge *p = iDic.getEdgeAt(i.ptr);
|
||||
do
|
||||
{ /* for each edge out of this node */
|
||||
*s = iDic.getHeader().getCharFromCode(p->chr);
|
||||
|
@ -67,24 +66,22 @@ static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wc
|
|||
}
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void print_dic_list(const Dictionary &iDic)
|
||||
{
|
||||
static wchar_t buf[80];
|
||||
print_dic_rec(cout, iDic, buf, buf, *reinterpret_cast<const DAWG_EDGE*>(iDic.getEdgeAt(iDic.getRoot())));
|
||||
print_dic_rec(cout, iDic, buf, buf, *iDic.getEdgeAt(iDic.getRoot()));
|
||||
}
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
static void print_node_hex(const Dictionary &dic, int i)
|
||||
{
|
||||
union edge_t
|
||||
{
|
||||
DAWG_EDGE e;
|
||||
DicEdge e;
|
||||
uint32_t s;
|
||||
} ee;
|
||||
|
||||
ee.e = *reinterpret_cast<const DAWG_EDGE*>(dic.getEdgeAt(i));
|
||||
ee.e = *reinterpret_cast<const DicEdge*>(dic.getEdgeAt(i));
|
||||
|
||||
printf("0x%04lx %08x |%4d ptr=%8d t=%d l=%d chr=%2d (%c)\n",
|
||||
(unsigned long)i*sizeof(ee), (unsigned int)(ee.s),
|
||||
|
@ -92,13 +89,12 @@ static void print_node_hex(const Dictionary &dic, int i)
|
|||
}
|
||||
|
||||
|
||||
template <typename DAWG_EDGE>
|
||||
void print_dic_hex(const Dictionary &iDic)
|
||||
{
|
||||
printf(_("offset binary | structure\n"));
|
||||
printf("------ -------- | --------------------\n");
|
||||
for (unsigned int i = 0; i < (iDic.getHeader().getNbEdgesUsed() + 1); i++)
|
||||
print_node_hex<DAWG_EDGE>(iDic, i);
|
||||
print_node_hex(iDic, i);
|
||||
}
|
||||
|
||||
|
||||
|
@ -172,17 +168,11 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
if (option_print_dic_hex || option_print_all)
|
||||
{
|
||||
if (dic.getHeader().getVersion() == 0)
|
||||
print_dic_hex<DicEdgeOld>(dic);
|
||||
else
|
||||
print_dic_hex<DicEdge>(dic);
|
||||
print_dic_hex(dic);
|
||||
}
|
||||
if (option_print_dic_list || option_print_all)
|
||||
{
|
||||
if (dic.getHeader().getVersion() == 0)
|
||||
print_dic_list<DicEdgeOld>(dic);
|
||||
else
|
||||
print_dic_list<DicEdge>(dic);
|
||||
print_dic_list(dic);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -230,21 +230,9 @@ void MainWindow::updateStatusBar(const Dictionary *iDic)
|
|||
if (iDic == NULL)
|
||||
m_dicNameLabel->setText("No dictionary");
|
||||
else {
|
||||
if (iDic->getHeader().getVersion() != 0)
|
||||
{
|
||||
QString dicName = qfw(m_dic->getHeader().getName());
|
||||
m_dicNameLabel->setText(_q("Dictionary: %1").arg(dicName));
|
||||
m_dicNameLabel->setToolTip("");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_dicNameLabel->setText(_q("Dictionary: Unknown (old format)"));
|
||||
QString warning = _q("The dictionary name cannot be "
|
||||
"retrieved, because you are using an old dictionary format.\n"
|
||||
"You can probably download a newer version of the dictionary "
|
||||
"on http://www.nongnu.org/eliot/");
|
||||
m_dicNameLabel->setToolTip(warning);
|
||||
}
|
||||
QString dicName = qfw(m_dic->getHeader().getName());
|
||||
m_dicNameLabel->setText(_q("Dictionary: %1").arg(dicName));
|
||||
m_dicNameLabel->setToolTip("");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue