From 66538b4806cab8227872f35c96b468a6f7bb3de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Teuli=C3=A8re?= Date: Sat, 22 Nov 2008 13:11:48 +0000 Subject: [PATCH] Removed support for the old dictionary format. The code becomes simpler and (very slightly) faster. --- dic/dic.cpp | 54 +------- dic/dic.h | 56 +++------ dic/dic_internals.h | 16 --- dic/dic_search.cpp | 157 +++++++---------------- dic/grammar.cpp | 3 - dic/header.cpp | 297 +++++++++++++++++--------------------------- dic/listdic.cpp | 26 ++-- qt/main_window.cpp | 18 +-- 8 files changed, 195 insertions(+), 432 deletions(-) diff --git a/dic/dic.cpp b/dic/dic.cpp index d707996..e61e6e0 100644 --- a/dic/dic.cpp +++ b/dic/dic.cpp @@ -50,24 +50,6 @@ const Dictionary *Dictionary::m_dic = NULL; -// Note: duplicated in header.cpp -#if defined(WORDS_BIGENDIAN) -static uint32_t swap4(uint32_t v) -{ - uint32_t r; - uint8_t *pv = (uint8_t*)&v; - uint8_t *pr = (uint8_t*)&r; - - pr[0] = pv[3]; - pr[1] = pv[2]; - pr[2] = pv[1]; - pr[3] = pv[0]; - - return r; -} -#endif - - Dictionary::Dictionary(const string &iPath) : m_dawg(NULL) { @@ -112,21 +94,9 @@ Dictionary::~Dictionary() void Dictionary::convertDataToArch() { - if (m_header->getVersion() == 0) + for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++) { -#if defined(WORDS_BIGENDIAN) - for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++) - { - m_dawg[i] = swap4(m_dawg[i]); - } -#endif - } - else - { - for (unsigned int i = 0; i < (m_header->getNbEdgesUsed() + 1); i++) - { - m_dawg[i] = ntohl(m_dawg[i]); - } + m_dawg[i] = ntohl(m_dawg[i]); } } @@ -164,10 +134,7 @@ dic_elt_t Dictionary::getNext(const dic_elt_t &e) const dic_elt_t Dictionary::getSucc(const dic_elt_t &e) const { - if (m_header->getVersion() == 0) - return reinterpret_cast(m_dawg + e)->ptr; - else - return reinterpret_cast(m_dawg + e)->ptr; + return reinterpret_cast(m_dawg + e)->ptr; } @@ -179,10 +146,7 @@ dic_elt_t Dictionary::getRoot() const dic_code_t 
Dictionary::getCode(const dic_elt_t &e) const { - if (m_header->getVersion() == 0) - return reinterpret_cast(m_dawg + e)->chr; - else - return reinterpret_cast(m_dawg + e)->chr; + return reinterpret_cast(m_dawg + e)->chr; } @@ -194,19 +158,13 @@ wchar_t Dictionary::getChar(const dic_elt_t &e) const bool Dictionary::isLast(const dic_elt_t &e) const { - if (m_header->getVersion() == 0) - return reinterpret_cast(m_dawg + e)->last; - else - return reinterpret_cast(m_dawg + e)->last; + return reinterpret_cast(m_dawg + e)->last; } bool Dictionary::isEndOfWord(const dic_elt_t &e) const { - if (m_header->getVersion() == 0) - return reinterpret_cast(m_dawg + e)->term; - else - return reinterpret_cast(m_dawg + e)->term; + return reinterpret_cast(m_dawg + e)->term; } diff --git a/dic/dic.h b/dic/dic.h index 3d01711..523f40e 100644 --- a/dic/dic.h +++ b/dic/dic.h @@ -42,6 +42,7 @@ typedef unsigned char dic_code_t; struct params_cross_t; struct params_7plus1_t; struct params_regexp_t; +class DicEdge; class Dictionary { @@ -162,7 +163,11 @@ public: unsigned int charLookup(const dic_elt_t &iRoot, const wchar_t *iPattern) const; /// Getter for the edge at the given position - const uint32_t *getEdgeAt(const dic_elt_t &iElt) const { return m_dawg + iElt; } + const DicEdge * getEdgeAt(const dic_elt_t &iElt) const + { + return reinterpret_cast(m_dawg + iElt); + } + /** * Search for a word in the dictionary @@ -245,56 +250,29 @@ private: void convertDataToArch(); void initializeTiles(); - /// Template getter for the edge at the given position - template - const DAWG_EDGE * getEdgeAt(const dic_elt_t &iElt) const - { - return reinterpret_cast(m_dawg + iElt); - } - /** * Walk the dictionary until the end of the word * @param s: current pointer to letters * @param eptr: current edge in the dawg */ - template - const DAWG_EDGE * seekEdgePtr(const wchar_t *s, const DAWG_EDGE *eptr) const; - - /// Helper for searchBenj() - template - void searchBenjTempl(const wstring &iWord, vector 
&oWordList, - unsigned int iMaxResults) const; - - /// Helper for searchRacc() - template - void searchRaccTempl(const wstring &iWord, vector &oWordList, - unsigned int iMaxResults) const; + const DicEdge * seekEdgePtr(const wchar_t *s, const DicEdge *eptr) const; /// Helper for searchCross() - template - void searchCrossRecTempl(struct params_cross_t *params, - vector &oWordList, - const DAWG_EDGE *edgeptr, - unsigned int iMaxResults) const; + void searchCrossRec(struct params_cross_t *params, + vector &oWordList, + const DicEdge *edgeptr, + unsigned int iMaxResults) const; /// Helper for search7pl1() - template - void search7pl1Templ(const wstring &iRack, - map > &oWordList, - bool joker) const; - - /// Second helper for search7pl1() - template void searchWordByLen(struct params_7plus1_t *params, - int i, const DAWG_EDGE *edgeptr) const; + int i, const DicEdge *edgeptr) const; /// Helper for searchRegExp() - template - void searchRegexpRecTempl(struct params_regexp_t *params, - int state, - const DAWG_EDGE *edgeptr, - vector &oWordList, - unsigned int iMaxResults) const; + void searchRegexpRec(struct params_regexp_t *params, + int state, + const DicEdge *edgeptr, + vector &oWordList, + unsigned int iMaxResults) const; }; #endif /* _DIC_H_ */ diff --git a/dic/dic_internals.h b/dic/dic_internals.h index 01871db..eec5a2b 100644 --- a/dic/dic_internals.h +++ b/dic/dic_internals.h @@ -38,22 +38,6 @@ * ---------------- */ -struct __attribute__ ((packed)) DicEdgeOld -{ - public: - uint32_t - ptr : 24, - term: 1, - last: 1, - fill: 1, - chr : 5; - bool operator==(const DicEdgeOld &iOther) const - { - return memcmp(this, &iOther, sizeof(*this)) == 0; - } -}; - - struct __attribute__ ((packed)) DicEdge { public: diff --git a/dic/dic_search.cpp b/dic/dic_search.cpp index 7b09017..16dd8df 100644 --- a/dic/dic_search.cpp +++ b/dic/dic_search.cpp @@ -37,18 +37,17 @@ static const unsigned int DEFAULT_VECT_ALLOC = 100; -template -const DAWG_EDGE* Dictionary::seekEdgePtr(const 
wchar_t* s, const DAWG_EDGE *eptr) const +const DicEdge* Dictionary::seekEdgePtr(const wchar_t* s, const DicEdge *eptr) const { if (*s) { - const DAWG_EDGE *p = getEdgeAt(eptr->ptr); + const DicEdge *p = getEdgeAt(eptr->ptr); do { if (p->chr == getHeader().getCodeFromChar(*s)) return seekEdgePtr(s + 1, p); } while (!(*p++).last); - return getEdgeAt(0); + return getEdgeAt(0); } else return eptr; @@ -60,18 +59,8 @@ bool Dictionary::searchWord(const wstring &iWord) const if (!validateLetters(iWord)) return false; - if (getHeader().getVersion() == 0) - { - const DicEdgeOld *e = - seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot())); - return e->term; - } - else - { - const DicEdge *e = - seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot())); - return e->term; - } + const DicEdge *e = seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot())); + return e->term; } @@ -93,9 +82,8 @@ struct params_7plus1_t char search_letters[63]; }; -template void Dictionary::searchWordByLen(struct params_7plus1_t *params, - int i, const DAWG_EDGE *edgeptr) const + int i, const DicEdge *edgeptr) const { /* depth first search in the dictionary */ do @@ -120,7 +108,7 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params, } else { - searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr)); + searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr)); } params->search_letters[edgeptr->chr] ++; params->search_wordtst[i] = L'\0'; @@ -143,7 +131,7 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params, } else { - searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr)); + searchWordByLen(params, i + 1, getEdgeAt(edgeptr->ptr)); } params->search_letters[0] ++; params->search_wordtst[i] = L'\0'; @@ -153,10 +141,9 @@ void Dictionary::searchWordByLen(struct params_7plus1_t *params, } -template -void Dictionary::search7pl1Templ(const wstring &iRack, - map > &oWordList, - bool joker) const +void Dictionary::search7pl1(const wstring &iRack, + map > &oWordList, + bool joker) const { if (iRack == L"" || 
iRack.size() > DIC_WORD_MAX) return; @@ -196,8 +183,8 @@ void Dictionary::search7pl1Templ(const wstring &iRack, if (wordlen < 1) return; - const DAWG_EDGE *root_edge = getEdgeAt(getRoot()); - root_edge = getEdgeAt(root_edge->ptr); + const DicEdge *root_edge = getEdgeAt(getRoot()); + root_edge = getEdgeAt(root_edge->ptr); params.results = &oWordList; @@ -223,23 +210,12 @@ void Dictionary::search7pl1Templ(const wstring &iRack, } } - -void Dictionary::search7pl1(const wstring &iRack, - map > &oWordList, - bool joker) const -{ - if (getHeader().getVersion() == 0) - search7pl1Templ(iRack, oWordList, joker); - else - search7pl1Templ(iRack, oWordList, joker); -} - /****************************************/ /****************************************/ -template -void Dictionary::searchRaccTempl(const wstring &iWord, vector &oWordList, - unsigned int iMaxResults) const +void Dictionary::searchRacc(const wstring &iWord, + vector &oWordList, + unsigned int iMaxResults) const { if (iWord == L"") return; @@ -271,13 +247,13 @@ void Dictionary::searchRaccTempl(const wstring &iWord, vector &oWordLis wordtst[i ] = '\0'; wordtst[i+1] = '\0'; - const DAWG_EDGE *edge_seek = - seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot())); + const DicEdge *edge_seek = + seekEdgePtr(iWord.c_str(), getEdgeAt(getRoot())); /* points to what the next letter can be */ - const DAWG_EDGE *edge = getEdgeAt(edge_seek->ptr); + const DicEdge *edge = getEdgeAt(edge_seek->ptr); - if (edge != getEdgeAt(0)) + if (edge != getEdgeAt(0)) { do { @@ -292,21 +268,11 @@ void Dictionary::searchRaccTempl(const wstring &iWord, vector &oWordLis } } - -void Dictionary::searchRacc(const wstring &iWord, vector &oWordList, unsigned int iMaxResults) const -{ - if (getHeader().getVersion() == 0) - searchRaccTempl(iWord, oWordList, iMaxResults); - else - searchRaccTempl(iWord, oWordList, iMaxResults); -} - /****************************************/ /****************************************/ -template -void 
Dictionary::searchBenjTempl(const wstring &iWord, vector &oWordList, - unsigned int iMaxResults) const +void Dictionary::searchBenj(const wstring &iWord, vector &oWordList, + unsigned int iMaxResults) const { if (iWord == L"") return; @@ -319,17 +285,17 @@ void Dictionary::searchBenjTempl(const wstring &iWord, vector &oWordLis wchar_t wordtst[DIC_WORD_MAX]; wcscpy(wordtst + 3, iWord.c_str()); - const DAWG_EDGE *edge0, *edge1, *edge2, *edgetst; - edge0 = getEdgeAt(getRoot()); - edge0 = getEdgeAt(edge0->ptr); + const DicEdge *edge0, *edge1, *edge2, *edgetst; + edge0 = getEdgeAt(getRoot()); + edge0 = getEdgeAt(edge0->ptr); do { wordtst[0] = getHeader().getCharFromCode(edge0->chr); - edge1 = getEdgeAt(edge0->ptr); + edge1 = getEdgeAt(edge0->ptr); do { wordtst[1] = getHeader().getCharFromCode(edge1->chr); - edge2 = getEdgeAt(edge1->ptr); + edge2 = getEdgeAt(edge1->ptr); do { edgetst = seekEdgePtr(iWord.c_str(), edge2); @@ -345,16 +311,6 @@ void Dictionary::searchBenjTempl(const wstring &iWord, vector &oWordLis } while (!(*edge0++).last); } - -void Dictionary::searchBenj(const wstring &iWord, vector &oWordList, - unsigned int iMaxResults) const -{ - if (getHeader().getVersion() == 0) - searchBenjTempl(iWord, oWordList, iMaxResults); - else - searchBenjTempl(iWord, oWordList, iMaxResults); -} - /****************************************/ /****************************************/ @@ -365,16 +321,15 @@ struct params_cross_t }; -template -void Dictionary::searchCrossRecTempl(struct params_cross_t *params, - vector &oWordList, - const DAWG_EDGE *edgeptr, - unsigned int iMaxResults) const +void Dictionary::searchCrossRec(struct params_cross_t *params, + vector &oWordList, + const DicEdge *edgeptr, + unsigned int iMaxResults) const { if (iMaxResults && oWordList.size() >= iMaxResults) return; - const DAWG_EDGE *current = getEdgeAt(edgeptr->ptr); + const DicEdge *current = getEdgeAt(edgeptr->ptr); if (params->mask[params->wordlen] == '\0') { @@ -392,7 +347,7 @@ void 
Dictionary::searchCrossRecTempl(struct params_cross_t *params, { params->mask[params->wordlen] = getHeader().getCharFromCode(current->chr); params->wordlen ++; - searchCrossRecTempl(params, oWordList, current, iMaxResults); + searchCrossRec(params, oWordList, current, iMaxResults); params->wordlen --; params->mask[params->wordlen] = '.'; } @@ -405,7 +360,7 @@ void Dictionary::searchCrossRecTempl(struct params_cross_t *params, if (current->chr == getHeader().getCodeFromChar(params->mask[params->wordlen])) { params->wordlen ++; - searchCrossRecTempl(params, oWordList, current, iMaxResults); + searchCrossRec(params, oWordList, current, iMaxResults); params->wordlen --; break; } @@ -440,16 +395,7 @@ void Dictionary::searchCross(const wstring &iMask, vector &oWordList, params.mask[i] = '\0'; params.wordlen = 0; - if (getHeader().getVersion() == 0) - { - searchCrossRecTempl(&params, oWordList, - getEdgeAt(getRoot()), iMaxResults); - } - else - { - searchCrossRecTempl(&params, oWordList, - getEdgeAt(getRoot()), iMaxResults); - } + searchCrossRec(&params, oWordList, getEdgeAt(getRoot()), iMaxResults); } /****************************************/ @@ -461,16 +407,15 @@ struct params_regexp_t int maxlength; Automaton *automaton_field; wchar_t word[DIC_WORD_MAX]; - int wordlen; + int wordlen; }; -template -void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params, - int state, - const DAWG_EDGE *edgeptr, - vector &oWordList, - unsigned int iMaxResults) const +void Dictionary::searchRegexpRec(struct params_regexp_t *params, + int state, + const DicEdge *edgeptr, + vector &oWordList, + unsigned int iMaxResults) const { if (iMaxResults && oWordList.size() >= iMaxResults) return; @@ -487,7 +432,7 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params, } } /* we now drive the search by exploring the dictionary */ - const DAWG_EDGE *current = getEdgeAt(edgeptr->ptr); + const DicEdge *current = getEdgeAt(edgeptr->ptr); do { /* the current letter is current->chr */ @@ 
-498,7 +443,7 @@ void Dictionary::searchRegexpRecTempl(struct params_regexp_t *params, params->word[params->wordlen] = getHeader().getCharFromCode(current->chr); params->wordlen ++; - searchRegexpRecTempl(params, next_state, current, oWordList, iMaxResults); + searchRegexpRec(params, next_state, current, oWordList, iMaxResults); params->wordlen --; params->word[params->wordlen] = L'\0'; } @@ -599,19 +544,9 @@ bool Dictionary::searchRegExp(const wstring &iRegexp, params.automaton_field = a; memset(params.word, L'\0', sizeof(params.word)); params.wordlen = 0; - if (getHeader().getVersion() == 0) - { - searchRegexpRecTempl(&params, a->getInitId(), - getEdgeAt(getRoot()), oWordList, - iMaxResults ? iMaxResults + 1 : 0); - } - else - { - searchRegexpRecTempl(&params, a->getInitId(), - getEdgeAt(getRoot()), oWordList, - iMaxResults ? iMaxResults + 1 : 0); - } - + searchRegexpRec(&params, a->getInitId(), + getEdgeAt(getRoot()), oWordList, + iMaxResults ? iMaxResults + 1 : 0); delete a; } delete root; diff --git a/dic/grammar.cpp b/dic/grammar.cpp index 441ca56..3bba9ce 100644 --- a/dic/grammar.cpp +++ b/dic/grammar.cpp @@ -36,9 +36,6 @@ using namespace boost::spirit; using namespace std; -// TODO: -// - error handling - // A few typedefs to simplify things typedef const wchar_t *iterator_t; typedef tree_match parse_tree_match_t; diff --git a/dic/header.cpp b/dic/header.cpp index 2b83813..3326628 100644 --- a/dic/header.cpp +++ b/dic/header.cpp @@ -285,134 +285,78 @@ void Header::read(istream &iStream) m_version = aHeader.version; + // Version 0 corresponds to the dictionary format in the first Eliot + // versions, supported until Eliot 1.8 (excluded). + // The new version (version 1) was introduced in Eliot 1.6. + if (m_version == 0) + { + throw DicException(_("Too old dictionary format. This format is not " + "supported anymore since Eliot 1.8. 
You can " + "create dictionaries in the new format with the " + "'compdic' tool provided with Eliot (since " + "version 1.6).")); + } + // Handle endianness - if (m_version == 0) - { -#if defined(WORDS_BIGENDIAN) - aHeader.root = swap4(aHeader.root); - aHeader.nwords = swap4(aHeader.nwords); - aHeader.nodesused = swap4(aHeader.nodesused); - aHeader.edgesused = swap4(aHeader.edgesused); - aHeader.nodessaved = swap4(aHeader.nodessaved); - aHeader.edgessaved = swap4(aHeader.edgessaved); -#endif - m_root = aHeader.root; - m_nbWords = aHeader.nwords; - m_nodesUsed = aHeader.nodesused; - m_edgesUsed = aHeader.edgesused; - m_nodesSaved = aHeader.nodessaved; - m_edgesSaved = aHeader.edgessaved; - } + m_root = ntohl(aHeader.root); + m_nbWords = ntohl(aHeader.nwords); + m_nodesUsed = ntohl(aHeader.nodesused); + m_edgesUsed = ntohl(aHeader.edgesused); + m_nodesSaved = ntohl(aHeader.nodessaved); + m_edgesSaved = ntohl(aHeader.edgessaved); + + // After reading the old header, we now read the extension + Dict_header_ext aHeaderExt; + iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext)); + if (iStream.gcount() != sizeof(Dict_header_ext)) + throw DicException("Header::read: expected to read more bytes"); + + // Handle endianness in the extension + aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate); + aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize); + aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize); + aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize); + aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters); + aHeaderExt.vowels = ntohll(aHeaderExt.vowels); + aHeaderExt.consonants = ntohll(aHeaderExt.consonants); + + m_compressDate = aHeaderExt.compressDate; + + if (aHeaderExt.algorithm == kDAWG) + m_type = kDAWG; + else if (aHeaderExt.algorithm == kGADDAG) + m_type = kGADDAG; else + throw DicException("Header::read: unrecognized algorithm type"); + + m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize, + "user and host information"); 
+ + // Convert the dictionary letters from UTF-8 to wchar_t* + m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize, + "dictionary name"); + + // Convert the dictionary letters from UTF-8 to wchar_t* + m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize, + "dictionary letters"); + // Safety check: correct number of letters? + if (m_letters.size() != aHeaderExt.nbLetters) { - m_root = ntohl(aHeader.root); - m_nbWords = ntohl(aHeader.nwords); - m_nodesUsed = ntohl(aHeader.nodesused); - m_edgesUsed = ntohl(aHeader.edgesused); - m_nodesSaved = ntohl(aHeader.nodessaved); - m_edgesSaved = ntohl(aHeader.edgessaved); + throw DicException("Header::read: inconsistent header"); } - if (m_version == 0) + // Letters points and frequency + for (unsigned int i = 0; i < m_letters.size(); ++i) { - m_compressDate = 0; - m_userHost = convertToWc(_("Unknown (old format)")); - m_dicName = convertToWc(_("Unknown (old format)")); - - // In version 0, the letters, points, frequency, - // vowels and consonants were hard-coded... - m_letters = convertToWc("ABCDEFGHIJKLMNOPQRSTUVWXYZ?"); - - static const uint8_t Frenchpoints[] = - { - // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ? - 1,3,3,2, 1,4,2,4,1,8,10,1,2,1,1,3,8,1,1,1,1,4,10,10,10,10,0 - }; - - static const uint8_t FrenchFrequency[] = - { - // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ? - 9,2,2,3,15,2,2,2,8,1, 1,5,3,6,6,2,1,6,6,6,6,2, 1, 1, 1, 1,2 - }; - - // The jokers and the 'Y' can be considered both as vowels or consonants - static const uint8_t FrenchVowels[] = - { - // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ? - 1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0,1 - }; - - static const uint8_t FrenchConsonants[] = - { - // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ? 
- 0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1,1 - }; - - for (unsigned int i = 0; i < m_letters.size(); ++i) - { - m_points.push_back(Frenchpoints[i]); - m_frequency.push_back(FrenchFrequency[i]); - m_vowels.push_back(FrenchVowels[i]); - m_consonants.push_back(FrenchConsonants[i]); - } + m_points.push_back(aHeaderExt.points[i]); + m_frequency.push_back(aHeaderExt.frequency[i]); } - else + + // Vowels and consonants + for (unsigned int i = 0; i < m_letters.size(); ++i) { - // This header doesn't use the old serialization format, so read the - // extension as well - Dict_header_ext aHeaderExt; - iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext)); - if (iStream.gcount() != sizeof(Dict_header_ext)) - throw DicException("Header::read: expected to read more bytes"); - - // Handle endianness in the extension - aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate); - aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize); - aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize); - aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize); - aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters); - aHeaderExt.vowels = ntohll(aHeaderExt.vowels); - aHeaderExt.consonants = ntohll(aHeaderExt.consonants); - - m_compressDate = aHeaderExt.compressDate; - - if (aHeaderExt.algorithm == kDAWG) - m_type = kDAWG; - else if (aHeaderExt.algorithm == kGADDAG) - m_type = kGADDAG; - else - throw DicException("Header::read: unrecognized algorithm type"); - - m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize, - "user and host information"); - - // Convert the dictionary letters from UTF-8 to wchar_t* - m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize, - "dictionary name"); - - // Convert the dictionary letters from UTF-8 to wchar_t* - m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize, - "dictionary letters"); - // Safety check: correct number of letters? 
- if (m_letters.size() != aHeaderExt.nbLetters) - { - throw DicException("Header::read: inconsistent header"); - } - - // Letters points and frequency - for (unsigned int i = 0; i < m_letters.size(); ++i) - { - m_points.push_back(aHeaderExt.points[i]); - m_frequency.push_back(aHeaderExt.frequency[i]); - } - - // Vowels and consonants - for (unsigned int i = 0; i < m_letters.size(); ++i) - { - m_vowels.push_back(aHeaderExt.vowels & (1 << i)); - m_consonants.push_back(aHeaderExt.consonants & (1 << i)); - } - + m_vowels.push_back(aHeaderExt.vowels & (1 << i)); + m_consonants.push_back(aHeaderExt.consonants & (1 << i)); } } @@ -434,81 +378,70 @@ void Header::write(ostream &oStream) const if (!oStream.good()) throw DicException("Header::write: error when writing to file"); - if (m_version != 0) + Dict_header_ext aHeaderExt; + aHeaderExt.compressDate = m_compressDate; + aHeaderExt.userHostSize = + writeInUTF8(m_userHost, aHeaderExt.userHost, + _MAX_USER_HOST_, "user and host information"); + aHeaderExt.algorithm = m_type; + + // Convert the dictionary name to UTF-8 + aHeaderExt.dicNameSize = + writeInUTF8(m_dicName, aHeaderExt.dicName, + _MAX_DIC_NAME_SIZE_, "dictionary name"); + + // Convert the dictionary letters to UTF-8 + aHeaderExt.lettersSize = + writeInUTF8(m_letters, aHeaderExt.letters, + _MAX_LETTERS_SIZE_, "dictionary letters"); + aHeaderExt.nbLetters = (uint32_t)m_letters.size(); + + // Letters points and frequency + for (unsigned int i = 0; i < m_letters.size(); ++i) { - Dict_header_ext aHeaderExt; - aHeaderExt.compressDate = m_compressDate; - aHeaderExt.userHostSize = - writeInUTF8(m_userHost, aHeaderExt.userHost, - _MAX_USER_HOST_, "user and host information"); - aHeaderExt.algorithm = m_type; - - // Convert the dictionary name to UTF-8 - aHeaderExt.dicNameSize = - writeInUTF8(m_dicName, aHeaderExt.dicName, - _MAX_DIC_NAME_SIZE_, "dictionary name"); - - // Convert the dictionary letters to UTF-8 - aHeaderExt.lettersSize = - writeInUTF8(m_letters, 
aHeaderExt.letters, - _MAX_LETTERS_SIZE_, "dictionary letters"); - aHeaderExt.nbLetters = (uint32_t)m_letters.size(); - - // Letters points and frequency - for (unsigned int i = 0; i < m_letters.size(); ++i) - { - aHeaderExt.points[i] = m_points[i]; - aHeaderExt.frequency[i] = m_frequency[i]; - } - - // Vowels and consonants - aHeaderExt.vowels = 0; - aHeaderExt.consonants = 0; - for (unsigned int i = 0; i < m_letters.size(); ++i) - { - if (m_vowels[i]) - aHeaderExt.vowels |= 1 << i; - if (m_consonants[i]) - aHeaderExt.consonants |= 1 << i; - } - - // Handle endianness in the extension - aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize); - aHeaderExt.compressDate = htonll(aHeaderExt.compressDate); - aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize); - aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize); - aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters); - aHeaderExt.vowels = htonll(aHeaderExt.vowels); - aHeaderExt.consonants = htonll(aHeaderExt.consonants); - - // Write the extension - oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext)); - if (!oStream.good()) - throw DicException("Header::write: error when writing to file"); + aHeaderExt.points[i] = m_points[i]; + aHeaderExt.frequency[i] = m_frequency[i]; } + + // Vowels and consonants + aHeaderExt.vowels = 0; + aHeaderExt.consonants = 0; + for (unsigned int i = 0; i < m_letters.size(); ++i) + { + if (m_vowels[i]) + aHeaderExt.vowels |= 1 << i; + if (m_consonants[i]) + aHeaderExt.consonants |= 1 << i; + } + + // Handle endianness in the extension + aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize); + aHeaderExt.compressDate = htonll(aHeaderExt.compressDate); + aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize); + aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize); + aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters); + aHeaderExt.vowels = htonll(aHeaderExt.vowels); + aHeaderExt.consonants = htonll(aHeaderExt.consonants); + + // Write the extension + 
oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext)); + if (!oStream.good()) + throw DicException("Header::write: error when writing to file"); } void Header::print() const { printf(_("dictionary name: %s\n"), convertToMb(m_dicName).c_str()); - if (m_version) - { - char buf[50]; - strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate)); - printf(_("compressed on: %s\n"), buf); - } - else - { - printf(_("compressed on: Unknown date (old format)\n")); - } + char buf[50]; + strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate)); + printf(_("compressed on: %s\n"), buf); printf(_("compressed using a binary compiled by: %s\n"), convertToMb(m_userHost).c_str()); printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG"); printf(_("letters: %s\n"), convertToMb(m_letters).c_str()); printf(_("number of letters: %lu\n"), (long unsigned int)m_letters.size()); printf(_("number of words: %d\n"), m_nbWords); - long unsigned int size = - sizeof(Dict_header_old) + (m_version ? sizeof(Dict_header_ext) : 0); + long unsigned int size = sizeof(Dict_header_old) + sizeof(Dict_header_ext); printf(_("header size: %lu bytes\n"), size); printf(_("root: %d (edge)\n"), m_root); printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved); diff --git a/dic/listdic.cpp b/dic/listdic.cpp index bc2b406..a6fb43a 100644 --- a/dic/listdic.cpp +++ b/dic/listdic.cpp @@ -46,8 +46,7 @@ using namespace std; -template -static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wchar_t *s, DAWG_EDGE i) +static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wchar_t *s, DicEdge i) { if (i.term) /* edge points at a complete word */ { @@ -56,7 +55,7 @@ static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wc } if (i.ptr) { /* Compute index: is it non-zero ? 
*/ - const DAWG_EDGE *p = reinterpret_cast(iDic.getEdgeAt(i.ptr)); + const DicEdge *p = iDic.getEdgeAt(i.ptr); do { /* for each edge out of this node */ *s = iDic.getHeader().getCharFromCode(p->chr); @@ -67,24 +66,22 @@ static void print_dic_rec(ostream &out, const Dictionary &iDic, wchar_t *buf, wc } -template void print_dic_list(const Dictionary &iDic) { static wchar_t buf[80]; - print_dic_rec(cout, iDic, buf, buf, *reinterpret_cast(iDic.getEdgeAt(iDic.getRoot()))); + print_dic_rec(cout, iDic, buf, buf, *iDic.getEdgeAt(iDic.getRoot())); } -template static void print_node_hex(const Dictionary &dic, int i) { union edge_t { - DAWG_EDGE e; + DicEdge e; uint32_t s; } ee; - ee.e = *reinterpret_cast(dic.getEdgeAt(i)); + ee.e = *reinterpret_cast(dic.getEdgeAt(i)); printf("0x%04lx %08x |%4d ptr=%8d t=%d l=%d chr=%2d (%c)\n", (unsigned long)i*sizeof(ee), (unsigned int)(ee.s), @@ -92,13 +89,12 @@ static void print_node_hex(const Dictionary &dic, int i) } -template void print_dic_hex(const Dictionary &iDic) { printf(_("offset binary | structure\n")); printf("------ -------- | --------------------\n"); for (unsigned int i = 0; i < (iDic.getHeader().getNbEdgesUsed() + 1); i++) - print_node_hex(iDic, i); + print_node_hex(iDic, i); } @@ -172,17 +168,11 @@ int main(int argc, char *argv[]) } if (option_print_dic_hex || option_print_all) { - if (dic.getHeader().getVersion() == 0) - print_dic_hex(dic); - else - print_dic_hex(dic); + print_dic_hex(dic); } if (option_print_dic_list || option_print_all) { - if (dic.getHeader().getVersion() == 0) - print_dic_list(dic); - else - print_dic_list(dic); + print_dic_list(dic); } return 0; } diff --git a/qt/main_window.cpp b/qt/main_window.cpp index ea8a615..5cbb9cb 100644 --- a/qt/main_window.cpp +++ b/qt/main_window.cpp @@ -230,21 +230,9 @@ void MainWindow::updateStatusBar(const Dictionary *iDic) if (iDic == NULL) m_dicNameLabel->setText("No dictionary"); else { - if (iDic->getHeader().getVersion() != 0) - { - QString dicName = 
qfw(m_dic->getHeader().getName()); - m_dicNameLabel->setText(_q("Dictionary: %1").arg(dicName)); - m_dicNameLabel->setToolTip(""); - } - else - { - m_dicNameLabel->setText(_q("Dictionary: Unknown (old format)")); - QString warning = _q("The dictionary name cannot be " - "retrieved, because you are using an old dictionary format.\n" - "You can probably download a newer version of the dictionary " - "on http://www.nongnu.org/eliot/"); - m_dicNameLabel->setToolTip(warning); - } + QString dicName = qfw(m_dic->getHeader().getName()); + m_dicNameLabel->setText(_q("Dictionary: %1").arg(dicName)); + m_dicNameLabel->setToolTip(""); } }