2008-01-08 14:52:32 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
* Eliot
|
|
|
|
* Copyright (C) 2002-2007 Antoine Fraboulet & Olivier Teulière
|
|
|
|
* Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
|
|
|
|
* Olivier Teulière <ipkiss @@ gmail.com>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*****************************************************************************/
|
2004-04-08 11:43:06 +02:00
|
|
|
|
|
|
|
#ifndef _DIC_H_
|
|
|
|
#define _DIC_H_
|
2005-11-04 21:00:05 +01:00
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <map>
|
|
|
|
|
|
|
|
#include "tile.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
2005-11-04 21:00:05 +01:00
|
|
|
/**
|
|
|
|
* max length of words (including last \0)
|
|
|
|
*/
|
2005-04-19 18:26:50 +02:00
|
|
|
#define DIC_WORD_MAX 16
|
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
class Header;
|
2005-04-20 20:11:00 +02:00
|
|
|
/// Dictionary element handle; Dictionary::getEdgeAt() uses it as an offset from the start of the DAWG array
typedef unsigned int dic_elt_t;
|
2006-01-01 20:51:00 +01:00
|
|
|
/// Code of an encoded character, as returned by Dictionary::getCode()
typedef unsigned char dic_code_t;
|
2008-01-08 14:52:32 +01:00
|
|
|
struct params_cross_t;
|
|
|
|
struct params_7plus1_t;
|
|
|
|
struct params_regexp_t;
|
2008-11-22 14:11:48 +01:00
|
|
|
class DicEdge;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2009-06-23 14:41:53 +02:00
|
|
|
/**
|
|
|
|
* A wdstring is a display string, i.e. it can contain more chars than
|
|
|
|
* the represented string. The difference arises in languages such as Catalan,
|
|
|
|
* where for example "QU" is made of 2 real characters, but corresponds to a
|
|
|
|
* single tile.
|
|
|
|
*
|
|
|
|
* The wdstring type has no particular interest other than signaling
|
|
|
|
* a bit more precisely the type of contents of the string.
|
|
|
|
*/
|
|
|
|
typedef wstring wdstring;
|
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
class Dictionary
|
|
|
|
{
|
|
|
|
public:
|
2006-04-16 13:27:19 +02:00
|
|
|
/**
|
2008-01-08 14:52:32 +01:00
|
|
|
* Dictionary creation and loading from a file
|
|
|
|
* @param path: compressed dictionary path
|
2006-04-16 13:27:19 +02:00
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
Dictionary(const string &path);
|
|
|
|
|
|
|
|
/// Destructor
|
|
|
|
~Dictionary();
|
2006-04-16 13:27:19 +02:00
|
|
|
|
2004-04-08 11:43:06 +02:00
|
|
|
/**
|
2008-01-08 14:52:32 +01:00
|
|
|
* Return the current instance of the dictionary object
|
|
|
|
* XXX: This is ugly, but I don't see any clean way apart from carrying
|
|
|
|
* a reference to a Dictionary object in many places...
|
|
|
|
* Other more or less ugly options:
|
|
|
|
* - Make the dictionary a singleton (2 dictionaries cannot coexist...)
|
|
|
|
* - Make many classes inherit from a common base class with a dictionary
|
|
|
|
* member (possibly bad for performances)
|
|
|
|
* A new created dictionary replaces the previous instance, even if the
|
|
|
|
* previous instance is not destroyed yet
|
|
|
|
* If no dictionary object is instanciated when this method is called,
|
|
|
|
* it will probably crash...
|
2004-04-08 11:43:06 +02:00
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
static const Dictionary& GetDic() { return *m_dic; }
|
|
|
|
|
|
|
|
/** Give access to the dictionary header */
|
|
|
|
const Header& getHeader() const { return *m_header; }
|
2004-04-08 11:43:06 +02:00
|
|
|
|
|
|
|
/**
|
2008-01-08 14:52:32 +01:00
|
|
|
* Check whether all the given letters are present in the dictionary,
|
|
|
|
* or are one of the other accepted letters.
|
|
|
|
* Return true if this is the case, false otherwise
|
2004-04-08 11:43:06 +02:00
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
bool validateLetters(const wstring &iLetters,
|
|
|
|
const wstring &iAccepted = L"") const;
|
|
|
|
|
|
|
|
/** Return a vector containing one of each possible tile */
|
|
|
|
const vector<Tile>& getAllTiles() const { return m_tilesVect; }
|
|
|
|
|
|
|
|
/** Return the number of different tiles (including the joker) */
|
|
|
|
unsigned int getTileNumber() const { return m_tilesVect.size(); }
|
|
|
|
|
|
|
|
/** Return a tile from its code */
|
|
|
|
const Tile &getTileFromCode(unsigned int iCode) const { return m_tilesVect[iCode - 1]; }
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
/**
|
2008-01-08 14:52:32 +01:00
|
|
|
* Returns the character code associated with an element,
|
2008-07-07 19:29:59 +02:00
|
|
|
* codes may range from 0 to 63. 0 is the null character.
|
2006-01-01 20:51:00 +01:00
|
|
|
* @returns code for the encoded character
|
2005-04-19 18:26:50 +02:00
|
|
|
*/
|
2008-07-27 15:32:47 +02:00
|
|
|
dic_code_t getCode(const dic_elt_t &elt) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the wide character associated with an element.
|
|
|
|
* @returns wide character for the element
|
|
|
|
*/
|
|
|
|
wchar_t getChar(const dic_elt_t &elt) const;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2005-04-20 20:11:00 +02:00
|
|
|
/**
|
|
|
|
* Returns a boolean to show if there is another available
|
|
|
|
* character in the current depth (a neighbor in the tree)
|
2008-07-07 19:29:59 +02:00
|
|
|
* @return true if the character is the last one at the current depth
|
2005-04-20 20:11:00 +02:00
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
bool isLast(const dic_elt_t &elt) const;
|
2005-04-20 20:11:00 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a boolean to show if we are at the end of a word
|
2008-07-07 19:29:59 +02:00
|
|
|
* (see getNext())
|
|
|
|
* @return true if this is the end of a word
|
2005-04-20 20:11:00 +02:00
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
bool isEndOfWord(const dic_elt_t &elt) const;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2005-11-04 21:00:05 +01:00
|
|
|
/**
|
2005-04-20 20:11:00 +02:00
|
|
|
* Returns the root of the dictionary
|
|
|
|
* @returns root element
|
|
|
|
*/
|
2008-07-27 15:32:47 +02:00
|
|
|
dic_elt_t getRoot() const;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2005-04-20 20:11:00 +02:00
|
|
|
/**
|
2008-07-07 19:29:59 +02:00
|
|
|
* Returns the next available neighbor (see isLast())
|
2005-04-20 20:11:00 +02:00
|
|
|
* @returns next dictionary element at the same depth
|
|
|
|
*/
|
2008-07-27 15:32:47 +02:00
|
|
|
dic_elt_t getNext(const dic_elt_t &elt) const;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
/**
|
2005-04-20 20:11:00 +02:00
|
|
|
* Returns the first element available at the next depth
|
|
|
|
* in the dictionary
|
|
|
|
* @params elt : current dictionary element
|
|
|
|
* @returns next element (successor)
|
2005-04-19 18:26:50 +02:00
|
|
|
*/
|
2008-07-27 15:32:47 +02:00
|
|
|
dic_elt_t getSucc(const dic_elt_t &elt) const;
|
2004-04-08 11:43:06 +02:00
|
|
|
|
2006-01-01 20:51:00 +01:00
|
|
|
/**
|
|
|
|
* Find the dictionary element matching the pattern starting
|
|
|
|
* from the given root node by walking the dictionary tree
|
|
|
|
* @params root : starting dictionary node for the search
|
|
|
|
* @params pattern : string encoded according to the dictionary codes,
|
|
|
|
* the pattern must be null ('\0') terminated
|
|
|
|
* @returns 0 if the string cannot be matched otherwise returns the
|
|
|
|
* element that results from walking the dictionary according to the
|
|
|
|
* pattern
|
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
unsigned int lookup(const dic_elt_t &root, const dic_code_t *pattern) const;
|
2006-01-01 20:51:00 +01:00
|
|
|
|
2005-04-20 20:11:00 +02:00
|
|
|
/**
|
|
|
|
* Find the dictionary element matching the pattern starting
|
|
|
|
* from the given root node by walking the dictionary tree
|
|
|
|
* @params root : starting dictionary node for the search
|
2005-12-24 19:42:44 +01:00
|
|
|
* @params pattern : string made of uppercase characters in the range
|
|
|
|
* ['A'-'Z']. The pattern must be null ('\0') terminated
|
2005-11-04 21:00:05 +01:00
|
|
|
* @returns 0 if the string cannot be matched otherwise returns the
|
2005-04-20 20:11:00 +02:00
|
|
|
* element that results from walking the dictionary according to the
|
|
|
|
* pattern
|
|
|
|
*/
|
2008-01-08 14:52:32 +01:00
|
|
|
unsigned int charLookup(const dic_elt_t &iRoot, const wchar_t *iPattern) const;
|
|
|
|
|
|
|
|
/// Getter for the edge at the given position
|
2008-11-22 14:11:48 +01:00
|
|
|
const DicEdge * getEdgeAt(const dic_elt_t &iElt) const
|
|
|
|
{
|
|
|
|
return reinterpret_cast<const DicEdge*>(m_dawg + iElt);
|
|
|
|
}
|
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Search for a word in the dictionary
|
|
|
|
* @param iWord: lookup word
|
|
|
|
* @return true if the word is valid, false otherwise
|
|
|
|
*/
|
|
|
|
bool searchWord(const wstring &iWord) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search for benjamins
|
|
|
|
* @param iWord: letters
|
|
|
|
* @param oWordList: results
|
2008-03-02 19:45:10 +01:00
|
|
|
* @param iMaxResults: maximum number of returned results (0 means no limit)
|
2008-01-08 14:52:32 +01:00
|
|
|
*/
|
2009-06-23 14:41:53 +02:00
|
|
|
void searchBenj(const wstring &iWord, vector<wdstring> &oWordList,
|
2008-03-02 19:45:10 +01:00
|
|
|
unsigned int iMaxResults = 0) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Search for all words feasible by adding a letter in front or at the end
|
|
|
|
* @param iWord: word
|
|
|
|
* @param oWordList: results
|
2008-03-02 19:45:10 +01:00
|
|
|
* @param iMaxResults: maximum number of returned results (0 means no limit)
|
2008-01-08 14:52:32 +01:00
|
|
|
*/
|
2009-06-23 14:41:53 +02:00
|
|
|
void searchRacc(const wstring &iWord, vector<wdstring> &oWordList,
|
2008-03-02 19:45:10 +01:00
|
|
|
unsigned int iMaxResults = 0) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Search for all feasible word with "rack" plus one letter
|
|
|
|
* @param iRack: letters
|
2009-06-23 14:41:53 +02:00
|
|
|
* @param oWordlist: results (grouped by added character)
|
2008-01-08 14:52:32 +01:00
|
|
|
* @param joker: true if the search must be performed when a joker is in the rack
|
2008-03-02 19:45:10 +01:00
|
|
|
* @param iMaxResults: maximum number of returned results (0 means no limit)
|
2008-01-08 14:52:32 +01:00
|
|
|
*/
|
|
|
|
void search7pl1(const wstring &iRack,
|
2009-06-23 14:41:53 +02:00
|
|
|
map<wstring, vector<wdstring> > &oWordList,
|
2008-01-08 14:52:32 +01:00
|
|
|
bool joker) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search for words matching a regular expression
|
|
|
|
* @param iRegexp: regular expression
|
|
|
|
* @param oWordList: results
|
|
|
|
* @param iList: parameters for the search (?)
|
2008-03-02 19:45:10 +01:00
|
|
|
* @param iMaxResults: maximum number of returned results (0 means no limit)
|
2008-08-27 23:20:03 +02:00
|
|
|
* @return true if all the matching words were returned, false otherwise
|
|
|
|
* (i.e. if the maximum number of results was reached, and there are
|
|
|
|
* additional results)
|
2008-08-31 13:48:11 +02:00
|
|
|
* @throw InvalidRegexpException When the regular expression cannot be parsed
|
2008-01-08 14:52:32 +01:00
|
|
|
*/
|
2008-08-27 23:20:03 +02:00
|
|
|
bool searchRegExp(const wstring &iRegexp,
|
2009-06-23 14:41:53 +02:00
|
|
|
vector<wdstring> &oWordList,
|
2008-07-27 15:32:47 +02:00
|
|
|
unsigned int iMinLength,
|
|
|
|
unsigned int iMaxLength,
|
2008-03-02 19:45:10 +01:00
|
|
|
unsigned int iMaxResults = 0) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
// Prevent from copying the dictionary!
|
|
|
|
Dictionary &operator=(const Dictionary&);
|
|
|
|
Dictionary(const Dictionary&);
|
|
|
|
|
|
|
|
Header *m_header;
|
|
|
|
uint32_t *m_dawg;
|
|
|
|
|
|
|
|
/** Letters of the dictionary, both in uppercase and lowercase */
|
|
|
|
wstring m_allLetters;
|
|
|
|
|
|
|
|
/// Vector of available tiles
|
|
|
|
vector<Tile> m_tilesVect;
|
|
|
|
|
|
|
|
static const Dictionary *m_dic;
|
|
|
|
|
|
|
|
void convertDataToArch();
|
|
|
|
void initializeTiles();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Walk the dictionary until the end of the word
|
|
|
|
* @param s: current pointer to letters
|
|
|
|
* @param eptr: current edge in the dawg
|
|
|
|
*/
|
2008-11-22 14:11:48 +01:00
|
|
|
const DicEdge * seekEdgePtr(const wchar_t *s, const DicEdge *eptr) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/// Helper for search7pl1()
|
2009-06-23 14:41:53 +02:00
|
|
|
void searchWordByLen(struct params_7plus1_t ¶ms,
|
2008-11-22 14:11:48 +01:00
|
|
|
int i, const DicEdge *edgeptr) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
/// Helper for searchRegExp()
|
2009-06-23 14:41:53 +02:00
|
|
|
void searchRegexpRec(const struct params_regexp_t ¶ms,
|
2008-11-22 14:11:48 +01:00
|
|
|
int state,
|
|
|
|
const DicEdge *edgeptr,
|
2009-06-23 14:41:53 +02:00
|
|
|
vector<wdstring> &oWordList,
|
|
|
|
unsigned int iMaxResults,
|
|
|
|
const wdstring &iCurrWord = L"",
|
|
|
|
unsigned int iNbChars = 0) const;
|
2008-01-08 14:52:32 +01:00
|
|
|
};
|
2004-08-07 20:10:42 +02:00
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
#endif /* _DIC_H_ */
|
2006-01-01 20:51:00 +01:00
|
|
|
|