2010-05-15 14:14:53 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
* Eliot
|
|
|
|
* Copyright (C) 1999-2007 Antoine Fraboulet & Olivier Teulière
|
|
|
|
* Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
|
|
|
|
* Olivier Teulière <ipkiss @@ gmail.com>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
|
|
|
#include <boost/format.hpp>
|
|
|
|
#include <boost/foreach.hpp>
|
|
|
|
#include <boost/tokenizer.hpp>
|
|
|
|
#include <getopt.h>
|
|
|
|
|
|
|
|
#if ENABLE_NLS
|
|
|
|
# include <libintl.h>
|
|
|
|
# define _(String) gettext(String)
|
|
|
|
#else
|
|
|
|
# define _(String) String
|
|
|
|
#endif
|
|
|
|
#ifdef WIN32
|
|
|
|
# include <windows.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "compdic.h"
|
|
|
|
#include "dic_exception.h"
|
|
|
|
#include "encoding.h"
|
|
|
|
#include "header.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
// Useful shortcut
|
|
|
|
#define fmt(a) boost::format(a)
|
|
|
|
|
|
|
|
|
|
|
|
void readLetters(const string &iFileName, CompDic &ioBuilder)
|
|
|
|
{
|
|
|
|
ifstream in(iFileName.c_str());
|
|
|
|
if (!in.is_open())
|
|
|
|
throw DicException((fmt(_("Could not open file '%1%'")) % iFileName).str());
|
|
|
|
|
|
|
|
// Use a more friendly type name
|
|
|
|
typedef boost::tokenizer<boost::char_separator<wchar_t>,
|
|
|
|
std::wstring::const_iterator,
|
|
|
|
std::wstring> Tokenizer;
|
|
|
|
|
|
|
|
int lineNb = 1;
|
|
|
|
string line;
|
|
|
|
while (getline(in, line))
|
|
|
|
{
|
|
|
|
// Ignore empty lines
|
|
|
|
if (line == "" || line == "\r" || line == "\n")
|
|
|
|
continue;
|
|
|
|
|
2011-07-16 23:13:42 +02:00
|
|
|
// If there is a BOM in the file, remove it from the first line
|
|
|
|
if (ioBuilder.getLettersCount() == 0 && line.size() >= 3 &&
|
|
|
|
(uint8_t)line[0] == 0xEF &&
|
|
|
|
(uint8_t)line[1] == 0xBB &&
|
|
|
|
(uint8_t)line[2] == 0xBF)
|
|
|
|
{
|
|
|
|
line = line.substr(3);
|
|
|
|
}
|
|
|
|
|
2010-05-15 14:14:53 +02:00
|
|
|
// Convert the line to a wstring
|
2011-07-30 21:46:47 +02:00
|
|
|
const wstring &wline = readFromUTF8(line, "readLetters (1)");
|
2010-05-15 14:14:53 +02:00
|
|
|
// Split the lines on space characters
|
|
|
|
boost::char_separator<wchar_t> sep(L" ");
|
|
|
|
Tokenizer tok(wline, sep);
|
|
|
|
Tokenizer::iterator it;
|
|
|
|
vector<wstring> tokens(tok.begin(), tok.end());
|
|
|
|
|
|
|
|
// We expect at least 5 fields on the line
|
|
|
|
if (tokens.size() < 5)
|
|
|
|
{
|
|
|
|
ostringstream ss;
|
|
|
|
ss << fmt(_("readLetters: Not enough fields "
|
|
|
|
"in %1% (line %2%)")) % iFileName % lineNb;
|
|
|
|
throw DicException(ss.str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// The first field is a single character
|
|
|
|
wstring letter = tokens[0];
|
|
|
|
if (letter.size() != 1)
|
|
|
|
{
|
|
|
|
ostringstream ss;
|
|
|
|
ss << fmt(_("readLetters: Invalid letter at line %1% "
|
|
|
|
"(only one character allowed)")) % lineNb;
|
|
|
|
throw DicException(ss.str());
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<wstring> inputs;
|
|
|
|
if (tokens.size() > 5)
|
|
|
|
{
|
|
|
|
inputs = vector<wstring>(tokens.begin() + 5, tokens.end());
|
|
|
|
}
|
2010-05-20 22:27:47 +02:00
|
|
|
ioBuilder.addLetter(letter[0], wtoi(tokens[1].c_str()),
|
|
|
|
wtoi(tokens[2].c_str()), wtoi(tokens[3].c_str()),
|
|
|
|
wtoi(tokens[4].c_str()), inputs);
|
2010-05-15 14:14:53 +02:00
|
|
|
|
|
|
|
++lineNb;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void printUsage(const string &iBinaryName)
|
|
|
|
{
|
|
|
|
cout << "Usage: " << iBinaryName << " [options]" << endl
|
|
|
|
<< _("Mandatory options:") << endl
|
|
|
|
<< _(" -d, --dicname <string> Set the dictionary name and version") << endl
|
|
|
|
<< _(" -l, --letters <string> Path to the file containing the letters (see below)") << endl
|
|
|
|
<< _(" -i, --input <string> Path to the uncompressed dictionary file (encoded in UTF-8)") << endl
|
|
|
|
<< _(" The words must be in alphabetical order, without duplicates") << endl
|
2011-01-08 23:27:59 +01:00
|
|
|
<< _(" -o, --output <string> Path to the generated compressed dictionary file") << endl
|
2010-05-15 14:14:53 +02:00
|
|
|
<< _("Other options:") << endl
|
|
|
|
<< _(" -h, --help Print this help and exit") << endl
|
|
|
|
<< _("Example:") << endl
|
|
|
|
<< " " << iBinaryName << _(" -d 'ODS 5.0' -l letters.txt -i ods5.txt -o ods5.dawg") << endl
|
|
|
|
<< endl
|
|
|
|
<< _("The file containing the letters (--letters switch) must be UTF-8 encoded.") << endl
|
|
|
|
<< _("Each line corresponds to one letter, and must contain at least 5 fields separated with "
|
|
|
|
"one or more space(s).") << endl
|
|
|
|
<< _(" - 1st field: the letter itself, as stored in the input file (single character)") << endl
|
|
|
|
<< _(" - 2nd field: the points of the letter") << endl
|
|
|
|
<< _(" - 3rd field: the frequency of the letter (how many letters of this kind in the game)") << endl
|
|
|
|
<< _(" - 4th field: 1 if the letter is considered as a vowel in Scrabble game, 0 otherwise") << endl
|
|
|
|
<< _(" - 5th field: 1 if the letter is considered as a consonant in Scrabble game, 0 otherwise") << endl
|
|
|
|
<< _(" - 6th field (optional): display string for the letter (default: the letter itself)") << endl
|
|
|
|
<< _(" - other fields (optional): input strings for the letter, in addition to the display string") << endl
|
|
|
|
<< endl
|
|
|
|
<< _("Example for french:") << endl
|
|
|
|
<< "A 1 9 1 0" << endl
|
|
|
|
<< "[...]" << endl
|
|
|
|
<< "Z 10 1 0 1" << endl
|
|
|
|
<< "? 0 2 1 1" << endl
|
|
|
|
<< endl
|
|
|
|
<< _("Example for catalan:") << endl
|
|
|
|
<< "A 1 12 1 0" << endl
|
|
|
|
<< "[...]" << endl
|
|
|
|
// TRANSLATORS: the first "L.L" must be translated "L·L",
|
|
|
|
// and the last one translated "ĿL"
|
|
|
|
<< _("W 10 1 0 1 L.L L.L L-L L.L") << endl
|
|
|
|
<< "X 10 1 0 1" << endl
|
|
|
|
<< "Y 10 1 0 1 NY" << endl
|
|
|
|
<< "[...]" << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
#if HAVE_SETLOCALE
|
|
|
|
// Set locale via LC_ALL
|
|
|
|
setlocale(LC_ALL, "");
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if ENABLE_NLS
|
|
|
|
// Set the message domain
|
|
|
|
#ifdef WIN32
|
|
|
|
// Get the absolute path, as returned by GetFullPathName()
|
|
|
|
char baseDir[MAX_PATH];
|
|
|
|
GetFullPathName(argv[0], MAX_PATH, baseDir, NULL);
|
|
|
|
char *pos = strrchr(baseDir, L'\\');
|
|
|
|
if (pos)
|
|
|
|
*pos = '\0';
|
|
|
|
const string localeDir = baseDir + string("\\locale");
|
|
|
|
#else
|
|
|
|
static const string localeDir = LOCALEDIR;
|
|
|
|
#endif
|
|
|
|
bindtextdomain(PACKAGE, localeDir.c_str());
|
|
|
|
textdomain(PACKAGE);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static const struct option long_options[] =
|
|
|
|
{
|
|
|
|
{"help", no_argument, NULL, 'h'},
|
|
|
|
{"dicname", required_argument, NULL, 'd'},
|
|
|
|
{"letters", required_argument, NULL, 'l'},
|
|
|
|
{"input", required_argument, NULL, 'i'},
|
|
|
|
{"output", required_argument, NULL, 'o'},
|
|
|
|
{0, 0, 0, 0}
|
|
|
|
};
|
|
|
|
static const char short_options[] = "hd:l:i:o:";
|
|
|
|
|
|
|
|
bool found_d = false;
|
|
|
|
bool found_l = false;
|
|
|
|
bool found_i = false;
|
|
|
|
bool found_o = false;
|
|
|
|
string dicName;
|
|
|
|
string inFileName;
|
|
|
|
string outFileName;
|
|
|
|
CompDic builder;
|
|
|
|
|
|
|
|
int res;
|
|
|
|
int option_index = 1;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
while ((res = getopt_long(argc, argv, short_options,
|
|
|
|
long_options, &option_index)) != -1)
|
|
|
|
{
|
|
|
|
switch (res)
|
|
|
|
{
|
|
|
|
case 'h':
|
|
|
|
printUsage(argv[0]);
|
|
|
|
exit(0);
|
|
|
|
case 'd':
|
|
|
|
found_d = true;
|
|
|
|
dicName = optarg;
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
found_l = true;
|
|
|
|
readLetters(optarg, builder);
|
|
|
|
break;
|
|
|
|
case 'i':
|
|
|
|
found_i = true;
|
|
|
|
inFileName = optarg;
|
|
|
|
break;
|
|
|
|
case 'o':
|
|
|
|
found_o = true;
|
|
|
|
outFileName = optarg;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check mandatory options
|
|
|
|
if (!found_d || !found_l || !found_i || !found_o)
|
|
|
|
{
|
|
|
|
cerr << _("A mandatory option is missing") << endl;
|
|
|
|
printUsage(argv[0]);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate the dictionary
|
|
|
|
const Header &header =
|
|
|
|
builder.generateDawg(inFileName, outFileName, dicName);
|
|
|
|
|
|
|
|
// Print the header
|
|
|
|
header.print();
|
|
|
|
|
|
|
|
cout << fmt(_(" Load time: %1% s")) % builder.getLoadTime() << endl;
|
|
|
|
cout << fmt(_(" Compression time: %1% s")) % builder.getBuildTime() << endl;
|
|
|
|
#ifdef CHECK_RECURSION
|
|
|
|
cout << fmt(_(" Maximum recursion level reached: %1%")) % builder.getMaxRecursion() << endl;
|
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
}
|
2012-10-12 23:16:23 +02:00
|
|
|
catch (const BaseException &e)
|
|
|
|
{
|
|
|
|
cerr << "Exception caught: " << e.what() << "\n" << e.getStackTrace();
|
|
|
|
}
|
2010-05-15 14:14:53 +02:00
|
|
|
catch (std::exception &e)
|
|
|
|
{
|
2012-10-12 23:16:23 +02:00
|
|
|
cerr << "Exception caught: " << e.what() << endl;
|
2010-05-15 14:14:53 +02:00
|
|
|
}
|
2012-10-12 23:16:23 +02:00
|
|
|
return 1;
|
2010-05-15 14:14:53 +02:00
|
|
|
}
|
|
|
|
|