/***************************************************************************** * Eliot * Copyright (C) 1999-2007 Antoine Fraboulet & Olivier Teulière * Authors: Antoine Fraboulet * Olivier Teulière * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *****************************************************************************/ #include "config.h" #include #include #include #include #include #include #include #include #if ENABLE_NLS # include # define _(String) gettext(String) #else # define _(String) String #endif #ifdef WIN32 # include #endif #include "compdic.h" #include "dic_exception.h" #include "encoding.h" #include "header.h" using namespace std; // Useful shortcut #define fmt(a) boost::format(a) void readLetters(const string &iFileName, CompDic &ioBuilder) { ifstream in(iFileName.c_str()); if (!in.is_open()) throw DicException((fmt(_("Could not open file '%1%'")) % iFileName).str()); // Use a more friendly type name typedef boost::tokenizer, std::wstring::const_iterator, std::wstring> Tokenizer; int lineNb = 1; string line; while (getline(in, line)) { // Ignore empty lines if (line == "" || line == "\r" || line == "\n") continue; // Convert the line to a wstring const wstring &wline = readFromUTF8(line.c_str(), line.size(), "readLetters (1)"); // Split the lines on space characters boost::char_separator sep(L" "); Tokenizer tok(wline, sep); Tokenizer::iterator it; vector tokens(tok.begin(), tok.end()); // We expect at least 5 fields on the line if (tokens.size() < 5) { ostringstream ss; ss << fmt(_("readLetters: Not enough fields " "in %1% (line %2%)")) % iFileName % lineNb; throw DicException(ss.str()); } // The first field is a single character wstring letter = tokens[0]; if (letter.size() != 1) { ostringstream ss; ss << fmt(_("readLetters: Invalid letter at line %1% " "(only one character allowed)")) % lineNb; throw DicException(ss.str()); } vector inputs; if (tokens.size() > 5) { inputs = vector(tokens.begin() + 5, tokens.end()); } ioBuilder.addLetter(letter[0], wtoi(tokens[1].c_str()), wtoi(tokens[2].c_str()), wtoi(tokens[3].c_str()), wtoi(tokens[4].c_str()), inputs); ++lineNb; } } void printUsage(const string &iBinaryName) { cout << "Usage: " << iBinaryName << " [options]" << endl << _("Mandatory options:") << endl << _(" -d, --dicname Set the dictionary name and version") << endl << _(" -l, --letters Path to the file containing the letters (see below)") << endl << _(" -i, --input Path to the uncompressed dictionary file (encoded in UTF-8)") << endl << _(" The words must be in alphabetical order, without duplicates") << endl << _(" -o, --output