eliot/dic/header.cpp

/*****************************************************************************
 * Eliot
 * Copyright (C) 2007 Olivier Teulière
 * Authors: Olivier Teulière <ipkiss @@ gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *****************************************************************************/

#include "config.h"

#include <string>
#include <sstream>
#include <iostream>

// For ntohl & Co.
#ifdef WIN32
#   include <winsock2.h>
#else
#    if HAVE_NETINET_IN_H
#       include <netinet/in.h>
#    endif
#    if HAVE_ARPA_INET_H
#       include <arpa/inet.h>
#    endif
#endif

#if ENABLE_NLS
#   include <libintl.h>
#   define _(String) gettext(String)
#else
#   define _(String) String
#endif

#include "header.h"
#include "encoding.h"
#include "dic_exception.h"


// Note: swap4 is duplicated in dic.cpp
#if defined(WORDS_BIGENDIAN)
static uint32_t swap4(uint32_t v)
{
    uint32_t r;
    uint8_t *pv = (uint8_t*)&v;
    uint8_t *pr = (uint8_t*)&r;

    pr[0] = pv[3];
    pr[1] = pv[2];
    pr[2] = pv[1];
    pr[3] = pv[0];

    return r;
}

// Nothing to do on big-endian machines
#   define ntohll(x) (x)
#   define htonll(x) (x)
#else
static inline uint64_t htonll(uint64_t host64)
{
    return (((uint64_t)htonl((host64 << 32) >> 32)) << 32) | htonl(host64 >> 32);
}

static inline uint64_t ntohll(uint64_t net64)
{
    return htonll(net64);
}
#endif


/**
 * Keyword included in dictionary headers
 * Implies little endian storage on words
 */
#define _COMPIL_KEYWORD_ "_COMPILED_DICTIONARY_"

/** Old format of the header (i.e. version 0) */
struct Dict_header_old
{
    /// Identification string
    char ident[sizeof(_COMPIL_KEYWORD_)];
    /// Version of the serialization format
    uint8_t version;
    /// Unused at the moment, reserved for further use
    char unused;
    uint32_t root;
    uint32_t nwords;
    /// Information about the compression
    //@{
    uint32_t edgesused;
    uint32_t nodesused;
    uint32_t nodessaved;
    uint32_t edgessaved;
    //@}
};

// Do not change these values, as they impact the size of the structure!
// Note: they are chosen carefully to avoid alignment issues
#define _MAX_USER_HOST_ 32
#define _MAX_DIC_NAME_SIZE_ 30
#define _MAX_LETTERS_NB_ 63
#define _MAX_LETTERS_SIZE_ 80

/** Extension of the old format (used in version 1)*/
struct Dict_header_ext
{
    // Time when the dictionary was compressed
    // (number of seconds since the Epoch)
    uint64_t compressDate;
    // Build information
    char userHost[_MAX_USER_HOST_];
    // Size taken by the build information
    uint32_t userHostSize;

    // --- we have a multiple of 64 bytes here

    // Compression algorithm (1 = DAWG, 2 = GADDAG)
    uint8_t algorithm;
    // Variant used in the rules (XXX: currently unused)
    uint8_t variant;

    // --- we have a multiple of 64 bytes here

    // Dictionary official name and version (e.g.: ODS 5.0)
    char dicName[_MAX_DIC_NAME_SIZE_];
    // Size taken by the dictionary name
    uint32_t dicNameSize;

    // --- we have a multiple of 64 bytes here

    // Letters used in the dictionary
    // We should have: nbLetters <= lettersSize <= _MAX_LETTERS_SIZE_
    // and:            nbLetters <= _MAX_LETTERS_NB_
    // The letters themselves, in UTF-8
    char letters[_MAX_LETTERS_SIZE_];
    // Size taken by the letters
    uint32_t lettersSize;
    // Number of letters (XXX: in theory useless, but allows a sanity check)
    uint32_t nbLetters;

    // --- we have a multiple of 64 bytes here

    // Points of the letters (indexed by their code)
    // The "+ 1" is there for struct alignment
    uint8_t points[_MAX_LETTERS_NB_ + 1];
    // Frequency of the letters (indexed by their code)
    // The "+ 1" is there for struct alignment
    uint8_t frequency[_MAX_LETTERS_NB_ + 1];
    // Bitfield indicating whether letters are vowels
    uint64_t vowels;
    // Bitfield indicating whether letters are consonants
    uint64_t consonants;

    // --- we have a multiple of 64 bytes here
};


Header::Header(istream &iStream)
    : m_root(0), m_nbWords(0), m_nodesUsed(0), m_edgesUsed(0),
      m_nodesSaved(0), m_edgesSaved(0), m_type(kDAWG)
{
    // Simply delegate to the read() method
    // The code is not moved here because I find it more natural to have a
    // read() method symmetrical to the write() one
    read(iStream);
    buildMapCodeFromChar();
}


Header::Header(const DictHeaderInfo &iInfo)
{
    // Use the latest serialization format
    m_version = 1;

    // Sanity checks
    if (iInfo.letters.size() > _MAX_LETTERS_NB_)
    {
        ostringstream ss;
        ss << _MAX_LETTERS_NB_;
        throw DicException("Header::Header: Too many different letters for "
                           "the current format; only " + ss.str() +
                           " are supported");
    }
    if (iInfo.points.size() != iInfo.letters.size())
    {
        throw DicException("Header::Header: Different sizes for "
                           "iInfo.points and iInfo.letters");
    }
    if (iInfo.frequency.size() != iInfo.letters.size())
    {
        throw DicException("Header::Header: Different sizes for "
                           "iInfo.frequency and iInfo.letters");
    }
    if (iInfo.vowels.size() != iInfo.letters.size())
    {
        throw DicException("Header::Header: Different sizes for "
                           "iInfo.vowels and iInfo.letters");
    }
    if (iInfo.consonants.size() != iInfo.letters.size())
    {
        throw DicException("Header::Header: Different sizes for "
                           "iInfo.consonants and iInfo.letters");
    }

    m_compressDate = time(NULL);
    m_userHost = convertToWc(ELIOT_COMPILE_BY + string("@") + ELIOT_COMPILE_HOST);
    m_root = iInfo.root;
    m_nbWords = iInfo.nwords;
    m_nodesUsed = iInfo.nodesused;
    m_edgesUsed = iInfo.edgesused;
    m_nodesSaved = iInfo.nodessaved;
    m_edgesSaved = iInfo.edgessaved;
    m_type = iInfo.dawg ? kDAWG : kGADDAG;
    m_dicName = iInfo.dicName;
    m_letters = iInfo.letters;
    m_points = iInfo.points;
    m_frequency = iInfo.frequency;
    m_vowels = iInfo.vowels;
    m_consonants = iInfo.consonants;

    buildMapCodeFromChar();
}


void Header::buildMapCodeFromChar()
{
    for (unsigned int i = 0; i < m_letters.size(); ++i)
    {
        // We don't differentiate uppercase and lowercase letters
        m_mapCodeFromChar[towlower(m_letters[i])] = i + 1;
        m_mapCodeFromChar[towupper(m_letters[i])] = i + 1;
    }
}


wchar_t Header::getCharFromCode(unsigned int iCode) const
{
    // Safety check
    if (iCode == 0 || iCode > m_letters.size())
    {
        ostringstream ss;
        ss << iCode;
        throw DicException("Header::getCharFromCode: no letter for code " + ss.str());
    }
    return m_letters[iCode - 1];
}


unsigned int Header::getCodeFromChar(wchar_t iChar) const
{
    map<wchar_t, unsigned int>::const_iterator pair =
        m_mapCodeFromChar.find(iChar);
    if (pair == m_mapCodeFromChar.end())
    {
        throw DicException("Header::getCodeFromChar: No code for letter " +
                           convertToMb(iChar));
    }
    return pair->second;
}


void Header::read(istream &iStream)
{
    Dict_header_old aHeader;
    iStream.read((char*)&aHeader, sizeof(Dict_header_old));
    if (iStream.gcount() != sizeof(Dict_header_old))
        throw DicException("Header::read: expected to read more bytes");

    // Check the identification string
    if (string(aHeader.ident) != _COMPIL_KEYWORD_)
        throw DicException("Header::read: incorrect header keyword; is it a dictionary file?");

    m_version = aHeader.version;

    // Handle endianness
    if (m_version == 0)
    {
#if defined(WORDS_BIGENDIAN)
        aHeader.root       = swap4(aHeader.root);
        aHeader.nwords     = swap4(aHeader.nwords);
        aHeader.nodesused  = swap4(aHeader.nodesused);
        aHeader.edgesused  = swap4(aHeader.edgesused);
        aHeader.nodessaved = swap4(aHeader.nodessaved);
        aHeader.edgessaved = swap4(aHeader.edgessaved);
#endif
        m_root = aHeader.root;
        m_nbWords = aHeader.nwords;
        m_nodesUsed = aHeader.nodesused;
        m_edgesUsed = aHeader.edgesused;
        m_nodesSaved = aHeader.nodessaved;
        m_edgesSaved = aHeader.edgessaved;
    }
    else
    {
        m_root = ntohl(aHeader.root);
        m_nbWords = ntohl(aHeader.nwords);
        m_nodesUsed = ntohl(aHeader.nodesused);
        m_edgesUsed = ntohl(aHeader.edgesused);
        m_nodesSaved = ntohl(aHeader.nodessaved);
        m_edgesSaved = ntohl(aHeader.edgessaved);
    }

    if (m_version == 0)
    {
        m_compressDate = 0;
        m_userHost = convertToWc(_("Unknown (old format)"));
        m_dicName = convertToWc(_("Unknown (old format)"));

        // In version 0, the letters, points, frequency,
        // vowels and consonants were hard-coded...
        m_letters = convertToWc("ABCDEFGHIJKLMNOPQRSTUVWXYZ?");

        static const uint8_t Frenchpoints[] =
        {
            // A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z ?
               1,3,3,2, 1,4,2,4,1,8,10,1,2,1,1,3,8,1,1,1,1,4,10,10,10,10,0
        };

        static const uint8_t FrenchFrequency[] =
        {
            // A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z ?
               9,2,2,3,15,2,2,2,8,1, 1,5,3,6,6,2,1,6,6,6,6,2, 1, 1, 1, 1,2
        };

        // The jokers and the 'Y' can be considered both as vowels or consonants
        static const uint8_t FrenchVowels[] =
        {
            // A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z ?
               1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0,1
        };

        static const uint8_t FrenchConsonants[] =
        {
            // A B C D  E F G H I J  K L M N O P Q R S T U V  W  X  Y  Z ?
               0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1,1
        };

        for (unsigned int i = 0; i < m_letters.size(); ++i)
        {
            m_points.push_back(Frenchpoints[i]);
            m_frequency.push_back(FrenchFrequency[i]);
            m_vowels.push_back(FrenchVowels[i]);
            m_consonants.push_back(FrenchConsonants[i]);
        }
    }
    else
    {
        // This header doesn't use the old serialization format, so read the
        // extension as well
        Dict_header_ext aHeaderExt;
        iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext));
        if (iStream.gcount() != sizeof(Dict_header_ext))
            throw DicException("Header::read: expected to read more bytes");

        // Handle endianness in the extension
        aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate);
        aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize);
        aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize);
        aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize);
        aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters);
        aHeaderExt.vowels = ntohll(aHeaderExt.vowels);
        aHeaderExt.consonants = ntohll(aHeaderExt.consonants);

        m_compressDate = aHeaderExt.compressDate;

        if (aHeaderExt.algorithm == kDAWG)
            m_type = kDAWG;
        else if (aHeaderExt.algorithm == kGADDAG)
            m_type = kGADDAG;
        else
            throw DicException("Header::read: unrecognized algorithm type");

        m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize,
                                  "user and host information");

        // Convert the dictionary letters from UTF-8 to wchar_t*
        m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize,
                                 "dictionary name");

        // Convert the dictionary letters from UTF-8 to wchar_t*
        m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize,
                                 "dictionary letters");
        // Safety check: correct number of letters?
        if (m_letters.size() != aHeaderExt.nbLetters)
        {
            throw DicException("Header::read: inconsistent header");
        }

        // Letters points and frequency
        for (unsigned int i = 0; i < m_letters.size(); ++i)
        {
            m_points.push_back(aHeaderExt.points[i]);
            m_frequency.push_back(aHeaderExt.frequency[i]);
        }

        // Vowels and consonants
        for (unsigned int i = 0; i < m_letters.size(); ++i)
        {
            m_vowels.push_back(aHeaderExt.vowels & (1 << i));
            m_consonants.push_back(aHeaderExt.consonants & (1 << i));
        }

    }
}


void Header::write(ostream &oStream) const
{
    Dict_header_old aHeader;
    strcpy(aHeader.ident, _COMPIL_KEYWORD_);
    aHeader.version = m_version;
    aHeader.unused = 0;
    aHeader.root = htonl(m_root);
    aHeader.nwords = htonl(m_nbWords);
    aHeader.nodesused = htonl(m_nodesUsed);
    aHeader.edgesused = htonl(m_edgesUsed);
    aHeader.nodessaved = htonl(m_nodesSaved);
    aHeader.edgessaved = htonl(m_edgesSaved);

    oStream.write((char*)&aHeader, sizeof(Dict_header_old));
    if (!oStream.good())
        throw DicException("Header::write: error when writing to file");

    if (m_version != 0)
    {
        Dict_header_ext aHeaderExt;
        aHeaderExt.compressDate = m_compressDate;
        aHeaderExt.userHostSize =
            writeInUTF8(m_userHost, aHeaderExt.userHost,
                        _MAX_USER_HOST_, "user and host information");
        aHeaderExt.algorithm = m_type;

        // Convert the dictionary name to UTF-8
        aHeaderExt.dicNameSize =
            writeInUTF8(m_dicName, aHeaderExt.dicName,
                        _MAX_DIC_NAME_SIZE_, "dictionary name");

        // Convert the dictionary letters to UTF-8
        aHeaderExt.lettersSize =
            writeInUTF8(m_letters, aHeaderExt.letters,
                        _MAX_LETTERS_SIZE_, "dictionary letters");
        aHeaderExt.nbLetters = (uint32_t)m_letters.size();

        // Letters points and frequency
        for (unsigned int i = 0; i < m_letters.size(); ++i)
        {
            aHeaderExt.points[i] = m_points[i];
            aHeaderExt.frequency[i] = m_frequency[i];
        }

        // Vowels and consonants
        aHeaderExt.vowels = 0;
        aHeaderExt.consonants = 0;
        for (unsigned int i = 0; i < m_letters.size(); ++i)
        {
            if (m_vowels[i])
                aHeaderExt.vowels |= 1 << i;
            if (m_consonants[i])
                aHeaderExt.consonants |= 1 << i;
        }

        // Handle endianness in the extension
        aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize);
        aHeaderExt.compressDate = htonll(aHeaderExt.compressDate);
        aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize);
        aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize);
        aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters);
        aHeaderExt.vowels = htonll(aHeaderExt.vowels);
        aHeaderExt.consonants = htonll(aHeaderExt.consonants);

        // Write the extension
        oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext));
        if (!oStream.good())
            throw DicException("Header::write: error when writing to file");
    }
}


void Header::print() const
{
    printf(_("dictionary name: %s\n"), convertToMb(m_dicName).c_str());
    if (m_version)
    {
        char buf[50];
        strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate));
        printf(_("compressed on: %s\n"), buf);
    }
    else
    {
        printf(_("compressed on: Unknown date (old format)\n"));
    }
    printf(_("compressed using a binary compiled by: %s\n"), convertToMb(m_userHost).c_str());
    printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG");
    printf(_("letters: %s\n"), convertToMb(m_letters).c_str());
    printf(_("number of letters: %lu\n"), (long unsigned int)m_letters.size());
    printf(_("number of words: %d\n"), m_nbWords);
    long unsigned int size =
        sizeof(Dict_header_old) + (m_version ? sizeof(Dict_header_ext) : 0);
    printf(_("header size: %lu bytes\n"), size);
    printf(_("root: %d (edge)\n"), m_root);
    printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);
    printf(_("edges: %d used + %d saved\n"), m_edgesUsed, m_edgesSaved);
    printf("===============================================\n");
    printf(_("letter | points | frequency | vowel | consonant\n"));
    printf("-------+--------+-----------+-------+----------\n");
    for (unsigned int i = 0; i < m_letters.size(); ++i)
    {
        printf("  %s   |   %2d   |    %2d     |   %d   |    %d\n",
               padAndConvert(wstring(1, m_letters[i]), 2).c_str(),
               m_points[i], m_frequency[i], m_vowels[i], m_consonants[i]);
    }
    printf("===============================================\n");
}
Merged the "cppdic" branch back into HEAD. There are too many change to list properly, here is an overview of the main changes: - the dictionary is now in C++ - the dictionary has a new format, where it is possible to specify the letters, their points, their frequency, ... It is backwards compatible. - Eliot now supports non-ASCII characters everywhere - i18n of the compdic, listdic, regexpmain binaries - i18n of the wxWidgets interface (now in english by default) 2008-01-08 14:52:32 +01:00			`/*****************************************************************************`
			`* Eliot`
			`* Copyright (C) 2007 Olivier Teulière`
			`* Authors: Olivier Teulière <ipkiss @@ gmail.com>`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA`
			`*****************************************************************************/`

			`#include "config.h"`

			`#include <string>`
			`#include <sstream>`
			`#include <iostream>`

			`// For ntohl & Co.`
			`#ifdef WIN32`
			`# include <winsock2.h>`
			`#else`
			`# if HAVE_NETINET_IN_H`
			`# include <netinet/in.h>`
			`# endif`
			`# if HAVE_ARPA_INET_H`
			`# include <arpa/inet.h>`
			`# endif`
			`#endif`

			`#if ENABLE_NLS`
			`# include <libintl.h>`
			`# define _(String) gettext(String)`
			`#else`
			`# define _(String) String`
			`#endif`

			`#include "header.h"`
			`#include "encoding.h"`
			`#include "dic_exception.h"`


			`// Note: swap4 is duplicated in dic.cpp`
			`#if defined(WORDS_BIGENDIAN)`
			`static uint32_t swap4(uint32_t v)`
			`{`
			`uint32_t r;`
			`uint8_t pv = (uint8_t)&v;`
			`uint8_t pr = (uint8_t)&r;`

			`pr[0] = pv[3];`
			`pr[1] = pv[2];`
			`pr[2] = pv[1];`
			`pr[3] = pv[0];`

			`return r;`
			`}`

			`// Nothing to do on big-endian machines`
			`# define ntohll(x) (x)`
			`# define htonll(x) (x)`
			`#else`
			`static inline uint64_t htonll(uint64_t host64)`
			`{`
			`return (((uint64_t)htonl((host64 << 32) >> 32)) << 32) \| htonl(host64 >> 32);`
			`}`

			`static inline uint64_t ntohll(uint64_t net64)`
			`{`
			`return htonll(net64);`
			`}`
			`#endif`


			`/**`
			`* Keyword included in dictionary headers`
			`* Implies little endian storage on words`
			`*/`
			`#define _COMPIL_KEYWORD_ "_COMPILED_DICTIONARY_"`

			`/** Old format of the header (i.e. version 0) */`
			`struct Dict_header_old`
			`{`
			`/// Identification string`
			`char ident[sizeof(_COMPIL_KEYWORD_)];`
			`/// Version of the serialization format`
			`uint8_t version;`
			`/// Unused at the moment, reserved for further use`
			`char unused;`
			`uint32_t root;`
			`uint32_t nwords;`
			`/// Information about the compression`
			`//@{`
			`uint32_t edgesused;`
			`uint32_t nodesused;`
			`uint32_t nodessaved;`
			`uint32_t edgessaved;`
			`//@}`
			`};`

			`// Do not change these values, as they impact the size of the structure!`
			`// Note: they are chosen carefully to avoid alignment issues`
			`#define _MAX_USER_HOST_ 32`
			`#define _MAX_DIC_NAME_SIZE_ 30`
			`#define _MAX_LETTERS_NB_ 63`
			`#define _MAX_LETTERS_SIZE_ 80`

			`/** Extension of the old format (used in version 1)*/`
			`struct Dict_header_ext`
			`{`
			`// Time when the dictionary was compressed`
			`// (number of seconds since the Epoch)`
			`uint64_t compressDate;`
			`// Build information`
			`char userHost[_MAX_USER_HOST_];`
			`// Size taken by the build information`
			`uint32_t userHostSize;`

			`// --- we have a multiple of 64 bytes here`

			`// Compression algorithm (1 = DAWG, 2 = GADDAG)`
			`uint8_t algorithm;`
			`// Variant used in the rules (XXX: currently unused)`
			`uint8_t variant;`

			`// --- we have a multiple of 64 bytes here`

			`// Dictionary official name and version (e.g.: ODS 5.0)`
			`char dicName[_MAX_DIC_NAME_SIZE_];`
			`// Size taken by the dictionary name`
			`uint32_t dicNameSize;`

			`// --- we have a multiple of 64 bytes here`

			`// Letters used in the dictionary`
			`// We should have: nbLetters <= lettersSize <= _MAX_LETTERS_SIZE_`
			`// and: nbLetters <= _MAX_LETTERS_NB_`
			`// The letters themselves, in UTF-8`
			`char letters[_MAX_LETTERS_SIZE_];`
			`// Size taken by the letters`
			`uint32_t lettersSize;`
			`// Number of letters (XXX: in theory useless, but allows a sanity check)`
			`uint32_t nbLetters;`

			`// --- we have a multiple of 64 bytes here`

			`// Points of the letters (indexed by their code)`
			`// The "+ 1" is there for struct alignment`
			`uint8_t points[_MAX_LETTERS_NB_ + 1];`
			`// Frequency of the letters (indexed by their code)`
			`// The "+ 1" is there for struct alignment`
			`uint8_t frequency[_MAX_LETTERS_NB_ + 1];`
			`// Bitfield indicating whether letters are vowels`
			`uint64_t vowels;`
			`// Bitfield indicating whether letters are consonants`
			`uint64_t consonants;`

			`// --- we have a multiple of 64 bytes here`
			`};`


			`Header::Header(istream &iStream)`
			`: m_root(0), m_nbWords(0), m_nodesUsed(0), m_edgesUsed(0),`
			`m_nodesSaved(0), m_edgesSaved(0), m_type(kDAWG)`
			`{`
			`// Simply delegate to the read() method`
			`// The code is not moved here because I find it more natural to have a`
			`// read() method symmetrical to the write() one`
			`read(iStream);`
			`buildMapCodeFromChar();`
			`}`


			`Header::Header(const DictHeaderInfo &iInfo)`
			`{`
			`// Use the latest serialization format`
			`m_version = 1;`

			`// Sanity checks`
			`if (iInfo.letters.size() > _MAX_LETTERS_NB_)`
			`{`
			`ostringstream ss;`
			`ss << _MAX_LETTERS_NB_;`
			`throw DicException("Header::Header: Too many different letters for "`
			`"the current format; only " + ss.str() +`
			`" are supported");`
			`}`
			`if (iInfo.points.size() != iInfo.letters.size())`
			`{`
			`throw DicException("Header::Header: Different sizes for "`
			`"iInfo.points and iInfo.letters");`
			`}`
			`if (iInfo.frequency.size() != iInfo.letters.size())`
			`{`
			`throw DicException("Header::Header: Different sizes for "`
			`"iInfo.frequency and iInfo.letters");`
			`}`
			`if (iInfo.vowels.size() != iInfo.letters.size())`
			`{`
			`throw DicException("Header::Header: Different sizes for "`
			`"iInfo.vowels and iInfo.letters");`
			`}`
			`if (iInfo.consonants.size() != iInfo.letters.size())`
			`{`
			`throw DicException("Header::Header: Different sizes for "`
			`"iInfo.consonants and iInfo.letters");`
			`}`

			`m_compressDate = time(NULL);`
			`m_userHost = convertToWc(ELIOT_COMPILE_BY + string("@") + ELIOT_COMPILE_HOST);`
			`m_root = iInfo.root;`
			`m_nbWords = iInfo.nwords;`
			`m_nodesUsed = iInfo.nodesused;`
			`m_edgesUsed = iInfo.edgesused;`
			`m_nodesSaved = iInfo.nodessaved;`
			`m_edgesSaved = iInfo.edgessaved;`
			`m_type = iInfo.dawg ? kDAWG : kGADDAG;`
			`m_dicName = iInfo.dicName;`
			`m_letters = iInfo.letters;`
			`m_points = iInfo.points;`
			`m_frequency = iInfo.frequency;`
			`m_vowels = iInfo.vowels;`
			`m_consonants = iInfo.consonants;`

			`buildMapCodeFromChar();`
			`}`


			`void Header::buildMapCodeFromChar()`
			`{`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`// We don't differentiate uppercase and lowercase letters`
			`m_mapCodeFromChar[towlower(m_letters[i])] = i + 1;`
			`m_mapCodeFromChar[towupper(m_letters[i])] = i + 1;`
			`}`
			`}`


			`wchar_t Header::getCharFromCode(unsigned int iCode) const`
			`{`
			`// Safety check`
			`if (iCode == 0 \|\| iCode > m_letters.size())`
			`{`
			`ostringstream ss;`
			`ss << iCode;`
			`throw DicException("Header::getCharFromCode: no letter for code " + ss.str());`
			`}`
			`return m_letters[iCode - 1];`
			`}`


			`unsigned int Header::getCodeFromChar(wchar_t iChar) const`
			`{`
			`map<wchar_t, unsigned int>::const_iterator pair =`
			`m_mapCodeFromChar.find(iChar);`
			`if (pair == m_mapCodeFromChar.end())`
			`{`
			`throw DicException("Header::getCodeFromChar: No code for letter " +`
			`convertToMb(iChar));`
			`}`
			`return pair->second;`
			`}`


			`void Header::read(istream &iStream)`
			`{`
			`Dict_header_old aHeader;`
			`iStream.read((char*)&aHeader, sizeof(Dict_header_old));`
			`if (iStream.gcount() != sizeof(Dict_header_old))`
			`throw DicException("Header::read: expected to read more bytes");`

			`// Check the identification string`
			`if (string(aHeader.ident) != _COMPIL_KEYWORD_)`
			`throw DicException("Header::read: incorrect header keyword; is it a dictionary file?");`

			`m_version = aHeader.version;`

			`// Handle endianness`
			`if (m_version == 0)`
			`{`
			`#if defined(WORDS_BIGENDIAN)`
			`aHeader.root = swap4(aHeader.root);`
			`aHeader.nwords = swap4(aHeader.nwords);`
			`aHeader.nodesused = swap4(aHeader.nodesused);`
			`aHeader.edgesused = swap4(aHeader.edgesused);`
			`aHeader.nodessaved = swap4(aHeader.nodessaved);`
			`aHeader.edgessaved = swap4(aHeader.edgessaved);`
			`#endif`
			`m_root = aHeader.root;`
			`m_nbWords = aHeader.nwords;`
			`m_nodesUsed = aHeader.nodesused;`
			`m_edgesUsed = aHeader.edgesused;`
			`m_nodesSaved = aHeader.nodessaved;`
			`m_edgesSaved = aHeader.edgessaved;`
			`}`
			`else`
			`{`
			`m_root = ntohl(aHeader.root);`
			`m_nbWords = ntohl(aHeader.nwords);`
			`m_nodesUsed = ntohl(aHeader.nodesused);`
			`m_edgesUsed = ntohl(aHeader.edgesused);`
			`m_nodesSaved = ntohl(aHeader.nodessaved);`
			`m_edgesSaved = ntohl(aHeader.edgessaved);`
			`}`

			`if (m_version == 0)`
			`{`
			`m_compressDate = 0;`
			`m_userHost = convertToWc(_("Unknown (old format)"));`
			`m_dicName = convertToWc(_("Unknown (old format)"));`

			`// In version 0, the letters, points, frequency,`
			`// vowels and consonants were hard-coded...`
			`m_letters = convertToWc("ABCDEFGHIJKLMNOPQRSTUVWXYZ?");`

			`static const uint8_t Frenchpoints[] =`
			`{`
			`// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?`
			`1,3,3,2, 1,4,2,4,1,8,10,1,2,1,1,3,8,1,1,1,1,4,10,10,10,10,0`
			`};`

			`static const uint8_t FrenchFrequency[] =`
			`{`
			`// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?`
			`9,2,2,3,15,2,2,2,8,1, 1,5,3,6,6,2,1,6,6,6,6,2, 1, 1, 1, 1,2`
			`};`

			`// The jokers and the 'Y' can be considered both as vowels or consonants`
			`static const uint8_t FrenchVowels[] =`
			`{`
			`// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?`
			`1,0,0,0, 1,0,0,0,1,0, 0,0,0,0,1,0,0,0,0,0,1,0, 0, 0, 1, 0,1`
			`};`

			`static const uint8_t FrenchConsonants[] =`
			`{`
			`// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ?`
			`0,1,1,1, 0,1,1,1,0,1, 1,1,1,1,0,1,1,1,1,1,0,1, 1, 1, 1, 1,1`
			`};`

			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`m_points.push_back(Frenchpoints[i]);`
			`m_frequency.push_back(FrenchFrequency[i]);`
			`m_vowels.push_back(FrenchVowels[i]);`
			`m_consonants.push_back(FrenchConsonants[i]);`
			`}`
			`}`
			`else`
			`{`
			`// This header doesn't use the old serialization format, so read the`
			`// extension as well`
			`Dict_header_ext aHeaderExt;`
			`iStream.read((char*)&aHeaderExt, sizeof(Dict_header_ext));`
			`if (iStream.gcount() != sizeof(Dict_header_ext))`
			`throw DicException("Header::read: expected to read more bytes");`

			`// Handle endianness in the extension`
			`aHeaderExt.compressDate = ntohll(aHeaderExt.compressDate);`
			`aHeaderExt.userHostSize = ntohl(aHeaderExt.userHostSize);`
			`aHeaderExt.dicNameSize = ntohl(aHeaderExt.dicNameSize);`
			`aHeaderExt.lettersSize = ntohl(aHeaderExt.lettersSize);`
			`aHeaderExt.nbLetters = ntohl(aHeaderExt.nbLetters);`
			`aHeaderExt.vowels = ntohll(aHeaderExt.vowels);`
			`aHeaderExt.consonants = ntohll(aHeaderExt.consonants);`

			`m_compressDate = aHeaderExt.compressDate;`

			`if (aHeaderExt.algorithm == kDAWG)`
			`m_type = kDAWG;`
			`else if (aHeaderExt.algorithm == kGADDAG)`
			`m_type = kGADDAG;`
			`else`
			`throw DicException("Header::read: unrecognized algorithm type");`

			`m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize,`
			`"user and host information");`

			`// Convert the dictionary letters from UTF-8 to wchar_t*`
			`m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize,`
			`"dictionary name");`

			`// Convert the dictionary letters from UTF-8 to wchar_t*`
			`m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize,`
			`"dictionary letters");`
			`// Safety check: correct number of letters?`
			`if (m_letters.size() != aHeaderExt.nbLetters)`
			`{`
			`throw DicException("Header::read: inconsistent header");`
			`}`

			`// Letters points and frequency`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`m_points.push_back(aHeaderExt.points[i]);`
			`m_frequency.push_back(aHeaderExt.frequency[i]);`
			`}`

			`// Vowels and consonants`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`m_vowels.push_back(aHeaderExt.vowels & (1 << i));`
			`m_consonants.push_back(aHeaderExt.consonants & (1 << i));`
			`}`

			`}`
			`}`


			`void Header::write(ostream &oStream) const`
			`{`
			`Dict_header_old aHeader;`
			`strcpy(aHeader.ident, _COMPIL_KEYWORD_);`
			`aHeader.version = m_version;`
			`aHeader.unused = 0;`
			`aHeader.root = htonl(m_root);`
			`aHeader.nwords = htonl(m_nbWords);`
			`aHeader.nodesused = htonl(m_nodesUsed);`
			`aHeader.edgesused = htonl(m_edgesUsed);`
			`aHeader.nodessaved = htonl(m_nodesSaved);`
			`aHeader.edgessaved = htonl(m_edgesSaved);`

			`oStream.write((char*)&aHeader, sizeof(Dict_header_old));`
			`if (!oStream.good())`
			`throw DicException("Header::write: error when writing to file");`

			`if (m_version != 0)`
			`{`
			`Dict_header_ext aHeaderExt;`
			`aHeaderExt.compressDate = m_compressDate;`
			`aHeaderExt.userHostSize =`
			`writeInUTF8(m_userHost, aHeaderExt.userHost,`
			`_MAX_USER_HOST_, "user and host information");`
			`aHeaderExt.algorithm = m_type;`

			`// Convert the dictionary name to UTF-8`
			`aHeaderExt.dicNameSize =`
			`writeInUTF8(m_dicName, aHeaderExt.dicName,`
			`_MAX_DIC_NAME_SIZE_, "dictionary name");`

			`// Convert the dictionary letters to UTF-8`
			`aHeaderExt.lettersSize =`
			`writeInUTF8(m_letters, aHeaderExt.letters,`
			`_MAX_LETTERS_SIZE_, "dictionary letters");`
			`aHeaderExt.nbLetters = (uint32_t)m_letters.size();`

			`// Letters points and frequency`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`aHeaderExt.points[i] = m_points[i];`
			`aHeaderExt.frequency[i] = m_frequency[i];`
			`}`

			`// Vowels and consonants`
			`aHeaderExt.vowels = 0;`
			`aHeaderExt.consonants = 0;`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`if (m_vowels[i])`
			`aHeaderExt.vowels \|= 1 << i;`
			`if (m_consonants[i])`
			`aHeaderExt.consonants \|= 1 << i;`
			`}`

			`// Handle endianness in the extension`
			`aHeaderExt.userHostSize = htonl(aHeaderExt.userHostSize);`
			`aHeaderExt.compressDate = htonll(aHeaderExt.compressDate);`
			`aHeaderExt.dicNameSize = htonl(aHeaderExt.dicNameSize);`
			`aHeaderExt.lettersSize = htonl(aHeaderExt.lettersSize);`
			`aHeaderExt.nbLetters = htonl(aHeaderExt.nbLetters);`
			`aHeaderExt.vowels = htonll(aHeaderExt.vowels);`
			`aHeaderExt.consonants = htonll(aHeaderExt.consonants);`

			`// Write the extension`
			`oStream.write((char*)&aHeaderExt, sizeof(Dict_header_ext));`
			`if (!oStream.good())`
			`throw DicException("Header::write: error when writing to file");`
			`}`
			`}`


			`void Header::print() const`
			`{`
			`printf(_("dictionary name: %s\n"), convertToMb(m_dicName).c_str());`
			`if (m_version)`
			`{`
			`char buf[50];`
			`strftime(buf, sizeof(buf), "%c", gmtime(&m_compressDate));`
			`printf(_("compressed on: %s\n"), buf);`
			`}`
			`else`
			`{`
			`printf(_("compressed on: Unknown date (old format)\n"));`
			`}`
			`printf(_("compressed using a binary compiled by: %s\n"), convertToMb(m_userHost).c_str());`
			`printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG");`
			`printf(_("letters: %s\n"), convertToMb(m_letters).c_str());`
			`printf(_("number of letters: %lu\n"), (long unsigned int)m_letters.size());`
			`printf(_("number of words: %d\n"), m_nbWords);`
			`long unsigned int size =`
			`sizeof(Dict_header_old) + (m_version ? sizeof(Dict_header_ext) : 0);`
			`printf(_("header size: %lu bytes\n"), size);`
			`printf(_("root: %d (edge)\n"), m_root);`
			`printf(_("nodes: %d used + %d saved\n"), m_nodesUsed, m_nodesSaved);`
			`printf(_("edges: %d used + %d saved\n"), m_edgesUsed, m_edgesSaved);`
			`printf("===============================================\n");`
			`printf(_("letter \| points \| frequency \| vowel \| consonant\n"));`
			`printf("-------+--------+-----------+-------+----------\n");`
			`for (unsigned int i = 0; i < m_letters.size(); ++i)`
			`{`
			`printf(" %s \| %2d \| %2d \| %d \| %d\n",`
			`padAndConvert(wstring(1, m_letters[i]), 2).c_str(),`
			`m_points[i], m_frequency[i], m_vowels[i], m_consonants[i]);`
			`}`
			`printf("===============================================\n");`
			`}`