Detect and ignore the BOM in the letters file

This commit is contained in:
Olivier Teulière 2011-07-16 21:13:42 +00:00
parent 23771f3f71
commit 1e543111e9
2 changed files with 11 additions and 0 deletions

View file

@ -65,6 +65,8 @@ public:
bool isVowel, bool isConsonant,
const vector<wstring> &iInputs);
/**
 * Return the number of entries currently stored in m_headerInfo.letters.
 * The container size is converted to unsigned, matching the declared
 * return type.
 */
unsigned getLettersCount() const
{
    return static_cast<unsigned>(m_headerInfo.letters.size());
}
/**
* Generate the dictionary. You must have called addLetter() before
* (once for each letter of the word list, and possibly once for the

View file

@ -70,6 +70,15 @@ void readLetters(const string &iFileName, CompDic &ioBuilder)
if (line == "" || line == "\r" || line == "\n")
continue;
// If there is a BOM in the file, remove it from the first line
if (ioBuilder.getLettersCount() == 0 && line.size() >= 3 &&
(uint8_t)line[0] == 0xEF &&
(uint8_t)line[1] == 0xBB &&
(uint8_t)line[2] == 0xBF)
{
line = line.substr(3);
}
// Convert the line to a wstring
const wstring &wline =
readFromUTF8(line.c_str(), line.size(), "readLetters (1)");