New utility class to parse/write CSV files

This commit is contained in:
Olivier Teulière 2012-05-31 09:07:23 +02:00
parent 6dbfcc5f99
commit 3800f54527
3 changed files with 238 additions and 0 deletions

View file

@ -31,6 +31,7 @@ libdic_a_SOURCES = \
tile.cpp tile.h \
dic.cpp dic.h \
dic_search.cpp \
csv_helper.cpp csv_helper.h \
encoding.cpp encoding.h \
stacktrace.cpp stacktrace.h \
automaton.cpp automaton.h \

172
dic/csv_helper.cpp Normal file
View file

@ -0,0 +1,172 @@
/*****************************************************************************
* Eliot
* Copyright (C) 2012 Olivier Teulière
* Authors: Olivier Teulière <ipkiss @@ gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include "csv_helper.h"
#include "debug.h"
#if ENABLE_NLS
# include <libintl.h>
# define _(String) gettext(String)
#else
# define _(String) String
#endif
using namespace std;
INIT_LOGGER(qt, CsvHelper);
/**
 * Parse CSV data from the given stream.
 *
 * The input is consumed line by line. Within a line, fields are separated
 * by commas. A field may be enclosed in double quotes, in which case it can
 * contain commas, and a doubled quote ("") stands for one literal quote.
 * Carriage return and line feed characters are dropped from the fields.
 *
 * @param input Stream to read the CSV data from
 * @return The parsed rows, in input order (empty if the stream has no line)
 * @throws CsvException if the rows do not all have the same number of fields
 */
vector<CsvHelper::DataRow> CsvHelper::readStream(istream &input)
{
    vector<DataRow> data;

    // Smallest and largest number of fields found in a row so far
    size_t minLength = static_cast<size_t>(-1);
    size_t maxLength = 0;

    string line;
    string currField;
    // XXX: Using getline() makes it impossible to parse newline characters
    // inside quoted fields. This limitation is acceptable, and it keeps
    // the code much simpler.
    while (getline(input, line))
    {
        DataRow row;

        // Parse the line, one character at a time
        bool inQuotes = false;
        for (size_t pos = 0; pos < line.size(); ++pos)
        {
            const char c = line[pos];
            if (c == ',' && !inQuotes)
            {
                // Field separator: the current field is complete
                row.push_back(currField);
                currField.clear();
            }
            else if (c == '"')
            {
                if (inQuotes)
                {
                    // Is the next char also a quote?
                    if (pos + 1 < line.size() && line[pos + 1] == '"')
                    {
                        // Escaped quote: keep one quote, skip the other one
                        currField.push_back(c);
                        ++pos;
                    }
                    else
                    {
                        // End of quotation
                        inQuotes = false;
                    }
                }
                else if (currField.empty())
                {
                    // A quote opens a quotation only at the start of a field
                    inQuotes = true;
                }
                else
                {
                    // Normal quote in the middle of an unquoted field
                    currField.push_back(c);
                }
            }
            else if (c != '\r' && c != '\n')
            {
                // Normal char (end-of-line characters are dropped)
                currField.push_back(c);
            }
        }

        // Add the last field of the line
        row.push_back(currField);
        currField.clear();
        data.push_back(row);

        if (minLength > row.size())
            minLength = row.size();
        if (maxLength < row.size())
            maxLength = row.size();
    }

    // Validation: all the rows must have the same number of fields.
    // When no row was read, minLength still holds its initial sentinel value,
    // so the check is skipped: an empty input is a valid, empty table.
    if (!data.empty() && minLength != maxLength)
    {
        boost::format fmt(_("Invalid CSV file (variable number of fields, from %1% to %2%)"));
        throw CsvException((fmt % minLength % maxLength).str());
    }

    return data;
}
/**
 * Write the given rows to a stream, in CSV format.
 *
 * Fields are separated by commas and each row is terminated by CR + LF.
 * A field containing a comma, a double quote, a carriage return or a line
 * feed is enclosed in double quotes, and the quotes it contains are doubled.
 * Nothing is written if iData is empty.
 *
 * @param output Stream to write the CSV data to
 * @param iData Rows to write; all of them must have the same number of fields
 * @throws CsvException if the rows do not all have the same number of fields
 *      (the check is done before anything is written)
 */
void CsvHelper::writeStream(ostream &output, const vector<DataRow> &iData)
{
    if (iData.empty())
        return;

    // Make sure the rows have the same number of fields.
    // This is done in a separate pass, so that invalid data is rejected
    // before the first character is written.
    const size_t firstLength = iData.front().size();
    BOOST_FOREACH(const DataRow &row, iData)
    {
        if (row.size() != firstLength)
            throw CsvException(_("Invalid CSV data (variable number of fields)"));
    }

    BOOST_FOREACH(const DataRow &row, iData)
    {
        bool first = true;
        BOOST_FOREACH(const string &field, row)
        {
            // Add the comma (between fields only)
            if (first)
                first = false;
            else
                output << ',';

            // Needs quoting? A bare '\r' must be quoted as well, because
            // CSV readers may interpret it as the end of the record.
            if (field.find_first_of(",\"\r\n") == string::npos)
            {
                output << field;
            }
            else
            {
                output << '"';
                BOOST_FOREACH(char c, field)
                {
                    output << c;
                    // Double the quote
                    if (c == '"')
                        output << '"';
                }
                output << '"';
            }
        }
        // End of the row (CR + LF)
        output << "\r\n";
    }
}

65
dic/csv_helper.h Normal file
View file

@ -0,0 +1,65 @@
/*****************************************************************************
* Eliot
* Copyright (C) 2012 Olivier Teulière
* Authors: Olivier Teulière <ipkiss @@ gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#ifndef CSV_HELPER_H_
#define CSV_HELPER_H_
#include <string>
#include <vector>
#include <iosfwd>
#include "logging.h"
#include "base_exception.h"
using std::string;
using std::vector;
using std::istream;
using std::ostream;
/**
* Utility class to handle CSV (Comma Separated Value) files.
* The CSV format respects the guidelines summary listed here (RFC 4180):
* https://en.wikipedia.org/wiki/Comma-separated_values#Toward_standardization
* except for the newlines in fields which are not supported when parsing.
*
* The class only exposes static functions: one to parse a stream into rows
* of fields, and one to write rows of fields to a stream.
*/
class CsvHelper
{
DEFINE_LOGGER();
public:
/// A row of the CSV data: the list of its fields, in order
typedef vector<string> DataRow;
/**
* Parse the CSV data found in the given stream, one row per input line.
* @param input Stream to read from
* @return The parsed rows, in input order
* @throws CsvException if the rows do not all have the same number of fields
*/
static vector<DataRow> readStream(istream &input);
/**
* Write the given rows to the stream in CSV format, quoting the fields
* when needed and terminating each row with CR + LF.
* Nothing is written if iData is empty.
* @param output Stream to write to
* @param iData Rows to write
* @throws CsvException if the rows do not all have the same number of fields
*/
static void writeStream(ostream &output, const vector<DataRow> &iData);
};
/**
* Exception thrown by CsvHelper when the CSV data is invalid
* (rows with a variable number of fields).
*/
class CsvException: public BaseException
{
public:
/// Build the exception from an error message, forwarded to BaseException
CsvException(const std::string &iMessage)
: BaseException(iMessage) {}
};
#endif