mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2024-12-27 09:58:08 +01:00
New utility class to parse/write CSV files
This commit is contained in:
parent
6dbfcc5f99
commit
3800f54527
3 changed files with 238 additions and 0 deletions
|
@ -31,6 +31,7 @@ libdic_a_SOURCES = \
|
||||||
tile.cpp tile.h \
|
tile.cpp tile.h \
|
||||||
dic.cpp dic.h \
|
dic.cpp dic.h \
|
||||||
dic_search.cpp \
|
dic_search.cpp \
|
||||||
|
csv_helper.cpp csv_helper.h \
|
||||||
encoding.cpp encoding.h \
|
encoding.cpp encoding.h \
|
||||||
stacktrace.cpp stacktrace.h \
|
stacktrace.cpp stacktrace.h \
|
||||||
automaton.cpp automaton.h \
|
automaton.cpp automaton.h \
|
||||||
|
|
172
dic/csv_helper.cpp
Normal file
172
dic/csv_helper.cpp
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* Eliot
|
||||||
|
* Copyright (C) 2012 Olivier Teulière
|
||||||
|
* Authors: Olivier Teulière <ipkiss @@ gmail.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
#include <boost/format.hpp>
|
||||||
|
|
||||||
|
#include "csv_helper.h"
|
||||||
|
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
#if ENABLE_NLS
|
||||||
|
# include <libintl.h>
|
||||||
|
# define _(String) gettext(String)
|
||||||
|
#else
|
||||||
|
# define _(String) String
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
INIT_LOGGER(qt, CsvHelper);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parse CSV data from a stream.
 *
 * Every line read from the stream produces one row. Fields are separated by
 * commas. A double quote found at the very beginning of a field opens a
 * quoted section, in which commas are literal and a doubled quote stands for
 * a single quote character; a quote found later in an unquoted field is kept
 * as a normal character. CR and LF characters are always dropped, so
 * newlines inside fields are not supported.
 *
 * @param input stream to parse, read line by line until getline() fails
 * @return the parsed rows, all having the same number of fields; the result
 *         is empty when the stream does not contain any line
 * @throw CsvException if the rows do not all have the same number of fields
 */
vector<CsvHelper::DataRow> CsvHelper::readStream(istream &input)
{
    vector<DataRow> data;
    // Smallest and largest number of fields seen so far.
    // Both stay at 0 as long as no row has been read, so that an empty
    // input is not reported as inconsistent.
    size_t minLength = 0;
    size_t maxLength = 0;
    string line;
    string currField;
    // XXX: Using getline() makes it impossible to parse newline chars inside
    // fields. This limitation is minor, and it makes the code much simpler.
    while (getline(input, line))
    {
        DataRow row;
        // Parse the line
        bool inQuotes = false;
        size_t pos = 0;
        while (pos < line.size())
        {
            const char c = line[pos];
            if (c == ',' && !inQuotes)
            {
                // Field separator
                row.push_back(currField);
                currField.clear();
            }
            else if (c == '"')
            {
                if (inQuotes)
                {
                    // Is the next char also a quote?
                    if (pos + 1 < line.size() && line[pos + 1] == '"')
                    {
                        // Escaped quote: keep one quote, skip the other one
                        currField.push_back(c);
                        ++pos;
                    }
                    else
                    {
                        // End of quotation
                        inQuotes = false;
                    }
                }
                else
                {
                    if (currField.empty())
                    {
                        // Beginning of quotation
                        inQuotes = true;
                    }
                    else
                    {
                        // Normal quote in an unquoted field
                        currField.push_back(c);
                    }
                }
            }
            else if (c != '\r' && c != '\n')
            {
                // Normal char (line terminators are silently dropped)
                currField.push_back(c);
            }

            // Next char
            ++pos;
        }

        // Add the last field of the line
        row.push_back(currField);
        currField.clear();

        data.push_back(row);

        // The first row initializes the minimum
        if (data.size() == 1 || minLength > row.size())
            minLength = row.size();
        if (maxLength < row.size())
            maxLength = row.size();
    }

    // Reject inputs where the rows do not all have the same number of fields
    if (minLength != maxLength)
    {
        boost::format fmt(_("Invalid CSV file (variable number of fields, from %1% to %2%)"));
        throw CsvException((fmt % minLength % maxLength).str());
    }

    return data;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Write rows to a stream in CSV format.
 *
 * Fields are separated by commas and every row is terminated by CR + LF.
 * A field containing a comma, a double quote, a carriage return or a line
 * feed is enclosed in double quotes, with every embedded quote doubled.
 * All the rows are validated before anything is written, so nothing is
 * output when the data is rejected.
 *
 * @param output stream receiving the CSV text
 * @param iData rows to write; nothing is written if there is no row
 * @throw CsvException if the rows do not all have the same number of fields
 */
void CsvHelper::writeStream(ostream &output, const vector<DataRow> &iData)
{
    if (iData.empty())
        return;

    // Make sure the rows have the same number of fields
    const size_t firstLength = iData.front().size();
    BOOST_FOREACH(const DataRow &row, iData)
    {
        if (row.size() != firstLength)
            throw CsvException(_("Invalid CSV data (variable number of fields)"));
    }

    BOOST_FOREACH(const DataRow &row, iData)
    {
        bool first = true;
        BOOST_FOREACH(const string &field, row)
        {
            // Add the comma (except before the first field)
            if (first)
                first = false;
            else
                output << ',';

            // Needs quoting? A bare CR must be quoted too, otherwise it
            // could be taken for a line break next to the row terminator.
            if (field.find_first_of(",\"\r\n") == string::npos)
                output << field;
            else
            {
                output << '"';
                BOOST_FOREACH(char c, field)
                {
                    output << c;
                    // Double the quote
                    if (c == '"')
                        output << '"';
                }
                output << '"';
            }
        }
        // End of the row (CR + LF)
        output << "\r\n";
    }
}
|
||||||
|
|
65
dic/csv_helper.h
Normal file
65
dic/csv_helper.h
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* Eliot
|
||||||
|
* Copyright (C) 2012 Olivier Teulière
|
||||||
|
* Authors: Olivier Teulière <ipkiss @@ gmail.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef CSV_HELPER_H_
|
||||||
|
#define CSV_HELPER_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <iosfwd>
|
||||||
|
|
||||||
|
#include "logging.h"
|
||||||
|
#include "base_exception.h"
|
||||||
|
|
||||||
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
using std::istream;
|
||||||
|
using std::ostream;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class to handle CSV (Comma Separated Value) files.
|
||||||
|
* The CSV format respects the guidelines summary listed here (RFC 4180):
|
||||||
|
* https://en.wikipedia.org/wiki/Comma-separated_values#Toward_standardization
|
||||||
|
* except for the newlines in fields which are not supported when parsing.
|
||||||
|
*/
|
||||||
|
class CsvHelper
|
||||||
|
{
|
||||||
|
DEFINE_LOGGER();
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef vector<string> DataRow;
|
||||||
|
|
||||||
|
static vector<DataRow> readStream(istream &input);
|
||||||
|
|
||||||
|
static void writeStream(ostream &output, const vector<DataRow> &iData);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CsvException: public BaseException
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CsvException(const std::string &iMessage)
|
||||||
|
: BaseException(iMessage) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue