diff --git a/dic/Makefile.am b/dic/Makefile.am
index c89dc25..47d417e 100644
--- a/dic/Makefile.am
+++ b/dic/Makefile.am
@@ -31,6 +31,7 @@ libdic_a_SOURCES = \
 	tile.cpp tile.h \
 	dic.cpp dic.h \
 	dic_search.cpp \
+	csv_helper.cpp csv_helper.h \
 	encoding.cpp encoding.h \
 	stacktrace.cpp stacktrace.h \
 	automaton.cpp automaton.h \
diff --git a/dic/csv_helper.cpp b/dic/csv_helper.cpp
new file mode 100644
index 0000000..13f56df
--- /dev/null
+++ b/dic/csv_helper.cpp
@@ -0,0 +1,172 @@
+/*****************************************************************************
+ * Eliot
+ * Copyright (C) 2012 Olivier Teulière
+ * Authors: Olivier Teulière <ipkiss @@ gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *****************************************************************************/
+
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+
+#include "csv_helper.h"
+
+#include "debug.h"
+
+#if ENABLE_NLS
+#   include <libintl.h>
+#   define _(String) gettext(String)
+#else
+#   define _(String) String
+#endif
+
+using namespace std;
+
+INIT_LOGGER(qt, CsvHelper);
+
+
+vector<CsvHelper::DataRow> CsvHelper::readStream(istream &input)
+{
+    vector<DataRow> data;
+    size_t minLength = static_cast<size_t>(-1);
+    size_t maxLength = 0;
+    string line;
+    string currField;
+    // XXX: Using getline() prevents from parsing newline chars in fields.
+    // But this limitation is not very big, and it makes the code way simpler.
+    while (getline(input, line))
+    {
+        DataRow row;
+        // Parse the line
+        bool inQuotes = false;
+        size_t pos = 0;
+        while (pos < line.size())
+        {
+            char c = line[pos];
+            if (c == ',' && !inQuotes)
+            {
+                // Field separator
+                row.push_back(currField);
+                currField.clear();
+            }
+            else if (c == '"')
+            {
+                if (inQuotes)
+                {
+                    // Is the next char also a quote?
+                    if (pos + 1 < line.size() && line[pos + 1] == '"')
+                    {
+                        // Escaped quote
+                        currField.push_back(c);
+                        ++pos;
+                    }
+                    else
+                    {
+                        // End of quotation
+                        inQuotes = false;
+                    }
+                }
+                else
+                {
+                    if (currField.empty())
+                    {
+                        // Beginning of quotation
+                        inQuotes = true;
+                    }
+                    else
+                    {
+                        // Normal quote in an unquoted field
+                        currField.push_back(c);
+                    }
+                }
+            }
+            else if (c != '\r' && c != '\n')
+            {
+                // Normal char
+                currField.push_back(c);
+            }
+
+            // Next char
+            ++pos;
+        }
+
+        // Add the last field of the line
+        row.push_back(currField);
+        currField.clear();
+
+        data.push_back(row);
+
+        if (minLength > row.size())
+            minLength = row.size();
+        if (maxLength < row.size())
+            maxLength = row.size();
+    }
+
+    // Reject a variable number of fields (an empty input is valid: no rows)
+    if (!data.empty() && minLength != maxLength)
+    {
+        boost::format fmt(_("Invalid CSV file (variable number of fields, from %1% to %2%)"));
+        throw CsvException((fmt % minLength % maxLength).str());
+    }
+
+    return data;
+}
+
+
+void CsvHelper::writeStream(ostream &output, const vector<DataRow> &iData)
+{
+    if (iData.empty())
+        return;
+
+    // Make sure the rows have the same number of fields
+    size_t firstLength = iData.front().size();
+    BOOST_FOREACH(const DataRow &row, iData)
+    {
+        if (row.size() != firstLength)
+            throw CsvException(_("Invalid CSV data (variable number of fields)"));
+    }
+
+    BOOST_FOREACH(const DataRow &row, iData)
+    {
+        bool first = true;
+        BOOST_FOREACH(const string &field, row)
+        {
+            // Add the comma
+            if (first)
+                first = false;
+            else
+                output << ',';
+
+            // Needs quoting?
+            if (field.find_first_of(",\"\r\n") == string::npos)
+                output << field;
+            else
+            {
+                output << '"';
+                BOOST_FOREACH(char c, field)
+                {
+                    output << c;
+                    // Double the quote
+                    if (c == '"')
+                        output << '"';
+                }
+                output << '"';
+            }
+        }
+        // End of the row (CR + LF)
+        output << "\r\n";
+    }
+}
+
diff --git a/dic/csv_helper.h b/dic/csv_helper.h
new file mode 100644
index 0000000..efe216f
--- /dev/null
+++ b/dic/csv_helper.h
@@ -0,0 +1,77 @@
+/*****************************************************************************
+ * Eliot
+ * Copyright (C) 2012 Olivier Teulière
+ * Authors: Olivier Teulière <ipkiss @@ gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *****************************************************************************/
+
+#ifndef CSV_HELPER_H_
+#define CSV_HELPER_H_
+
+#include <vector>
+#include <string>
+#include <iostream>
+
+#include "logging.h"
+#include "base_exception.h"
+
+using std::string;
+using std::vector;
+using std::istream;
+using std::ostream;
+
+
+/**
+ * Utility class to handle CSV (Comma Separated Value) files.
+ * The CSV format respects the guidelines summary listed here (RFC 4180):
+ * https://en.wikipedia.org/wiki/Comma-separated_values#Toward_standardization
+ * except for the newlines in fields which are not supported when parsing.
+ */
+class CsvHelper
+{
+    DEFINE_LOGGER();
+
+public:
+    /// A row of the CSV data, as a list of (unquoted) fields
+    typedef vector<string> DataRow;
+
+    /**
+     * Parse the given stream as CSV data, one row per line.
+     * All the rows must have the same number of fields, otherwise
+     * a CsvException is thrown. An empty stream gives an empty result.
+     */
+    static vector<DataRow> readStream(istream &input);
+
+    /**
+     * Write the given rows to the stream in CSV format, quoting the
+     * fields when needed and ending each row with CR + LF.
+     * Nothing is written if iData is empty. A CsvException is thrown
+     * (before writing anything) if the rows have different sizes.
+     */
+    static void writeStream(ostream &output, const vector<DataRow> &iData);
+
+};
+
+
+/// Exception thrown for invalid CSV data
+class CsvException: public BaseException
+{
+public:
+    CsvException(const std::string &iMessage)
+        : BaseException(iMessage) {}
+};
+
+#endif
+