arabica/Utils/StringAdaptor.h

314 lines
9.2 KiB
C
Raw Normal View History

#ifndef ARABICA_UTILS_STRING_ADAPTOR_H
#define ARABICA_UTILS_STRING_ADAPTOR_H
2002-06-21 13:16:28 +02:00
/*
* $Id$
*/
2003-09-09 13:14:48 +02:00
#include <SAX/ArabicaConfig.h>
2002-06-21 13:16:28 +02:00
#include <string>
#include <Utils/convertstream.h>
2003-09-02 13:23:52 +02:00
#include <Utils/utf8ucs2codecvt.h>
2002-06-21 13:16:28 +02:00
namespace Arabica
2002-06-21 13:16:28 +02:00
{
2005-09-30 23:36:11 +02:00
/**
 * Adaptor that exposes a uniform, static interface over a string class.
 *
 * This primary template only declares the members; no definitions are
 * visible in this part of the file.  The specializations for std::string
 * and std::wstring further down supply concrete implementations.
 *
 * stringT must provide the nested types const_iterator, value_type and
 * size_type, since they are re-exported below.
 */
template<class stringT>
class default_string_adaptor
{
public:
  typedef stringT string_type;
  typedef typename string_type::const_iterator const_iterator;
  typedef typename string_type::value_type value_type;
  typedef typename string_type::size_type size_type;
  // "not found" marker; the specializations in this file define it as
  // static_cast<size_type>(-1)
  static const size_type npos;

  // construction from an iterator range or a null-terminated value_type array
  template<class InputIterator> static string_type construct(InputIterator from, InputIterator to);
  static string_type construct(const value_type* s);

  // conversion from UTF-8 encoded char data, with or without an explicit length
  static value_type convert_from_utf8(char c);
  static string_type construct_from_utf8(const char* str);
  static string_type construct_from_utf8(const char* str, int length);
#ifndef ARABICA_NO_WCHAR_T
  // conversion from wchar_t data, with or without an explicit length
  static string_type construct_from_utf16(const wchar_t* str);
  static string_type construct_from_utf16(const wchar_t* str, int length);
#endif

  // all these functions should operate as std::string member functions do
  static bool empty(const string_type& str);
  static size_type find(const string_type& str, const string_type& what);
  static string_type substr(const string_type& str, const size_type& offset);
  static string_type substr(const string_type& str, const size_type& offset, const size_type& count);
  static size_type length(const string_type& str);
  static void append(string_type& str, const string_type& a);
  static void insert(string_type& str, size_type offset, const string_type& a);
  static void replace(string_type& str, size_type offset, size_type count, const string_type& a);

  static const_iterator begin(const string_type& str);
  static const_iterator end(const string_type& str);

  // only used to construct error strings - don't have to be highly efficient!
  static std::string asStdString(const string_type& str);
#ifndef ARABICA_NO_WCHAR_T
  static std::wstring asStdWString(const string_type& str);
#endif
}; // class default_string_adaptor
2002-06-21 13:16:28 +02:00
// specialize for std::string and std::wstring
template<>
class default_string_adaptor<std::string>
{
public:
2005-09-30 23:36:11 +02:00
typedef std::string string_type;
2005-09-28 22:02:00 +02:00
typedef std::string::const_iterator const_iterator;
2005-10-03 14:40:44 +02:00
typedef std::string::iterator mutable_iterator;
typedef std::string::value_type value_type;
typedef std::string::size_type size_type;
static const size_type npos = static_cast<size_type>(-1);
2005-09-30 23:36:11 +02:00
template<class InputIterator> static std::string construct(InputIterator from, InputIterator to)
{
return std::string(from, to);
} // construct
2002-06-21 13:16:28 +02:00
2005-09-30 23:36:11 +02:00
static std::string construct(const value_type* s)
{
return std::string(s);
} // construct
static char convert_from_utf8(char c) { return c; }
static std::string construct_from_utf8(const char* str)
2002-06-21 13:16:28 +02:00
{
return str ? std::string(str) : std::string();
} // makeStringT
2005-09-30 23:36:11 +02:00
static std::string construct_from_utf8(const char* str, int length)
2002-06-21 13:16:28 +02:00
{
return std::string(str, length);
} // makeStringT
2005-09-30 23:36:11 +02:00
static const std::string& asStdString(const std::string& str)
2003-09-09 15:09:48 +02:00
{
return str;
} // toStdString
#ifndef ARABICA_NO_WCHAR_T
2003-09-11 16:05:18 +02:00
typedef Arabica::convert::basic_iconvertstream<wchar_t, std::char_traits<wchar_t>,
2003-09-09 15:09:48 +02:00
char, std::char_traits<char> > widener;
2003-09-11 16:05:18 +02:00
typedef Arabica::convert::basic_oconvertstream<wchar_t, std::char_traits<wchar_t>,
2003-09-09 15:09:48 +02:00
char, std::char_traits<char> > narrower;
2005-10-16 00:43:36 +02:00
static std::string construct_from_utf16(const wchar_t* str)
2002-06-21 13:16:28 +02:00
{
2005-10-16 00:43:36 +02:00
#ifndef ARABICA_VS6_WORKAROUND
std::locale loc(std::locale(), new Arabica::convert::utf8ucs2codecvt());
#else
std::locale loc(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt));
#endif
narrower n;
n.imbue(loc);
n << str;
return n.str();
2002-06-21 13:16:28 +02:00
} // makeStringT
2005-10-16 00:43:36 +02:00
static std::string construct_from_utf16(const wchar_t* str, int length)
2002-06-21 13:16:28 +02:00
{
2005-10-16 00:43:36 +02:00
#ifndef ARABICA_VS6_WORKAROUND
std::locale loc(std::locale(), new Arabica::convert::utf8ucs2codecvt());
#else
std::locale loc(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt));
#endif
narrower n;
n.imbue(loc);
for(int i = 0; i < length; ++i)
n << str[i];
return n.str();
} // construct_from_utf16
static std::wstring asStdWString(const std::string& str)
2002-06-21 13:16:28 +02:00
{
2005-10-16 00:43:36 +02:00
#ifndef ARABICA_VS6_WORKAROUND
std::locale loc(std::locale(), new Arabica::convert::utf8ucs2codecvt());
#else
std::locale loc(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt));
#endif
widener w;
w.str(str);
return w.str();
2002-06-21 13:16:28 +02:00
} // toStdWString
2003-09-09 15:09:48 +02:00
#endif
2002-06-21 13:16:28 +02:00
static bool empty(const std::string& str)
{
return str.empty();
} // empty
static size_type find(const std::string& str, const std::string& what)
{
return str.find(what);
} // find
2005-10-03 14:40:44 +02:00
static size_type find(const std::string& str, char what)
{
return str.find(what);
} // find
static std::string substr(const std::string& str, const size_type& offset)
{
return str.substr(offset);
} // substr
static std::string substr(const std::string& str, const size_type& offset, const size_type& count)
{
return str.substr(offset, count);
} // substr
static size_type length(const std::string& str)
{
return str.length(); // TODO - fix me for utf8
} // length
static void append(std::string& str, const std::string& a)
{
str.append(a);
} // append
static void insert(std::string& str, size_type offset, const std::string& a)
{
str.insert(offset, a);
} // insert
static void replace(std::string& str, size_type offset, size_type count, const std::string& a)
{
str.replace(offset, count, a);
} // replace
2005-09-28 22:02:00 +02:00
static const_iterator begin(const std::string& str) { return str.begin(); }
2005-10-03 14:40:44 +02:00
static mutable_iterator begin(std::string& str) { return str.begin(); }
2005-09-28 22:02:00 +02:00
static const_iterator end(const std::string& str) { return str.end(); }
2005-10-03 14:40:44 +02:00
static mutable_iterator end(std::string& str) { return str.end(); }
2002-06-21 13:16:28 +02:00
}; // class default_string_adaptor
2003-09-09 15:09:48 +02:00
#ifndef ARABICA_NO_WCHAR_T
2002-06-21 13:16:28 +02:00
template<>
class default_string_adaptor<std::wstring>
{
2003-09-11 16:05:18 +02:00
typedef Arabica::convert::basic_iconvertstream<wchar_t, std::char_traits<wchar_t>,
2002-06-21 13:16:28 +02:00
char, std::char_traits<char> > widener;
2003-09-11 16:05:18 +02:00
typedef Arabica::convert::basic_oconvertstream<wchar_t, std::char_traits<wchar_t>,
2002-06-21 13:16:28 +02:00
char, std::char_traits<char> > narrower;
public:
typedef std::wstring stringT;
2005-09-28 22:02:00 +02:00
typedef std::wstring::const_iterator const_iterator;
typedef std::wstring::value_type value_type;
typedef std::wstring::size_type size_type;
static const size_type npos = static_cast<size_type>(-1);
2005-09-28 22:02:00 +02:00
wchar_t makeValueT(char c) const
2002-06-21 13:16:28 +02:00
{
return static_cast<wchar_t>(c);
} // makeValueT
2005-09-30 23:36:11 +02:00
std::wstring construct_from_utf8(const char* str) const
2002-06-21 13:16:28 +02:00
{
if(str)
w_.str(str);
else
w_.str("");
return w_.str();
} // makeStringT
2005-09-30 23:36:11 +02:00
std::wstring construct_from_utf8(const char* str, int length) const
2002-06-21 13:16:28 +02:00
{
w_.str(std::string(str, length));
return w_.str();
} // makeStringT
2005-09-30 23:36:11 +02:00
std::wstring construct_from_utf16(const wchar_t* str) const
2002-06-21 13:16:28 +02:00
{
return str ? std::wstring(str) : std::wstring();
} // makeStringT
2005-09-30 23:36:11 +02:00
std::wstring construct_from_utf16(const wchar_t* str, int length) const
2002-06-21 13:16:28 +02:00
{
return std::wstring(str, length);
} // makeStringT
2005-09-28 22:02:00 +02:00
std::wstring makeStringT(const const_iterator& first, const const_iterator& last) const
{
return std::wstring(first, last);
} // makeStringT
2002-06-21 13:16:28 +02:00
2005-09-28 22:02:00 +02:00
std::string asStdString(const std::wstring& str) const
2002-06-21 13:16:28 +02:00
{
n_.str(str);
return n_.str();
} // toStdString
2005-09-28 22:02:00 +02:00
const std::wstring& asStdWString(const std::wstring& str) const
2002-06-21 13:16:28 +02:00
{
return str;
} // toStdWString
static bool empty(const std::wstring& str)
{
return str.empty();
} // empty
static size_type find(const std::wstring& str, const std::wstring& what)
{
return str.find(what);
} // find
static std::wstring substr(const std::wstring& str, const size_type& offset)
{
return str.substr(offset);
} // substr
static std::wstring substr(const std::wstring& str, const size_type& offset, const size_type& count)
{
return str.substr(offset, count);
} // substr
static size_type length(const std::string& str)
{
return str.length(); // TODO - fix me for utf8
} // length
static void append(std::wstring& str, const std::wstring& a)
{
str.append(a);
} // append
static void insert(std::wstring& str, size_type offset, const std::wstring& a)
{
str.insert(offset, a);
} // insert
static void replace(std::wstring& str, size_type offset, size_type count, const std::wstring& a)
{
str.replace(offset, count, a);
} // replace
2005-09-28 22:02:00 +02:00
static const_iterator begin(const std::wstring& str) { return str.begin(); }
static const_iterator end(const std::wstring& str) { return str.end(); }
2002-06-21 13:16:28 +02:00
default_string_adaptor() :
2003-09-09 15:09:48 +02:00
#ifndef ARABICA_VS6_WORKAROUND
2003-09-11 16:05:18 +02:00
loc_(std::locale(), new Arabica::convert::utf8ucs2codecvt()),
2002-06-21 13:16:28 +02:00
#else
2003-09-11 16:05:18 +02:00
loc_(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt())),
2002-06-21 13:16:28 +02:00
#endif
n_(),
w_()
{
n_.imbue(loc_);
w_.imbue(loc_);
} // default_string_adaptor
private:
std::locale loc_;
mutable narrower n_;
mutable widener w_;
}; // class default_string_adaptor
2003-09-09 15:09:48 +02:00
#endif // ARABICA_NO_WCHAR_T
2002-06-21 13:16:28 +02:00
} // namespace Arabica
2002-06-21 13:16:28 +02:00
#endif
// end of file