2005-08-15 21:09:13 +00:00
|
|
|
#ifndef ARABICA_UTILS_STRING_ADAPTOR_H
|
|
|
|
#define ARABICA_UTILS_STRING_ADAPTOR_H
|
2002-06-21 11:16:28 +00:00
|
|
|
|
2007-09-04 22:55:47 +00:00
|
|
|
#include <SAX/ArabicaConfig.hpp>
|
2002-06-21 11:16:28 +00:00
|
|
|
#include <string>
|
2007-09-10 17:32:43 +00:00
|
|
|
#include <io/convertstream.hpp>
|
2007-09-10 16:21:31 +00:00
|
|
|
#include <convert/utf8ucs2codecvt.hpp>
|
2007-09-10 17:52:04 +00:00
|
|
|
#include <Arabica/stringadaptortag.hpp>
|
2005-10-22 03:21:05 +00:00
|
|
|
|
2005-08-15 21:09:13 +00:00
|
|
|
namespace Arabica
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
|
2005-12-07 10:51:00 +00:00
|
|
|
// Primary template, declared but not defined here: only the explicit
// specializations for std::string and std::wstring in this header
// provide a usable default_string_adaptor.
template<class stringT> class default_string_adaptor;
|
2005-10-22 03:21:05 +00:00
|
|
|
|
2005-09-30 21:36:11 +00:00
|
|
|
// Static helper operations shared by the default_string_adaptor
// specializations.
//
// Every operation is a static member that forwards to the member of the
// same name on stringT, so stringT has to supply the nested
// const_iterator/iterator/value_type/size_type types and the
// length/empty/find/substr/append/insert/replace/begin/end members used
// below (std::basic_string does).
template<class stringT>
class default_string_adaptor_base
{
public:
  typedef stringT string_type;
  typedef typename string_type::const_iterator const_iterator;
  typedef typename string_type::iterator mutable_iterator;
  typedef typename string_type::iterator iterator;
  typedef typename string_type::value_type value_type;
  typedef typename string_type::size_type size_type;

  virtual ~default_string_adaptor_base() {}

  // "Not found" marker returned by the find() overloads: -1 converted to
  // size_type (the maximum value when size_type is unsigned, which is
  // what std::basic_string::npos is).
  static size_type npos()
  {
    return static_cast<size_type>(-1);
  }

  //todo: is this safe?
  // Builds a string from an arbitrary iterator range [from, to).
  template<class InputIterator>
  static inline string_type construct(InputIterator from, InputIterator to)
  {
    return string_type(from, to);
  }

  // Builds a string from a range of the string type's own const_iterators.
  static inline string_type construct(const_iterator from, const_iterator to)
  {
    return string_type(from, to);
  }

  // Builds a string from a terminated character array; a null pointer
  // yields an empty string rather than being passed to the constructor.
  static string_type construct(const value_type* str)
  {
    return str ? string_type(str) : string_type();
  }

  // Returns a reference to a single function-local static empty string.
  static const string_type& empty_string() { static string_type es; return es; }

  //todo: fix for utf8
  // Number of value_type units as reported by str.length().
  static size_type length(const string_type& str) { return str.length(); }

  // all these functions should operate as std::string member functions do
  static bool empty(const string_type& str) { return str.empty(); }
  static size_type find(const string_type& str, value_type what) { return str.find(what); }
  static size_type find(const string_type& str, const string_type& what) { return str.find(what); }
  static size_type find(const string_type& str, value_type what, size_type from) { return str.find(what, from); }
  static size_type find(const string_type& str, const string_type& what, size_type from) { return str.find(what, from); }
  static string_type substr(const string_type& str, const size_type& offset) { return str.substr(offset); }
  static string_type substr(const string_type& str, const size_type& offset, const size_type& count) { return str.substr(offset, count); }
  static void append(string_type& str, const string_type& a) { str.append(a); }
  static void insert(string_type& str, size_type offset, const string_type& a) { str.insert(offset, a); }
  static void replace(string_type& str, size_type offset, size_type count, const string_type& a) { str.replace(offset, count, a); }

  // Read-only iteration.
  static const_iterator begin(const string_type& str) { return str.begin(); }
  static const_iterator end(const string_type& str) { return str.end(); }

  // Mutable iteration.
  static iterator begin(string_type& str) { return str.begin(); }
  static iterator end(string_type& str) { return str.end(); }

  // only used to construct error strings - don't have to be highly efficient!
  // Declaration only: no definition appears in this header, the
  // std::string and std::wstring specializations declare their own.
  static std::string asStdString(const string_type& str);

#ifndef ARABICA_NO_WCHAR_T
  // Declarations only, as for asStdString above.
  static string_type construct_from_utf16(const wchar_t* str);
  static string_type construct_from_utf16(const wchar_t* str, int length);
  static std::wstring asStdWString(const string_type& str);

  // Converting streams between char and wchar_t text. In this header
  // widener_t is fed a std::string and read back as a std::wstring,
  // narrower_t the other way round.
  typedef Arabica::io::basic_iconvertstream<wchar_t, std::char_traits<wchar_t>,
                               char, std::char_traits<char> > widener_t;
  typedef Arabica::io::basic_oconvertstream<wchar_t, std::char_traits<wchar_t>,
                               char, std::char_traits<char> > narrower_t;

  // Locale imbued on the converting streams: a copy of the current global
  // locale with a utf8ucs2codecvt facet added. Built once, on first call,
  // and then shared.
  static const std::locale& utf8ucs2_locale()
  {
    static const std::locale loc = std::locale(std::locale(), new Arabica::convert::utf8ucs2codecvt);
    return loc;
  }
#endif //ARABICA_NO_WCHAR_T

}; // class default_string_adaptor_base
|
|
|
|
|
2006-06-08 09:51:18 +00:00
|
|
|
|
2002-06-21 11:16:28 +00:00
|
|
|
// specialize for std::string and std::wstring
|
|
|
|
template<>
|
2009-10-06 15:13:17 +02:00
|
|
|
class default_string_adaptor<std::string> :
|
2005-12-07 10:51:00 +00:00
|
|
|
public string_adaptor_tag,
|
|
|
|
public default_string_adaptor_base<std::string>
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
public:
|
2005-09-30 21:36:11 +00:00
|
|
|
|
|
|
|
static char convert_from_utf8(char c) { return c; }
|
|
|
|
|
|
|
|
static std::string construct_from_utf8(const char* str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
return str ? std::string(str) : std::string();
|
2005-12-07 10:51:00 +00:00
|
|
|
} // construct_from_utf8
|
2005-10-22 03:21:05 +00:00
|
|
|
|
2005-09-30 21:36:11 +00:00
|
|
|
static std::string construct_from_utf8(const char* str, int length)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
return std::string(str, length);
|
2005-12-07 10:51:00 +00:00
|
|
|
} // construct_from_utf8
|
2005-10-22 03:21:05 +00:00
|
|
|
|
2005-09-30 21:36:11 +00:00
|
|
|
static const std::string& asStdString(const std::string& str)
|
2003-09-09 13:09:48 +00:00
|
|
|
{
|
|
|
|
return str;
|
2005-12-07 10:51:00 +00:00
|
|
|
} // asStdString
|
2003-09-09 13:09:48 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
2005-10-15 22:43:36 +00:00
|
|
|
static std::string construct_from_utf16(const wchar_t* str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
narrower_t n;
|
|
|
|
n.imbue(utf8ucs2_locale());
|
|
|
|
n.str(str ? str : L"");
|
|
|
|
//n << str;
|
2005-10-15 22:43:36 +00:00
|
|
|
return n.str();
|
2005-10-22 03:21:05 +00:00
|
|
|
}
|
2005-10-15 22:43:36 +00:00
|
|
|
|
2009-10-06 15:13:17 +02:00
|
|
|
static std::string construct_from_utf16(const wchar_t* str, int length)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
narrower_t n;
|
|
|
|
n.imbue(utf8ucs2_locale());
|
|
|
|
n.str(std::wstring(str, length));
|
|
|
|
//for(int i = 0; i < length; ++i)
|
|
|
|
// n << str[i];
|
2005-10-15 22:43:36 +00:00
|
|
|
return n.str();
|
|
|
|
} // construct_from_utf16
|
|
|
|
|
2009-10-06 15:13:17 +02:00
|
|
|
static std::wstring asStdWString(const std::string& str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
widener_t w;
|
|
|
|
w.imbue(utf8ucs2_locale());
|
2005-10-15 22:43:36 +00:00
|
|
|
w.str(str);
|
|
|
|
return w.str();
|
2002-06-21 11:16:28 +00:00
|
|
|
} // toStdWString
|
2005-10-22 03:21:05 +00:00
|
|
|
#endif //ARABICA_NO_WCHAR_T
|
2005-09-08 21:43:21 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
}; // class default_string_adaptor<std::string>
|
2005-10-03 12:40:44 +00:00
|
|
|
|
2002-06-21 11:16:28 +00:00
|
|
|
|
2003-09-09 13:09:48 +00:00
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
2005-10-22 03:21:05 +00:00
|
|
|
|
2002-06-21 11:16:28 +00:00
|
|
|
template<>
|
2009-10-06 15:13:17 +02:00
|
|
|
class default_string_adaptor<std::wstring> :
|
2005-12-07 10:51:00 +00:00
|
|
|
public string_adaptor_tag,
|
|
|
|
public default_string_adaptor_base<std::wstring>
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
public:
|
2005-10-17 22:35:25 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
static wchar_t makeValueT(char c)
|
2005-10-17 22:35:25 +00:00
|
|
|
{
|
2002-06-21 11:16:28 +00:00
|
|
|
return static_cast<wchar_t>(c);
|
2005-10-22 03:21:05 +00:00
|
|
|
} // makeValueT
|
2002-06-21 11:16:28 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
static std::wstring construct_from_utf8(const char* str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
widener_t w;
|
|
|
|
w.imbue(utf8ucs2_locale());
|
|
|
|
w.str(str ? str : "");
|
2005-10-17 22:35:25 +00:00
|
|
|
return w.str();
|
2005-10-22 03:21:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static std::wstring construct_from_utf8(const char* str, int length)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
widener_t w;
|
|
|
|
w.imbue(utf8ucs2_locale());
|
|
|
|
w.str(std::string(str, length));
|
2005-10-17 22:35:25 +00:00
|
|
|
return w.str();
|
2005-10-22 03:21:05 +00:00
|
|
|
}
|
2005-10-17 22:35:25 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
static std::wstring construct_from_utf16(const wchar_t* str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
return str ? std::wstring(str) : std::wstring();
|
2005-10-22 03:21:05 +00:00
|
|
|
}
|
2005-10-17 22:35:25 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
static std::wstring construct_from_utf16(const wchar_t* str, int length)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
return std::wstring(str, length);
|
2005-10-22 03:21:05 +00:00
|
|
|
}
|
2009-10-06 15:13:17 +02:00
|
|
|
|
2005-10-17 22:35:25 +00:00
|
|
|
static std::string asStdString(const std::wstring& str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
2005-10-22 03:21:05 +00:00
|
|
|
narrower_t n;
|
|
|
|
n.imbue(utf8ucs2_locale());
|
|
|
|
n.str(str);
|
2005-10-17 22:35:25 +00:00
|
|
|
return n.str();
|
2002-06-21 11:16:28 +00:00
|
|
|
} // toStdString
|
2005-10-17 22:35:25 +00:00
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
static const std::wstring& asStdWString(const std::wstring& str)
|
2002-06-21 11:16:28 +00:00
|
|
|
{
|
|
|
|
return str;
|
|
|
|
} // toStdWString
|
|
|
|
|
2005-10-22 03:21:05 +00:00
|
|
|
}; // class default_string_adaptor<std::wstring>
|
2005-09-08 21:43:21 +00:00
|
|
|
|
|
|
|
|
2003-09-09 13:09:48 +00:00
|
|
|
#endif // ARABICA_NO_WCHAR_T
|
2002-06-21 11:16:28 +00:00
|
|
|
|
2005-08-15 21:09:13 +00:00
|
|
|
} // namespace Arabica
|
2002-06-21 11:16:28 +00:00
|
|
|
|
|
|
|
#endif
|
|
|
|
// end of file
|