2005-08-15 23:09:13 +02:00
|
|
|
#ifndef ARABICA_UTILS_STRING_ADAPTOR_H
|
|
|
|
#define ARABICA_UTILS_STRING_ADAPTOR_H
|
2002-06-21 13:16:28 +02:00
|
|
|
/*
|
|
|
|
* $Id$
|
|
|
|
*/
|
|
|
|
|
2003-09-09 13:14:48 +02:00
|
|
|
#include <SAX/ArabicaConfig.h>
|
2002-06-21 13:16:28 +02:00
|
|
|
#include <string>
|
|
|
|
#include <Utils/convertstream.h>
|
2003-09-02 13:23:52 +02:00
|
|
|
#include <Utils/utf8ucs2codecvt.h>
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-08-15 23:09:13 +02:00
|
|
|
namespace Arabica
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
|
2005-09-30 23:36:11 +02:00
|
|
|
// Primary template for the string adaptor.
//
// This template only *declares* the operations an adaptor offers for a
// string type stringT; no member is defined in this block.  The typedefs are
// taken from stringT itself, so stringT has to provide const_iterator,
// value_type and size_type.
template<class stringT>
class default_string_adaptor
{
public:
  typedef stringT string_type;
  typedef typename string_type::const_iterator const_iterator;
  typedef typename string_type::value_type value_type;
  typedef typename string_type::size_type size_type;

  // "not found" marker; declared here without an initializer.
  static const size_type npos;

  // --- construction -------------------------------------------------------
  // Build a string from the iterator range [from, to).
  template<class InputIterator> static string_type construct(InputIterator from, InputIterator to);
  // Build a string from a pointer to value_type characters.
  static string_type construct(const value_type* s);

  // Convert a single char to the adaptor's value_type.
  static value_type convert_from_utf8(char c);
  // Build a string from narrow (char) data, optionally with an explicit length.
  static string_type construct_from_utf8(const char* str);
  static string_type construct_from_utf8(const char* str, int length);
#ifndef ARABICA_NO_WCHAR_T
  // Build a string from wide (wchar_t) data, optionally with an explicit length.
  static string_type construct_from_utf16(const wchar_t* str);
  static string_type construct_from_utf16(const wchar_t* str, int length);
#endif

  // --- string operations --------------------------------------------------
  // all these functions should operate as std::string member functions do
  static bool empty(const string_type& str);
  static size_type find(const string_type& str, const string_type& what);
  static string_type substr(const string_type& str, const size_type& offset);
  static string_type substr(const string_type& str, const size_type& offset, const size_type& count);
  static size_type length(const string_type& str);
  static void append(string_type& str, const string_type& a);
  static void insert(string_type& str, size_type offset, const string_type& a);
  static void replace(string_type& str, size_type offset, size_type count, const string_type& a);
  static const_iterator begin(const string_type& str);
  static const_iterator end(const string_type& str);

  // --- conversion to standard strings -------------------------------------
  // only used to construct error strings - don't have to be highly efficient!
  static std::string asStdString(const string_type& str);
#ifndef ARABICA_NO_WCHAR_T
  static std::wstring asStdWString(const string_type& str);
#endif
}; // class default_string_adaptor
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
// specialize for std::string and std::wstring
|
|
|
|
template<>
|
|
|
|
class default_string_adaptor<std::string>
|
|
|
|
{
|
|
|
|
public:
|
2005-09-30 23:36:11 +02:00
|
|
|
typedef std::string string_type;
|
2005-09-28 22:02:00 +02:00
|
|
|
typedef std::string::const_iterator const_iterator;
|
2005-10-03 14:40:44 +02:00
|
|
|
typedef std::string::iterator mutable_iterator;
|
2004-05-27 11:19:51 +02:00
|
|
|
typedef std::string::value_type value_type;
|
2005-09-08 23:43:21 +02:00
|
|
|
typedef std::string::size_type size_type;
|
|
|
|
static const size_type npos = static_cast<size_type>(-1);
|
2004-05-27 11:19:51 +02:00
|
|
|
|
2005-09-30 23:36:11 +02:00
|
|
|
template<class InputIterator> static std::string construct(InputIterator from, InputIterator to)
|
|
|
|
{
|
|
|
|
return std::string(from, to);
|
|
|
|
} // construct
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-09-30 23:36:11 +02:00
|
|
|
static std::string construct(const value_type* s)
|
|
|
|
{
|
|
|
|
return std::string(s);
|
|
|
|
} // construct
|
|
|
|
|
|
|
|
static char convert_from_utf8(char c) { return c; }
|
|
|
|
|
|
|
|
static std::string construct_from_utf8(const char* str)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return str ? std::string(str) : std::string();
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
static std::string construct_from_utf8(const char* str, int length)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return std::string(str, length);
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
static const std::string& asStdString(const std::string& str)
|
2003-09-09 15:09:48 +02:00
|
|
|
{
|
|
|
|
return str;
|
|
|
|
} // toStdString
|
|
|
|
|
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
2003-09-11 16:05:18 +02:00
|
|
|
typedef Arabica::convert::basic_iconvertstream<wchar_t, std::char_traits<wchar_t>,
|
2003-09-09 15:09:48 +02:00
|
|
|
char, std::char_traits<char> > widener;
|
2003-09-11 16:05:18 +02:00
|
|
|
typedef Arabica::convert::basic_oconvertstream<wchar_t, std::char_traits<wchar_t>,
|
2003-09-09 15:09:48 +02:00
|
|
|
char, std::char_traits<char> > narrower;
|
|
|
|
|
2005-09-30 23:36:11 +02:00
|
|
|
std::string construct_from_utf16(const wchar_t* str)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
std::wstring s;
|
|
|
|
if(str)
|
|
|
|
s = str;
|
|
|
|
n_.str(s);
|
|
|
|
return n_.str();
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
std::string construct_from_utf16(const wchar_t* str, int length)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
n_.str(std::wstring(str, length));
|
|
|
|
return n_.str();
|
|
|
|
} // makeStringT
|
2005-09-28 22:02:00 +02:00
|
|
|
std::wstring asStdWString(const std::string& str)
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
w_.str(str);
|
|
|
|
return w_.str();
|
|
|
|
} // toStdWString
|
2003-09-09 15:09:48 +02:00
|
|
|
#endif
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-09-08 23:43:21 +02:00
|
|
|
static bool empty(const std::string& str)
|
|
|
|
{
|
|
|
|
return str.empty();
|
|
|
|
} // empty
|
|
|
|
|
|
|
|
static size_type find(const std::string& str, const std::string& what)
|
|
|
|
{
|
|
|
|
return str.find(what);
|
|
|
|
} // find
|
|
|
|
|
2005-10-03 14:40:44 +02:00
|
|
|
static size_type find(const std::string& str, char what)
|
|
|
|
{
|
|
|
|
return str.find(what);
|
|
|
|
} // find
|
|
|
|
|
2005-09-08 23:43:21 +02:00
|
|
|
static std::string substr(const std::string& str, const size_type& offset)
|
|
|
|
{
|
|
|
|
return str.substr(offset);
|
|
|
|
} // substr
|
|
|
|
|
|
|
|
static std::string substr(const std::string& str, const size_type& offset, const size_type& count)
|
|
|
|
{
|
|
|
|
return str.substr(offset, count);
|
|
|
|
} // substr
|
|
|
|
|
|
|
|
static size_type length(const std::string& str)
|
|
|
|
{
|
|
|
|
return str.length(); // TODO - fix me for utf8
|
|
|
|
} // length
|
|
|
|
|
|
|
|
static void append(std::string& str, const std::string& a)
|
|
|
|
{
|
|
|
|
str.append(a);
|
|
|
|
} // append
|
|
|
|
|
|
|
|
static void insert(std::string& str, size_type offset, const std::string& a)
|
|
|
|
{
|
|
|
|
str.insert(offset, a);
|
|
|
|
} // insert
|
|
|
|
|
|
|
|
static void replace(std::string& str, size_type offset, size_type count, const std::string& a)
|
|
|
|
{
|
|
|
|
str.replace(offset, count, a);
|
|
|
|
} // replace
|
|
|
|
|
2005-09-28 22:02:00 +02:00
|
|
|
static const_iterator begin(const std::string& str) { return str.begin(); }
|
2005-10-03 14:40:44 +02:00
|
|
|
static mutable_iterator begin(std::string& str) { return str.begin(); }
|
2005-09-28 22:02:00 +02:00
|
|
|
static const_iterator end(const std::string& str) { return str.end(); }
|
2005-10-03 14:40:44 +02:00
|
|
|
static mutable_iterator end(std::string& str) { return str.end(); }
|
2005-09-28 22:02:00 +02:00
|
|
|
|
2003-09-09 15:09:48 +02:00
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
2002-10-08 06:37:14 +02:00
|
|
|
default_string_adaptor() :
|
2003-09-09 15:09:48 +02:00
|
|
|
#ifndef ARABICA_VS6_WORKAROUND
|
2003-09-11 16:05:18 +02:00
|
|
|
loc_(std::locale(), new Arabica::convert::utf8ucs2codecvt()),
|
2002-06-21 13:16:28 +02:00
|
|
|
#else
|
2003-09-11 16:05:18 +02:00
|
|
|
loc_(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt)),
|
2002-06-21 13:16:28 +02:00
|
|
|
#endif
|
|
|
|
n_(),
|
|
|
|
w_()
|
|
|
|
{
|
|
|
|
n_.imbue(loc_);
|
|
|
|
w_.imbue(loc_);
|
|
|
|
} // default_string_adaptor
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::locale loc_;
|
|
|
|
mutable narrower n_;
|
|
|
|
mutable widener w_;
|
2003-09-09 15:09:48 +02:00
|
|
|
#else
|
|
|
|
default_string_adaptor() { }
|
|
|
|
#endif
|
2002-06-21 13:16:28 +02:00
|
|
|
}; // class default_string_adaptor
|
|
|
|
|
2003-09-09 15:09:48 +02:00
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
2002-06-21 13:16:28 +02:00
|
|
|
template<>
|
|
|
|
class default_string_adaptor<std::wstring>
|
|
|
|
{
|
2003-09-11 16:05:18 +02:00
|
|
|
typedef Arabica::convert::basic_iconvertstream<wchar_t, std::char_traits<wchar_t>,
|
2002-06-21 13:16:28 +02:00
|
|
|
char, std::char_traits<char> > widener;
|
2003-09-11 16:05:18 +02:00
|
|
|
typedef Arabica::convert::basic_oconvertstream<wchar_t, std::char_traits<wchar_t>,
|
2002-06-21 13:16:28 +02:00
|
|
|
char, std::char_traits<char> > narrower;
|
|
|
|
public:
|
2004-05-27 11:19:51 +02:00
|
|
|
typedef std::wstring stringT;
|
2005-09-28 22:02:00 +02:00
|
|
|
typedef std::wstring::const_iterator const_iterator;
|
2004-05-27 11:19:51 +02:00
|
|
|
typedef std::wstring::value_type value_type;
|
2005-09-08 23:43:21 +02:00
|
|
|
typedef std::wstring::size_type size_type;
|
|
|
|
static const size_type npos = static_cast<size_type>(-1);
|
2004-05-27 11:19:51 +02:00
|
|
|
|
2005-09-28 22:02:00 +02:00
|
|
|
wchar_t makeValueT(char c) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return static_cast<wchar_t>(c);
|
|
|
|
} // makeValueT
|
|
|
|
|
2005-09-30 23:36:11 +02:00
|
|
|
std::wstring construct_from_utf8(const char* str) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
if(str)
|
|
|
|
w_.str(str);
|
|
|
|
else
|
|
|
|
w_.str("");
|
|
|
|
return w_.str();
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
std::wstring construct_from_utf8(const char* str, int length) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
w_.str(std::string(str, length));
|
|
|
|
return w_.str();
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
std::wstring construct_from_utf16(const wchar_t* str) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return str ? std::wstring(str) : std::wstring();
|
|
|
|
} // makeStringT
|
2005-09-30 23:36:11 +02:00
|
|
|
std::wstring construct_from_utf16(const wchar_t* str, int length) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return std::wstring(str, length);
|
|
|
|
} // makeStringT
|
2005-09-28 22:02:00 +02:00
|
|
|
std::wstring makeStringT(const const_iterator& first, const const_iterator& last) const
|
|
|
|
{
|
|
|
|
return std::wstring(first, last);
|
|
|
|
} // makeStringT
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-09-28 22:02:00 +02:00
|
|
|
std::string asStdString(const std::wstring& str) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
n_.str(str);
|
|
|
|
return n_.str();
|
|
|
|
} // toStdString
|
2005-09-28 22:02:00 +02:00
|
|
|
const std::wstring& asStdWString(const std::wstring& str) const
|
2002-06-21 13:16:28 +02:00
|
|
|
{
|
|
|
|
return str;
|
|
|
|
} // toStdWString
|
|
|
|
|
2005-09-08 23:43:21 +02:00
|
|
|
static bool empty(const std::wstring& str)
|
|
|
|
{
|
|
|
|
return str.empty();
|
|
|
|
} // empty
|
|
|
|
|
|
|
|
static size_type find(const std::wstring& str, const std::wstring& what)
|
|
|
|
{
|
|
|
|
return str.find(what);
|
|
|
|
} // find
|
|
|
|
|
|
|
|
static std::wstring substr(const std::wstring& str, const size_type& offset)
|
|
|
|
{
|
|
|
|
return str.substr(offset);
|
|
|
|
} // substr
|
|
|
|
|
|
|
|
static std::wstring substr(const std::wstring& str, const size_type& offset, const size_type& count)
|
|
|
|
{
|
|
|
|
return str.substr(offset, count);
|
|
|
|
} // substr
|
|
|
|
|
|
|
|
static size_type length(const std::string& str)
|
|
|
|
{
|
|
|
|
return str.length(); // TODO - fix me for utf8
|
|
|
|
} // length
|
|
|
|
|
|
|
|
static void append(std::wstring& str, const std::wstring& a)
|
|
|
|
{
|
|
|
|
str.append(a);
|
|
|
|
} // append
|
|
|
|
|
|
|
|
static void insert(std::wstring& str, size_type offset, const std::wstring& a)
|
|
|
|
{
|
|
|
|
str.insert(offset, a);
|
|
|
|
} // insert
|
|
|
|
|
|
|
|
static void replace(std::wstring& str, size_type offset, size_type count, const std::wstring& a)
|
|
|
|
{
|
|
|
|
str.replace(offset, count, a);
|
|
|
|
} // replace
|
|
|
|
|
2005-09-28 22:02:00 +02:00
|
|
|
static const_iterator begin(const std::wstring& str) { return str.begin(); }
|
|
|
|
static const_iterator end(const std::wstring& str) { return str.end(); }
|
|
|
|
|
2002-06-21 13:16:28 +02:00
|
|
|
default_string_adaptor() :
|
2003-09-09 15:09:48 +02:00
|
|
|
#ifndef ARABICA_VS6_WORKAROUND
|
2003-09-11 16:05:18 +02:00
|
|
|
loc_(std::locale(), new Arabica::convert::utf8ucs2codecvt()),
|
2002-06-21 13:16:28 +02:00
|
|
|
#else
|
2003-09-11 16:05:18 +02:00
|
|
|
loc_(std::_Addfac(std::locale(), new Arabica::convert::utf8ucs2codecvt())),
|
2002-06-21 13:16:28 +02:00
|
|
|
#endif
|
|
|
|
n_(),
|
|
|
|
w_()
|
|
|
|
{
|
|
|
|
n_.imbue(loc_);
|
|
|
|
w_.imbue(loc_);
|
|
|
|
} // default_string_adaptor
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::locale loc_;
|
|
|
|
mutable narrower n_;
|
|
|
|
mutable widener w_;
|
|
|
|
}; // class default_string_adaptor
|
2003-09-09 15:09:48 +02:00
|
|
|
#endif // ARABICA_NO_WCHAR_T
|
2002-06-21 13:16:28 +02:00
|
|
|
|
2005-08-15 23:09:13 +02:00
|
|
|
} // namespace Arabica
|
2002-06-21 13:16:28 +02:00
|
|
|
|
|
|
|
#endif
|
|
|
|
// end of file
|