mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-17 18:12:04 +01:00
identical to utf16utf8codecvt but better named
This commit is contained in:
parent
2563eee75c
commit
a36a7adde7
2 changed files with 141 additions and 0 deletions
65
Utils/ucs2utf8codecvt.cpp
Normal file
65
Utils/ucs2utf8codecvt.cpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
//---------------------------------------------------------------------------
|
||||
// $Id$
|
||||
//---------------------------------------------------------------------------
|
||||
#include "ucs2utf8codecvt.hpp"
|
||||
#include "impl/ucs2_utf8.h"
|
||||
//---------------------------------------------------------------------------
|
||||
// This facet converts from wide chars to char using the
|
||||
// FSS-UTF (UTF-8) encoding.
|
||||
//
|
||||
using namespace Arabica::convert;
|
||||
|
||||
std::codecvt_base::result ucs2utf8codecvt::do_out(std::mbstate_t& /* state */,
|
||||
const char* from,
|
||||
const char* from_end,
|
||||
const char*& from_next,
|
||||
wchar_t* to,
|
||||
wchar_t* to_limit,
|
||||
wchar_t*& to_next) const
|
||||
{
|
||||
return Arabica::Internal::utf8_2_ucs2(from, from_end, from_next, to, to_limit, to_next);
|
||||
} // do_out
|
||||
|
||||
std::codecvt_base::result ucs2utf8codecvt::do_in(std::mbstate_t& /* state */,
|
||||
const wchar_t* from,
|
||||
const wchar_t* from_end,
|
||||
const wchar_t*& from_next,
|
||||
char* to,
|
||||
char* to_limit,
|
||||
char*& to_next) const
|
||||
{
|
||||
return Arabica::Internal::ucs2_2_utf8(from, from_end, from_next, to, to_limit, to_next);
|
||||
} // do_in
|
||||
|
||||
std::codecvt_base::result ucs2utf8codecvt::do_unshift(std::mbstate_t& /* state */,
|
||||
wchar_t* to,
|
||||
wchar_t* /* to_limit */,
|
||||
wchar_t*& to_next) const
|
||||
{
|
||||
to_next = to;
|
||||
return std::codecvt_base::noconv;
|
||||
} // do_unshift
|
||||
|
||||
int ucs2utf8codecvt::do_length(const std::mbstate_t&,
|
||||
const wchar_t* from,
|
||||
const wchar_t* end,
|
||||
size_t max) const
|
||||
{
|
||||
size_t count(0);
|
||||
const wchar_t* from_next = from;
|
||||
|
||||
while((from_next < end) && (count < max))
|
||||
{
|
||||
if(*from_next > 0x7FF)
|
||||
++count;
|
||||
if(*from_next > 0x7F)
|
||||
++count;
|
||||
++count;
|
||||
++from_next;
|
||||
} // while
|
||||
|
||||
return (from_next-from);
|
||||
} // do_length
|
||||
|
||||
// end of file
|
||||
|
76
Utils/ucs2utf8codecvt.hpp
Normal file
76
Utils/ucs2utf8codecvt.hpp
Normal file
|
@ -0,0 +1,76 @@
|
|||
#ifndef ARABICA_UCS2UTF8_CODECVT_HPP
|
||||
#define ARABICA_UCS2UTF8_CODECVT_HPP
|
||||
//---------------------------------------------------------------------------
|
||||
// class ucs2utf8codecvt
|
||||
// This facet converts from Unicode (UCS-2) wchar_ts to
|
||||
// char using the UTF-8 encoding.
|
||||
//
|
||||
// For the full guff on codecvts see section 22.2.1.5 of
|
||||
// The C++ Standard (ISO/IEC 14882 to be pedantic).
|
||||
//
|
||||
// I got my information about UTF-8 from RFC 2044.
|
||||
//
|
||||
// $Id$
|
||||
//---------------------------------------------------------------------------
|
||||
#include <SAX/ArabicaConfig.h>
|
||||
|
||||
#include <locale>
|
||||
|
||||
#ifndef ARABICA_NO_CODECVT_SPECIALISATIONS
|
||||
#include <Utils/impl/codecvt_specialisations.h>
|
||||
#endif
|
||||
|
||||
#ifdef ARABICA_VS6_WORKAROUND
|
||||
#include <Utils/impl/VS6Workaround.h>
|
||||
#endif
|
||||
|
||||
namespace Arabica
|
||||
{
|
||||
namespace convert
|
||||
{
|
||||
|
||||
// Code conversion facet built on std::codecvt<char, wchar_t, std::mbstate_t>.
// Every member is protected, as is the destructor; the conversion work is
// reached through the virtual do_* overrides below.  do_out, do_in,
// do_unshift and do_length are only declared here and are defined in the
// accompanying source file.
class ucs2utf8codecvt : public std::codecvt<char, wchar_t, std::mbstate_t>
{
protected:
  typedef std::codecvt_base::result result;

  virtual ~ucs2utf8codecvt()
  {
  }

  // char range [from, from_end) in, wchar_t buffer [to, to_limit) out.
  virtual result do_out(std::mbstate_t& state,
                        const char* from,
                        const char* from_end,
                        const char*& from_next,
                        wchar_t* to,
                        wchar_t* to_limit,
                        wchar_t*& to_next) const;

  // wchar_t range [from, from_end) in, char buffer [to, to_limit) out.
  virtual result do_in(std::mbstate_t& state,
                       const wchar_t* from,
                       const wchar_t* from_end,
                       const wchar_t*& from_next,
                       char* to,
                       char* to_limit,
                       char*& to_next) const;

  // Takes the wchar_t buffer [to, to_limit) and the to_next out-position.
  virtual result do_unshift(std::mbstate_t& state,
                            wchar_t* to,
                            wchar_t* to_limit,
                            wchar_t*& to_next) const;

  // Always 0, the value std::codecvt uses for a variable-width encoding.
  virtual int do_encoding() const throw()
  {
    return 0;
  }

  // Always false: this facet never reports itself as a no-op conversion.
  virtual bool do_always_noconv() const throw()
  {
    return false;
  }

  // Measures the wchar_t range [from, end) against a limit of max.
  virtual int do_length(const std::mbstate_t& state,
                        const wchar_t* from,
                        const wchar_t* end,
                        size_t max) const;

  // Always 1.
  virtual int do_max_length() const throw()
  {
    return 1;
  }
}; // class ucs2utf8codecvt
|
||||
|
||||
} // namespace convert
|
||||
} // namespace Arabica
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in a new issue