identical to utf16utf8codecvt but better named

This commit is contained in:
jez_higgins 2005-12-07 10:48:15 +00:00
parent 2563eee75c
commit a36a7adde7
2 changed files with 141 additions and 0 deletions

65
Utils/ucs2utf8codecvt.cpp Normal file
View file

@ -0,0 +1,65 @@
//---------------------------------------------------------------------------
// $Id$
//---------------------------------------------------------------------------
#include "ucs2utf8codecvt.hpp"
#include "impl/ucs2_utf8.h"
//---------------------------------------------------------------------------
// This facet converts between UCS-2 wide chars and chars using the
// FSS-UTF (UTF-8) encoding.
//
using namespace Arabica::convert;
// do_out - intern (char) to extern (wchar_t) direction of this facet.
//
// Hands the whole job to Arabica::Internal::utf8_2_ucs2, passing the input
// range [from, from_end), the output range [to, to_limit) and the two
// "next" cursors straight through.  The shift state is not used.
// Whatever result code the helper reports is returned unchanged.
std::codecvt_base::result ucs2utf8codecvt::do_out(std::mbstate_t& /* state */,
                                                  const char* from,
                                                  const char* from_end,
                                                  const char*& from_next,
                                                  wchar_t* to,
                                                  wchar_t* to_limit,
                                                  wchar_t*& to_next) const
{
  const std::codecvt_base::result outcome =
      Arabica::Internal::utf8_2_ucs2(from, from_end, from_next,
                                     to, to_limit, to_next);
  return outcome;
} // do_out
// do_in - extern (wchar_t) to intern (char) direction of this facet.
//
// Hands the whole job to Arabica::Internal::ucs2_2_utf8, passing the input
// range [from, from_end), the output range [to, to_limit) and the two
// "next" cursors straight through.  The shift state is not used.
// Whatever result code the helper reports is returned unchanged.
std::codecvt_base::result ucs2utf8codecvt::do_in(std::mbstate_t& /* state */,
                                                 const wchar_t* from,
                                                 const wchar_t* from_end,
                                                 const wchar_t*& from_next,
                                                 char* to,
                                                 char* to_limit,
                                                 char*& to_next) const
{
  const std::codecvt_base::result outcome =
      Arabica::Internal::ucs2_2_utf8(from, from_end, from_next,
                                     to, to_limit, to_next);
  return outcome;
} // do_in
// do_unshift - nothing is ever written here: the output cursor is left at
// the start of the supplied buffer and noconv is reported.  Neither the
// shift state nor the buffer limit is consulted.
std::codecvt_base::result ucs2utf8codecvt::do_unshift(std::mbstate_t& /* state */,
                                                      wchar_t* to,
                                                      wchar_t* /* to_limit */,
                                                      wchar_t*& to_next) const
{
  const std::codecvt_base::result verdict = std::codecvt_base::noconv;
  to_next = to;   // no output produced, so "next" is the buffer start
  return verdict;
} // do_unshift
// do_length - reports how many wide chars from [from, end) can be consumed
// without the running total of chars they account for exceeding max.
//
// Each wide char is costed at
//   1 char  when its value is <= 0x7F,
//   2 chars when its value is in (0x7F, 0x7FF],
//   3 chars when its value is > 0x7FF.
//
// A wide char is only counted when its full cost still fits within max;
// previously the check was made before adding the cost, so the final
// element could push the total up to two chars past max.
//
// Returns the number of wide chars counted (from_next - from).
// The shift state is not used.
int ucs2utf8codecvt::do_length(const std::mbstate_t&,
                               const wchar_t* from,
                               const wchar_t* end,
                               size_t max) const
{
  size_t count(0);
  const wchar_t* from_next = from;

  while(from_next < end)
  {
    size_t needed = 1;
    if(*from_next > 0x7F)
      ++needed;
    if(*from_next > 0x7FF)
      ++needed;

    // count never exceeds max, so max - count cannot wrap
    if(needed > max - count)
      break;

    count += needed;
    ++from_next;
  } // while

  return static_cast<int>(from_next - from);
} // do_length
// end of file

76
Utils/ucs2utf8codecvt.hpp Normal file
View file

@ -0,0 +1,76 @@
#ifndef ARABICA_UCS2UTF8_CODECVT_HPP
#define ARABICA_UCS2UTF8_CODECVT_HPP
//---------------------------------------------------------------------------
// class ucs2utf8codecvt
// This facet converts from Unicode (UCS-2) wchar_ts to
// char using the UTF-8 encoding.
//
// For the full guff on codecvts see section 22.2.1.5 of
// The C++ Standard (ISO/IEC 14882 to be pedantic).
//
// I got my information about UTF-8 from RFC 2044.
//
// $Id$
//---------------------------------------------------------------------------
#include <SAX/ArabicaConfig.h>
#include <locale>
#ifndef ARABICA_NO_CODECVT_SPECIALISATIONS
#include <Utils/impl/codecvt_specialisations.h>
#endif
#ifdef ARABICA_VS6_WORKAROUND
#include <Utils/impl/VS6Workaround.h>
#endif
namespace Arabica
{
namespace convert
{
// Codecvt facet built on std::codecvt<char, wchar_t, std::mbstate_t>:
// the intern type is char and the extern type is wchar_t.
// Every member, including the destructor, is protected; the conversion
// hooks are reached through the public std::codecvt interface.
class ucs2utf8codecvt : public std::codecvt<char, wchar_t, std::mbstate_t>
{
protected:
typedef std::codecvt_base::result result;
virtual ~ucs2utf8codecvt() { }
// intern -> extern: reads chars from [from, from_end) and writes
// wchar_ts into [to, to_limit).
virtual result do_out(std::mbstate_t&,
const char* from,
const char* from_end,
const char*& from_next,
wchar_t* to,
wchar_t* to_limit,
wchar_t*& to_next) const;
// extern -> intern: reads wchar_ts from [from, from_end) and writes
// chars into [to, to_limit).
virtual result do_in(std::mbstate_t&,
const wchar_t* from,
const wchar_t* from_end,
const wchar_t*& from_next,
char* to,
char* to_limit,
char*& to_next) const;
// Hook for emitting a closing shift sequence into a wchar_t buffer.
virtual result do_unshift(std::mbstate_t&,
wchar_t*,
wchar_t*,
wchar_t*&) const;
// Returns 0, which std::codecvt::encoding defines as a variable-width
// encoding.
virtual int do_encoding() const throw() { return 0; }
// Always false: this facet never reports itself as a no-op conversion.
virtual bool do_always_noconv() const throw() { return false; }
// Counts how many wchar_ts from [from, end) fit within a budget of
// max chars.
virtual int do_length(const std::mbstate_t&,
const wchar_t* from,
const wchar_t* end,
size_t max) const;
// Returns 1 as the maximum length reported to std::codecvt::max_length.
virtual int do_max_length() const throw() { return 1; }
}; // class ucs2utf8codecvt
} // namespace convert
} // namespace Arabica
#endif