arabica/Utils/utf16utf8_codecvt.cpp

//---------------------------------------------------------------------------
// $Id$
//---------------------------------------------------------------------------
#include "utf16utf8_codecvt.h"
//---------------------------------------------------------------------------
// This facet converts between wide chars (UCS-2) and chars using the
// FSS-UTF (UTF-8) encoding.
//
// Some of this code is derived from work done by Ken Thompson,
// provided to the X/Open Group.

// One row of the encoding table: describes the multi-byte sequences of a
// single length. Rows are consulted by both do_out and do_in.
struct Tab
{
  // Mask applied to the first byte of a sequence in do_in. A zero mask
  // marks the end-of-table sentinel (the table loops stop on it).
  char char_mask;
  // Value the masked first byte must equal for this row to match in do_in;
  // also OR-ed into the first byte written by do_out.
  char char_value;
  // Right shift applied to the wide character to obtain the bits stored in
  // the first byte; do_out then steps it down by 6 for each following byte.
  int shift;
  // Largest wide character this row can encode (do_out skips the row for
  // anything larger); also the mask do_in applies to the decoded value.
  wchar_t wide_mask;
};

// Encoding table, shortest sequence first. The index of a row is the number
// of bytes that follow the first byte of the sequence. The all-zero row
// terminates the table.
static const Tab tab[] =
{
  // first-byte mask          first-byte value           shift  largest value
  { static_cast<char>(0x80),  static_cast<char>(0x00),   0,     0x7F   },  // 1 byte:  0xxxxxxx
  { static_cast<char>(0xE0),  static_cast<char>(0xC0),   6,     0x7FF  },  // 2 bytes: 110xxxxx + 1 more
  { static_cast<char>(0xF0),  static_cast<char>(0xE0),   12,    0xFFFF },  // 3 bytes: 1110xxxx + 2 more
  { 0,                        0,                         0,     0      }   // sentinel
};

// Destructor. The body is intentionally empty.
utf16utf8_codecvt::~utf16utf8_codecvt()
{
  // nothing to do
} // ~utf16utf8_codecvt

std::codecvt_base::result utf16utf8_codecvt::do_out(std::mbstate_t& /* state */,
                        const wchar_t* from,
                        const wchar_t* from_end,
                        const wchar_t*& from_next,
                        char* to,
                        char* to_limit,
                        char*& to_next) const
{
  from_next = from;
  to_next = to;

  while(from_next < from_end)
  {
    for(const Tab *t = tab; t->char_mask; t++)
    {
      if(*from_next > t->wide_mask )
        continue;

      // is there enough room in outbuffer?
      if(to_next + (t - tab) + 1 >= to_limit)
        return std::codecvt_base::partial;

      int c = t->shift;
      *to_next++ = static_cast<char>(t->char_value | (*from_next >> c));
      while(c > 0)
      {
      	c -= 6;
       	*to_next++ = static_cast<char>(0x80 | ((*from_next >> c) & 0x3F));
      } // while(c > 0)
      break;
    } // for(Tab *t = tab;  t->char_mask; t++)
    ++from_next;
  } // while(from_next < from_end)

  return std::codecvt_base::ok;
} // do_out

std::codecvt_base::result utf16utf8_codecvt::do_in(std::mbstate_t& /* state */,
                       const char* from,
                       const char* from_end,
                       const char*& from_next,
                       wchar_t* to,
                       wchar_t* to_limit,
                       wchar_t*& to_next) const
{
  from_next = from;
  to_next = to;

	while((from_next < from_end) && (to_next < to_limit))
	{
    char start = *from_next;
    *to_next = static_cast<unsigned char>(*from_next);
    for(const Tab *t = tab; t->char_mask; t++)
    {
      if((start & t->char_mask) == t->char_value)
      {
        *to_next &= t->wide_mask;
        break;
      }
      from_next++;
      *to_next = (*to_next << 6) | ((*from_next ^ 0x80) & 0xff);
    } // for(Tab *t = tab;  t->char_mask; t++)

    ++from_next;
    ++to_next;
  } // while

  return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;
} // do_in

// Nothing is ever written here: to_next is set to to and noconv is returned.
// The state and to_limit arguments are ignored.
std::codecvt_base::result utf16utf8_codecvt::do_unshift(std::mbstate_t& /* state */,
                            char* to,
                            char* /* to_limit */,
                            char*& to_next) const
{
  const std::codecvt_base::result nothing_written = std::codecvt_base::noconv;
  to_next = to;
  return nothing_written;
} // do_unshift

// Always reports 0, which std::codecvt defines as "a variable number of
// external characters per internal character".
int utf16utf8_codecvt::do_encoding() const throw()
{
  const int variable_width = 0;
  return variable_width;
} // do_encoding

// Always reports false: callers must run do_in/do_out rather than copying
// characters straight through.
bool utf16utf8_codecvt::do_always_noconv() const throw()
{
  const bool is_identity_conversion = false;
  return is_identity_conversion;
} // do_always_noconv

// Reports how many bytes of [from, end) make up at most max complete
// sequences, without converting anything.
//
// Scanning stops early at a sequence that is cut short by end, or at a byte
// that cannot begin a 1, 2 or 3 byte sequence.
//
// Returns the number of bytes spanned by the sequences counted.
int utf16utf8_codecvt::do_length(const std::mbstate_t&,
                        const char* from,
                        const char* end,
                        size_t max) const
{
  size_t count(0);
  const char* from_next = from;

  while((from_next < end) && (count < max))
  {
    const unsigned char lead = static_cast<unsigned char>(*from_next);

    // sequence length is fixed by the top bits of the first byte
    int length = 0;
    if(!(lead & 0x80))
      length = 1;                 // 0xxxxxxx
    else if((lead & 0xE0) == 0xC0)
      length = 2;                 // 110xxxxx
    else if((lead & 0xF0) == 0xE0)
      length = 3;                 // 1110xxxx
    else
      break;                      // not the start of a sequence - stop rather than spin

    // the whole sequence must lie inside the buffer (it may end exactly at end)
    if((end - from_next) < length)
      break;

    ++count;
    from_next += length;
  } // while

  return static_cast<int>(from_next - from);
} // do_length

// Always reports 3, the length of the longest sequence in the encoding table.
int utf16utf8_codecvt::do_max_length() const throw()
{
  const int longest_sequence = 3;
  return longest_sequence;
} // do_max_length

// end of file
Initial revision 2002-06-21 13:16:28 +02:00			`//---------------------------------------------------------------------------`
			`// $Id$`
			`//---------------------------------------------------------------------------`
			`#include "utf16utf8_codecvt.h"`
			`//---------------------------------------------------------------------------`
			`// This facet converts from wide chars to char using the`
			`// FSS-UTF (UCS2) encoding.`
			`//`
			`// Some of this code is derived from work done by Ken Thompson,`
			`// provided to the X/Open Group.`

			`struct Tab`
			`{`
			`char char_mask;`
			`char char_value;`
			`int shift;`
			`wchar_t wide_mask;`
			`};`

			`static const Tab tab[] =`
			`{`
			`{ char(0x80), char(0x00), 0*6, 0x7F, }, // 1 byte sequence`
			`{ char(0xE0), char(0xC0), 1*6, 0x7FF, }, // 2 byte sequence`
			`{ char(0xF0), char(0xE0), 2*6, 0xFFFF, }, // 3 byte sequence`
			`{ 0, 0, 0, 0, } // end of table`
			`};`

			`utf16utf8_codecvt::~utf16utf8_codecvt()`
			`{`
			`} // ~utf16utf8_codecvt`

			`std::codecvt_base::result utf16utf8_codecvt::do_out(std::mbstate_t& /* state */,`
			`const wchar_t* from,`
			`const wchar_t* from_end,`
			`const wchar_t*& from_next,`
			`char* to,`
			`char* to_limit,`
			`char*& to_next) const`
			`{`
			`from_next = from;`
			`to_next = to;`

			`while(from_next < from_end)`
			`{`
			`for(const Tab *t = tab; t->char_mask; t++)`
			`{`
			`if(*from_next > t->wide_mask )`
			`continue;`

			`// is there enough room in outbuffer?`
			`if(to_next + (t - tab) + 1 >= to_limit)`
			`return std::codecvt_base::partial;`

			`int c = t->shift;`
			`*to_next++ = static_cast<char>(t->char_value \| (*from_next >> c));`
			`while(c > 0)`
			`{`
			`c -= 6;`
			`*to_next++ = static_cast<char>(0x80 \| ((*from_next >> c) & 0x3F));`
			`} // while(c > 0)`
			`break;`
			`} // for(Tab *t = tab; t->char_mask; t++)`
			`++from_next;`
			`} // while(from_next < from_end)`

			`return std::codecvt_base::ok;`
			`} // do_out`

			`std::codecvt_base::result utf16utf8_codecvt::do_in(std::mbstate_t& /* state */,`
			`const char* from,`
			`const char* from_end,`
			`const char*& from_next,`
			`wchar_t* to,`
			`wchar_t* to_limit,`
			`wchar_t*& to_next) const`
			`{`
			`from_next = from;`
			`to_next = to;`

			`while((from_next < from_end) && (to_next < to_limit))`
			`{`
			`char start = *from_next;`
			`*to_next = static_cast<unsigned char>(*from_next);`
			`for(const Tab *t = tab; t->char_mask; t++)`
			`{`
			`if((start & t->char_mask) == t->char_value)`
			`{`
			`*to_next &= t->wide_mask;`
			`break;`
			`}`
			`from_next++;`
			`*to_next = (*to_next << 6) \| ((*from_next ^ 0x80) & 0xff);`
			`} // for(Tab *t = tab; t->char_mask; t++)`

			`++from_next;`
			`++to_next;`
			`} // while`

			`return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;`
			`} // do_in`

			`std::codecvt_base::result utf16utf8_codecvt::do_unshift(std::mbstate_t& /* state */,`
			`char* to,`
			`char* /* to_limit */,`
			`char*& to_next) const`
			`{`
			`to_next = to;`
			`return noconv;`
			`} // do_unshift`

			`int utf16utf8_codecvt::do_encoding() const throw()`
			`{`
			`return 0;`
			`} // do_encoding`

			`bool utf16utf8_codecvt::do_always_noconv() const throw()`
			`{`
			`return false;`
			`} // do_always_noconv`

			`int utf16utf8_codecvt::do_length(const std::mbstate_t&,`
			`const char* from,`
			`const char* end,`
			`size_t max) const`
			`{`
			`size_t count(0);`
			`const char* from_next = from;`

			`while((from_next < end) && (count < max))`
			`{`
			`if(!(*from_next & 0x80))`
			`{`
			`++count;`
			`++from_next;`
			`}`
			`else if((*from_next&0xc0) == 0xc0)`
			`{`
			`if(from_next+2 < end)`
			`{`
			`++count;`
			`from_next += 2;`
			`}`
			`else`
			`break;`
			`}`
			`else if((*from_next&0xe0) == 0xe0)`
			`{`
			`if(from_next+3 < end)`
			`{`
			`++count;`
			`from_next += 3;`
			`}`
			`else`
			`break;`
			`}`
			`} // while`

			`return (from_next-from);`
			`} // do_length`

			`int utf16utf8_codecvt::do_max_length() const throw()`
			`{`
			`return 3;`
			`} // do_max_length`

			`// end of file`