//--------------------------------------------------------------------------- // $Id$ //--------------------------------------------------------------------------- #include "utf16utf8_codecvt.h" //--------------------------------------------------------------------------- // This facet converts from wide chars to char using the // FSS-UTF (UCS2) encoding. // // Some of this code is derived from work done by Ken Thompson, // provided to the X/Open Group. struct Tab { char char_mask; char char_value; int shift; wchar_t wide_mask; }; static const Tab tab[] = { { char(0x80), char(0x00), 0*6, 0x7F, }, // 1 byte sequence { char(0xE0), char(0xC0), 1*6, 0x7FF, }, // 2 byte sequence { char(0xF0), char(0xE0), 2*6, 0xFFFF, }, // 3 byte sequence { 0, 0, 0, 0, } // end of table }; utf16utf8_codecvt::~utf16utf8_codecvt() { } // ~utf16utf8_codecvt std::codecvt_base::result utf16utf8_codecvt::do_out(std::mbstate_t& /* state */, const wchar_t* from, const wchar_t* from_end, const wchar_t*& from_next, char* to, char* to_limit, char*& to_next) const { from_next = from; to_next = to; while(from_next < from_end) { for(const Tab *t = tab; t->char_mask; t++) { if(*from_next > t->wide_mask ) continue; // is there enough room in outbuffer? if(to_next + (t - tab) + 1 >= to_limit) return std::codecvt_base::partial; int c = t->shift; *to_next++ = static_cast(t->char_value | (*from_next >> c)); while(c > 0) { c -= 6; *to_next++ = static_cast(0x80 | ((*from_next >> c) & 0x3F)); } // while(c > 0) break; } // for(Tab *t = tab; t->char_mask; t++) ++from_next; } // while(from_next < from_end) return std::codecvt_base::ok; } // do_out std::codecvt_base::result utf16utf8_codecvt::do_in(std::mbstate_t& /* state */, const char* from, const char* from_end, const char*& from_next, wchar_t* to, wchar_t* to_limit, wchar_t*& to_next) const { from_next = from; to_next = to; while((from_next < from_end) && (to_next < to_limit)) { char start = *from_next; *to_next = static_cast(*from_next); for(const Tab *t = tab; t->char_mask; t++) { if((start & t->char_mask) == t->char_value) { *to_next &= t->wide_mask; break; } from_next++; *to_next = (*to_next << 6) | ((*from_next ^ 0x80) & 0xff); } // for(Tab *t = tab; t->char_mask; t++) ++from_next; ++to_next; } // while return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial; } // do_in std::codecvt_base::result utf16utf8_codecvt::do_unshift(std::mbstate_t& /* state */, char* to, char* /* to_limit */, char*& to_next) const { to_next = to; return noconv; } // do_unshift int utf16utf8_codecvt::do_encoding() const throw() { return 0; } // do_encoding bool utf16utf8_codecvt::do_always_noconv() const throw() { return false; } // do_always_noconv int utf16utf8_codecvt::do_length(const std::mbstate_t&, const char* from, const char* end, size_t max) const { size_t count(0); const char* from_next = from; while((from_next < end) && (count < max)) { if(!(*from_next & 0x80)) { ++count; ++from_next; } else if((*from_next&0xc0) == 0xc0) { if(from_next+2 < end) { ++count; from_next += 2; } else break; } else if((*from_next&0xe0) == 0xe0) { if(from_next+3 < end) { ++count; from_next += 3; } else break; } } // while return (from_next-from); } // do_length int utf16utf8_codecvt::do_max_length() const throw() { return 3; } // do_max_length // end of file