identical to utf16utf8codecvt but better named

2025-01-17 18:12:04 +01:00 · 2005-12-07 10:48:15 +00:00 · 2005-12-07 10:48:15 +00:00 · a36a7adde7
commit a36a7adde7
parent 2563eee75c
2 changed files with 141 additions and 0 deletions
--- a/Utils/ucs2utf8codecvt.cpp
+++ b/Utils/ucs2utf8codecvt.cpp
@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------
+// $Id$
+//---------------------------------------------------------------------------
+#include "ucs2utf8codecvt.hpp"
+#include "impl/ucs2_utf8.h"
+//---------------------------------------------------------------------------
+// This facet converts between UCS-2 wide chars and chars encoded
+// as UTF-8 (the encoding formerly known as FSS-UTF).
+//
+using namespace Arabica::convert;
+
+// Converts the char sequence [from, from_end) into wchar_ts stored in
+// [to, to_limit).  All of the work is handed to
+// Arabica::Internal::utf8_2_ucs2; from_next and to_next are passed
+// through by reference for that helper to update, and its result is
+// returned unchanged.  The conversion state argument is not used.
+std::codecvt_base::result ucs2utf8codecvt::do_out(std::mbstate_t& /* state */,
+                       const char* from,
+                       const char* from_end,
+                       const char*& from_next,
+                       wchar_t* to,
+                       wchar_t* to_limit,
+                       wchar_t*& to_next) const
+{
+  const std::codecvt_base::result outcome =
+      Arabica::Internal::utf8_2_ucs2(from, from_end, from_next,
+                                     to, to_limit, to_next);
+  return outcome;
+} // do_out
+
+// Converts the wchar_t sequence [from, from_end) into chars stored in
+// [to, to_limit).  All of the work is handed to
+// Arabica::Internal::ucs2_2_utf8; from_next and to_next are passed
+// through by reference for that helper to update, and its result is
+// returned unchanged.  The conversion state argument is not used.
+std::codecvt_base::result ucs2utf8codecvt::do_in(std::mbstate_t& /* state */,
+                        const wchar_t* from,
+                        const wchar_t* from_end,
+                        const wchar_t*& from_next,
+                        char* to,
+                        char* to_limit,
+                        char*& to_next) const
+{
+  const std::codecvt_base::result outcome =
+      Arabica::Internal::ucs2_2_utf8(from, from_end, from_next,
+                                     to, to_limit, to_next);
+  return outcome;
+} // do_in
+
+// Nothing is ever written by this function: the output position is
+// reported as the start of the supplied buffer and noconv is returned.
+// Neither the conversion state nor the buffer limit is consulted.
+std::codecvt_base::result ucs2utf8codecvt::do_unshift(std::mbstate_t& /* state */,
+                            wchar_t* to,
+                            wchar_t* /* to_limit */,
+                            wchar_t*& to_next) const
+{
+  // no characters emitted, so next == start
+  to_next = to;
+
+  return std::codecvt_base::noconv;
+} // do_unshift
+
+// Reports how many wchar_ts from [from, end) can be consumed without
+// producing more than max chars of output.
+//
+// Each wchar_t is costed at 1 char when its value is <= 0x7F, 2 chars
+// when <= 0x7FF, and 3 chars otherwise.  A wchar_t is only counted if
+// its whole encoding fits in what is left of the max budget; the
+// previous version checked the budget before sizing the character, so
+// a multi-char encoding could push the total past max.
+//
+// Returns the number of wchar_ts accepted (from_next - from).
+int ucs2utf8codecvt::do_length(const std::mbstate_t&,
+                                const wchar_t* from,
+                                const wchar_t* end,
+                                size_t max) const
+{
+  size_t count(0);
+  const wchar_t* from_next = from;
+
+  while(from_next < end)
+  {
+    // number of chars needed to encode this wchar_t
+    size_t width(1);
+    if(*from_next > 0x7F)
+      ++width;
+    if(*from_next > 0x7FF)
+      ++width;
+
+    // count never exceeds max, so the subtraction cannot wrap
+    if(width > (max - count))
+      break;
+
+    count += width;
+    ++from_next;
+  } // while
+
+  return static_cast<int>(from_next - from);
+} // do_length
+
+// end of file
+
--- a/Utils/ucs2utf8codecvt.hpp
+++ b/Utils/ucs2utf8codecvt.hpp
@ -0,0 +1,76 @@
+#ifndef ARABICA_UCS2UTF8_CODECVT_HPP
+#define ARABICA_UCS2UTF8_CODECVT_HPP
+//---------------------------------------------------------------------------
+// class ucs2utf8codecvt
+// This facet converts from Unicode (UCS-2) wchar_ts to
+// char using the UTF-8 encoding.
+//
+// For the full guff on codecvts see section 22.2.1.5 of
+// The C++ Standard (ISO/IEC 14882 to be pedantic).
+//
+// I got my information about UTF-8 from RFC 2044.
+//
+// $Id$
+//---------------------------------------------------------------------------
+#include <SAX/ArabicaConfig.h>
+
+#include <locale>
+
+#ifndef ARABICA_NO_CODECVT_SPECIALISATIONS
+#include <Utils/impl/codecvt_specialisations.h>
+#endif
+
+#ifdef ARABICA_VS6_WORKAROUND
+#include <Utils/impl/VS6Workaround.h>
+#endif
+
+namespace Arabica
+{
+namespace convert
+{
+
+// Code conversion facet built on std::codecvt<char, wchar_t, std::mbstate_t>.
+// Every member is protected; the conversion members are declared here and
+// defined out of line, while the fixed-answer queries are defined inline.
+class ucs2utf8codecvt : public std::codecvt<char, wchar_t, std::mbstate_t>
+{
+protected:
+  typedef std::codecvt_base::result result;
+
+  virtual ~ucs2utf8codecvt() { }
+
+  // char sequence [from, from_end) -> wchar_t buffer [to, to_limit)
+  virtual result do_out(std::mbstate_t&,
+                        const char* from,
+                        const char* from_end,
+                        const char*& from_next,
+                        wchar_t* to,
+                        wchar_t* to_limit,
+                        wchar_t*& to_next) const;
+
+  // wchar_t sequence [from, from_end) -> char buffer [to, to_limit)
+  virtual result do_in(std::mbstate_t&,
+                       const wchar_t* from,
+                       const wchar_t* from_end,
+                       const wchar_t*& from_next,
+                       char* to,
+                       char* to_limit,
+                       char*& to_next) const;
+
+  virtual result do_unshift(std::mbstate_t&,
+                            wchar_t* to,
+                            wchar_t* to_limit,
+                            wchar_t*& to_next) const;
+
+  // always reports 0
+  virtual int do_encoding() const throw()
+  {
+    return 0;
+  }
+
+  // always reports false
+  virtual bool do_always_noconv() const throw()
+  {
+    return false;
+  }
+
+  virtual int do_length(const std::mbstate_t&,
+                        const wchar_t* from,
+                        const wchar_t* end,
+                        size_t max) const;
+
+  // always reports 1
+  virtual int do_max_length() const throw()
+  {
+    return 1;
+  }
+}; // class ucs2utf8codecvt
+
+} // namespace convert
+} // namespace Arabica
+
+#endif
+