*** empty log message ***

2024-12-26 21:58:39 +01:00 · 2003-08-29 21:09:00 +00:00 · 2003-08-29 21:09:00 +00:00 · b2e29e67ac
commit b2e29e67ac
parent e694657457
17 changed files with 277 additions and 424 deletions
--- a/Arabica.sln
+++ b/Arabica.sln
@ -1,17 +1,19 @@
 Microsoft Visual Studio Solution File, Format Version 7.00
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DOM", "DOM\DOM.vcproj", "{AFD0FD18-3D55-4CEC-A242-EA290EBBF171}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SAX2DOM_test", "EXAMPLES\SAX2DOM\SAX2DOM_test.vcproj", "{E5157BA4-96A1-4D7F-B895-8C9A32F26BB5}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAX2DOMTests", "EXAMPLES\SAX2DOM\SAX2DOM_test.vcproj", "{E5157BA4-96A1-4D7F-B895-8C9A32F26BB5}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SAXlib", "SAX\SAX.vcproj", "{884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ArabicaLib", "SAX\SAX.vcproj", "{884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SimpleHandler", "examples\SAX\SimpleHandler.vcproj", "{16475ED0-2906-429B-9E73-F2BF2929F6E9}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXSimpleHandler", "examples\SAX\SimpleHandler.vcproj", "{16475ED0-2906-429B-9E73-F2BF2929F6E9}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Writer", "EXAMPLES\SAX\Writer.vcproj", "{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXWriter", "EXAMPLES\SAX\Writer.vcproj", "{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pyx", "examples\SAX\pyx.vcproj", "{AE33D6D1-0F57-4E97-90AE-696854C5AE71}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXPyx", "examples\SAX\pyx.vcproj", "{AE33D6D1-0F57-4E97-90AE-696854C5AE71}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DOMWriter", "examples\SAX2DOM\DOMWriter.vcproj", "{C1CF7801-1681-4F15-8D71-BBC814805AF2}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_DOMWriter", "examples\SAX2DOM\DOMWriter.vcproj", "{C1CF7801-1681-4F15-8D71-BBC814805AF2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_UtilsTranscode", "examples\Utils\transcode.vcproj", "{436B423B-BF20-4B2E-A187-604AF391FBE2}"
 EndProject
 Global
 	GlobalSection(SolutionConfiguration) = preSolution
@ -23,6 +25,8 @@ Global
 		{16475ED0-2906-429B-9E73-F2BF2929F6E9}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
 		{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
 		{AE33D6D1-0F57-4E97-90AE-696854C5AE71}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
+		{C1CF7801-1681-4F15-8D71-BBC814805AF2}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
+		{436B423B-BF20-4B2E-A187-604AF391FBE2}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
 	EndGlobalSection
 	GlobalSection(ProjectConfiguration) = postSolution
 		{AFD0FD18-3D55-4CEC-A242-EA290EBBF171}.Debug.ActiveCfg = Debug|Win32
@ -53,6 +57,10 @@ Global
 		{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Debug.Build.0 = Debug|Win32
 		{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Release.ActiveCfg = Release|Win32
 		{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Release.Build.0 = Release|Win32
+		{436B423B-BF20-4B2E-A187-604AF391FBE2}.Debug.ActiveCfg = Debug|Win32
+		{436B423B-BF20-4B2E-A187-604AF391FBE2}.Debug.Build.0 = Debug|Win32
+		{436B423B-BF20-4B2E-A187-604AF391FBE2}.Release.ActiveCfg = Release|Win32
+		{436B423B-BF20-4B2E-A187-604AF391FBE2}.Release.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 	EndGlobalSection
--- a/SAX/SAX.vcproj
+++ b/SAX/SAX.vcproj
@ -330,10 +330,10 @@
 				RelativePath="..\XML\XMLCharacterClasses.h">
 			</File>
 			<File
-				RelativePath="..\Utils\base64_codecvt.cpp">
+				RelativePath="..\Utils\base64codecvt.cpp">
 			</File>
 			<File
-				RelativePath="..\Utils\base64_codecvt.h">
+				RelativePath="..\Utils\base64codecvt.h">
 			</File>
 			<File
 				RelativePath="..\Utils\convert_adaptor.h">
@ -342,26 +342,54 @@
 				RelativePath="..\Utils\convertstream.h">
 			</File>
 			<File
-				RelativePath="..\Utils\iso8859_1utf8_codecvt.cpp">
+				RelativePath="..\Utils\iso88591utf8codecvt.cpp">
 			</File>
 			<File
-				RelativePath="..\Utils\iso8859_1utf8_codecvt.h">
+				RelativePath="..\Utils\iso88591utf8codecvt.h">
 			</File>
 			<File
-				RelativePath="..\Utils\rot13_codecvt.cpp">
+				RelativePath="..\Utils\rot13codecvt.cpp">
 			</File>
 			<File
-				RelativePath="..\Utils\rot13_codecvt.h">
+				RelativePath="..\Utils\rot13codecvt.h">
 			</File>
 			<File
 				RelativePath="..\Utils\socket_stream.h">
 			</File>
 			<File
-				RelativePath="..\Utils\utf16utf8_codecvt.cpp">
+				RelativePath="..\Utils\utf16utf8codecvt.cpp">
 			</File>
 			<File
-				RelativePath="..\Utils\utf16utf8_codecvt.h">
+				RelativePath="..\Utils\utf16utf8codecvt.h">
 			</File>
+			<File
+				RelativePath="..\Utils\utf8iso88591codecvt.cpp">
+			</File>
+			<File
+				RelativePath="..\Utils\utf8iso88591codecvt.h">
+			</File>
+			<File
+				RelativePath="..\Utils\utf8utf16codecvt.cpp">
+			</File>
+			<File
+				RelativePath="..\Utils\utf8utf16codecvt.h">
+			</File>
+			<Filter
+				Name="impl"
+				Filter="">
+				<File
+					RelativePath="..\Utils\impl\iso88591_utf8.cpp">
+				</File>
+				<File
+					RelativePath="..\Utils\impl\iso88591_utf8.h">
+				</File>
+				<File
+					RelativePath="..\Utils\impl\utf16_utf8.cpp">
+				</File>
+				<File
+					RelativePath="..\Utils\impl\utf16_utf8.h">
+				</File>
+			</Filter>
 		</Filter>
 		<File
 			RelativePath=".\ParserConfig.S">
--- a/SAX/helpers/StringAdaptor.h
+++ b/SAX/helpers/StringAdaptor.h
@ -6,7 +6,7 @@

 #include <string>
 #include <Utils/convertstream.h>
-#include <Utils/utf16utf8_codecvt.h>
+#include <Utils/utf8utf16codecvt.h>

 namespace SAX
 {
@ -74,9 +74,9 @@ public:

  default_string_adaptor() :
 #if !(defined _MSC_VER) || !(_MSC_VER < 1300)
-    loc_(std::locale(), new utf16utf8_codecvt()),
+    loc_(std::locale(), new utf8utf16codecvt()),
 #else
-    loc_(std::_Addfac(std::locale(), new utf16utf8_codecvt)),
+    loc_(std::_Addfac(std::locale(), new utf8utf16codecvt)),
 #endif
    n_(),
    w_()
@ -138,9 +138,9 @@ public:

  default_string_adaptor() :
 #if !(defined _MSC_VER) || !(_MSC_VER < 1300)
-    loc_(std::locale(), new utf16utf8_codecvt()),
+    loc_(std::locale(), new utf8utf16codecvt()),
 #else
-    loc_(std::_Addfac(std::locale(), new utf16utf8_codecvt)),
+    loc_(std::_Addfac(std::locale(), new utf8utf16codecvt)),
 #endif
    n_(), 
    w_()
--- a/Utils/iso88591utf8codecvt.cpp
+++ b/Utils/iso88591utf8codecvt.cpp
@ -48,7 +48,7 @@ int iso88591utf8codecvt::do_length(const std::mbstate_t&,
  while((from_next < end) && (count < max))
  {
    unsigned char fn = static_cast<unsigned char>(*from_next);
-    if(fn && 0x80)
+    if(fn & 0x80)
      ++count;
    ++count;
    ++from_next;
--- a/Utils/iso8859_1utf8_codecvt.cpp
+++ b/Utils/iso8859_1utf8_codecvt.cpp
@ -1,171 +0,0 @@
-//---------------------------------------------------------------------------
-// $Id$
-//---------------------------------------------------------------------------
-#include "iso8859_1utf8_codecvt.h"
-//---------------------------------------------------------------------------
-// This facet converts from ISO8859:1 (Latin 1) chars to UTF-8 encoded chars.
-//
-// Some of this code is derived from work done by Ken Thompson,
-// provided to the X/Open Group.
-
-struct Tab
-{
-  char char_mask;
-  char char_value;
-  int shift;
-  wchar_t wide_mask;
-};
-
-static const Tab tab[] =
-{
-  { char(0x80),  char(0x00),   0*6,    0x7F,   },   // 1 byte sequence
-  { char(0xE0),  char(0xC0),   1*6,    0x7FF,  },   // 2 byte sequence
-  { char(0xF0),  char(0xE0),   2*6,    0xFFFF, },   // 3 byte sequence
-  { 0,           0,            0,      0,      }    // end of table
-};
-
-iso8859_1utf8_codecvt::~iso8859_1utf8_codecvt()
-{
-} // ~iso8859_1utf8_codecvt
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_in(std::mbstate_t& /* state */,
-                        const char* from,
-                        const char* from_end,
-                        const char*& from_next,
-                        char* to,
-                        char* to_limit,
-                        char*& to_next) const
-{
-  from_next = from;
-  to_next = to;
-
-  while(from_next < from_end)
-  {
-    for(const Tab *t = tab; t->char_mask; t++)
-    {
-      unsigned char fn = static_cast<unsigned char>(*from_next);
-      if(fn > t->wide_mask )
-        continue;
-
-      // is there enough room in outbuffer?
-      if(to_next + (t - tab) + 1 >= to_limit)
-        return std::codecvt_base::partial;
-
-      int c = t->shift;
-      *to_next++ = static_cast<char>(t->char_value | (fn >> c));
-      while(c > 0)
-      {
-      	c -= 6;
-       	*to_next++ = static_cast<char>(0x80 | ((fn >> c) & 0x3F));
-      } // while(c > 0)
-      break;
-    } // for(Tab *t = tab;  t->char_mask; t++)
-    ++from_next;
-  } // while(from_next < from_end)
-
-  return std::codecvt_base::ok;
-} // do_out
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_out(std::mbstate_t& /* state */,
-                       const char* from,
-                       const char* from_end,
-                       const char*& from_next,
-                       char* to,
-                       char* to_limit,
-                       char*& to_next) const
-{
-  from_next = from;
-  to_next = to;
-
-	while((from_next < from_end) && (to_next < to_limit))
-	{
-    char start = *from_next;
-    wchar_t next = static_cast<unsigned char>(*from_next);
-    for(const Tab *t = tab; t->char_mask; t++)
-    {
-      if((start & t->char_mask) == t->char_value)
-      {
-        next &= t->wide_mask;
-        break;
-      }
-      from_next++;
-      next = (next << 6) | ((*from_next ^ 0x80) & 0xff);
-    } // for(Tab *t = tab;  t->char_mask; t++)
-
-    if(next <= 0xFF)
-      *to_next = static_cast<char>(next);
-    else
-      *to_next = '?';  // error state!
-
-    ++from_next;
-    ++to_next;
-  } // while
-
-  return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;
-} // do_in
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_unshift(std::mbstate_t& /* state */,
-                            char* to,
-                            char* /* to_limit */,
-                            char*& to_next) const
-{
-  to_next = to;
-  return noconv;
-} // do_unshift
-
-int iso8859_1utf8_codecvt::do_encoding() const throw()
-{
-  return 0;
-} // do_encoding
-
-bool iso8859_1utf8_codecvt::do_always_noconv() const throw()
-{
-  return false;
-} // do_always_noconv
-
-int iso8859_1utf8_codecvt::do_length(const std::mbstate_t&,
-                        const char* from,
-                        const char* end,
-                        size_t max) const
-{
-  size_t count(0);
-  const char* from_next = from;
-
-  while((from_next < end) && (count < max))
-  {
-    if(!(*from_next & 0x80))
-    {
-	    ++count;
-      ++from_next;
-    }
-    else if((*from_next&0xc0) == 0xc0)
-    {
-      if(from_next+2 < end)
-      {
-  	    ++count;
-        from_next += 2;
-      }
-      else
-        break;
-    }
-    else if((*from_next&0xe0) == 0xe0)
-    {
-      if(from_next+3 < end)
-      {
-  	    ++count;
-        from_next += 3;
-      }
-      else
-        break;
-    }
-  } // while
-
-  return (from_next-from);
-} // do_length
-
-int iso8859_1utf8_codecvt::do_max_length() const throw()
-{
-  return 2;
-} // do_max_length
-
-// end of file
--- a/Utils/iso8859_1utf8_codecvt.h
+++ b/Utils/iso8859_1utf8_codecvt.h
@ -1,49 +0,0 @@
-#ifndef ISO8859_1utf8_codecvtH
-#define ISO8859_1utf8_codecvtH
-#include <locale>
-
-#if(_MSC_VER < 1300)
-namespace std {
-  typedef ::mbstate_t mbstate_t;
-}
-#endif
-
-class iso8859_1utf8_codecvt : public std::codecvt<char, char, std::mbstate_t>
-{
-protected:
-  virtual ~iso8859_1utf8_codecvt();
-
-  virtual result do_out(std::mbstate_t&,
-                        const char* from,
-                        const char* from_end,
-                        const char*& from_next,
-                        char* to,
-                        char* to_limit,
-                        char*& to_next) const;
-
-  virtual result do_in(std::mbstate_t&,
-                       const char* from,
-                       const char* from_end,
-                       const char*& from_next,
-                       char* to,
-                       char* to_limit,
-                       char*& to_next) const;
-
-  virtual result do_unshift(std::mbstate_t&,
-                            char*,
-                            char*,
-                            char*&) const;
-
-  virtual int do_encoding() const throw();
-
-  virtual bool do_always_noconv() const throw();
-
-  virtual int do_length(const std::mbstate_t&,
-                        const char* from,
-                        const char* end,
-                        size_t max) const;
-
-  virtual int do_max_length() const throw();
-}; // class iso8859_1utf8_codecvt
-
-#endif
--- a/Utils/utf16utf8_codecvt.cpp
+++ b/Utils/utf16utf8_codecvt.cpp
@ -1,166 +0,0 @@
-//---------------------------------------------------------------------------
-// $Id$
-//---------------------------------------------------------------------------
-#include "utf16utf8_codecvt.h"
-//---------------------------------------------------------------------------
-// This facet converts from wide chars to char using the
-// FSS-UTF (UCS2) encoding.
-//
-// Some of this code is derived from work done by Ken Thompson,
-// provided to the X/Open Group.
-
-struct Tab
-{
-  char char_mask;
-  char char_value;
-  int shift;
-  wchar_t wide_mask;
-};
-
-static const Tab tab[] =
-{
-  { char(0x80),  char(0x00),   0*6,    0x7F,   },   // 1 byte sequence
-  { char(0xE0),  char(0xC0),   1*6,    0x7FF,  },   // 2 byte sequence
-  { char(0xF0),  char(0xE0),   2*6,    0xFFFF, },   // 3 byte sequence
-  { 0,           0,            0,      0,      }    // end of table
-};
-
-utf16utf8_codecvt::~utf16utf8_codecvt()
-{
-} // ~utf16utf8_codecvt
-
-std::codecvt_base::result utf16utf8_codecvt::do_out(std::mbstate_t& /* state */,
-                        const wchar_t* from,
-                        const wchar_t* from_end,
-                        const wchar_t*& from_next,
-                        char* to,
-                        char* to_limit,
-                        char*& to_next) const
-{
-  from_next = from;
-  to_next = to;
-
-  while(from_next < from_end)
-  {
-    for(const Tab *t = tab; t->char_mask; t++)
-    {
-      if(*from_next > t->wide_mask )
-        continue;
-
-      // is there enough room in outbuffer?
-      if(to_next + (t - tab) + 1 >= to_limit)
-        return std::codecvt_base::partial;
-
-      int c = t->shift;
-      *to_next++ = static_cast<char>(t->char_value | (*from_next >> c));
-      while(c > 0)
-      {
-      	c -= 6;
-       	*to_next++ = static_cast<char>(0x80 | ((*from_next >> c) & 0x3F));
-      } // while(c > 0)
-      break;
-    } // for(Tab *t = tab;  t->char_mask; t++)
-    ++from_next;
-  } // while(from_next < from_end)
-
-  return std::codecvt_base::ok;
-} // do_out
-
-std::codecvt_base::result utf16utf8_codecvt::do_in(std::mbstate_t& /* state */,
-                       const char* from,
-                       const char* from_end,
-                       const char*& from_next,
-                       wchar_t* to,
-                       wchar_t* to_limit,
-                       wchar_t*& to_next) const
-{
-  from_next = from;
-  to_next = to;
-
-	while((from_next < from_end) && (to_next < to_limit))
-	{
-    char start = *from_next;
-    *to_next = static_cast<unsigned char>(*from_next);
-    for(const Tab *t = tab; t->char_mask; t++)
-    {
-      if((start & t->char_mask) == t->char_value)
-      {
-        *to_next &= t->wide_mask;
-        break;
-      }
-      from_next++;
-      *to_next = (*to_next << 6) | ((*from_next ^ 0x80) & 0xff);
-    } // for(Tab *t = tab;  t->char_mask; t++)
-
-    ++from_next;
-    ++to_next;
-  } // while
-
-  return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;
-} // do_in
-
-std::codecvt_base::result utf16utf8_codecvt::do_unshift(std::mbstate_t& /* state */,
-                            char* to,
-                            char* /* to_limit */,
-                            char*& to_next) const
-{
-  to_next = to;
-  return noconv;
-} // do_unshift
-
-int utf16utf8_codecvt::do_encoding() const throw()
-{
-  return 0;
-} // do_encoding
-
-bool utf16utf8_codecvt::do_always_noconv() const throw()
-{
-  return false;
-} // do_always_noconv
-
-int utf16utf8_codecvt::do_length(const std::mbstate_t&,
-                        const char* from,
-                        const char* end,
-                        size_t max) const
-{
-  size_t count(0);
-  const char* from_next = from;
-
-  while((from_next < end) && (count < max))
-  {
-    if(!(*from_next & 0x80))
-    {
-	    ++count;
-      ++from_next;
-    }
-    else if((*from_next&0xc0) == 0xc0)
-    {
-      if(from_next+2 < end)
-      {
-  	    ++count;
-        from_next += 2;
-      }
-      else
-        break;
-    }
-    else if((*from_next&0xe0) == 0xe0)
-    {
-      if(from_next+3 < end)
-      {
-  	    ++count;
-        from_next += 3;
-      }
-      else
-        break;
-    }
-  } // while
-
-  return (from_next-from);
-} // do_length
-
-int utf16utf8_codecvt::do_max_length() const throw()
-{
-  return 3;
-} // do_max_length
-
-// end of file
--- a/Utils/utf16utf8codecvt.cpp
+++ b/Utils/utf16utf8codecvt.cpp
@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------
+// $Id$
+//---------------------------------------------------------------------------
+#include "utf16utf8codecvt.h"
+#include "impl/utf16_utf8.h"
+//---------------------------------------------------------------------------
+// This facet converts from wide chars to char using the
+// FSS-UTF (UCS2) encoding.
+//
+
+std::codecvt_base::result utf16utf8codecvt::do_out(std::mbstate_t& /* state */,
+                       const char* from,
+                       const char* from_end,
+                       const char*& from_next,
+                       wchar_t* to,
+                       wchar_t* to_limit,
+                       wchar_t*& to_next) const
+{ // char input, wchar_t output; the state argument is unused
+  return ArabicaInternal::utf8_2_utf16(from, from_end, from_next, to, to_limit, to_next); // all arguments forwarded, helper's result returned unchanged
+} // do_out
+
+std::codecvt_base::result utf16utf8codecvt::do_in(std::mbstate_t& /* state */,
+                        const wchar_t* from,
+                        const wchar_t* from_end,
+                        const wchar_t*& from_next,
+                        char* to,
+                        char* to_limit,
+                        char*& to_next) const
+{ // wchar_t input, char output; the state argument is unused
+  return ArabicaInternal::utf16_2_utf8(from, from_end, from_next, to, to_limit, to_next); // all arguments forwarded, helper's result returned unchanged
+} // do_in
+
+std::codecvt_base::result utf16utf8codecvt::do_unshift(std::mbstate_t& /* state */,
+                            wchar_t* to,
+                            wchar_t* /* to_limit */,
+                            wchar_t*& to_next) const
+{ // writes nothing to the output buffer; state and to_limit are ignored
+  to_next = to; // output position is reported as the unchanged start of the buffer
+  return noconv;
+} // do_unshift
+
+// Reports how many wchar_t values from [from, end) can be consumed while
+// producing no more than max chars.  Each value is costed at 1 char
+// (<= 0x7F), 2 chars (<= 0x7FF) or 3 chars (anything larger).
+//
+//   from, end  the wchar_t range to measure
+//   max        upper bound on the number of chars the consumed values may need
+//   returns    the number of wchar_t values consumed, i.e. from_next - from
+//
+// The cost of a value is compared with the remaining budget *before* the
+// value is consumed, so the running total can never exceed max.
+int utf16utf8codecvt::do_length(const std::mbstate_t&,
+                                const wchar_t* from,
+                                const wchar_t* end,
+                                size_t max) const
+{
+  size_t count(0);
+  const wchar_t* from_next = from;
+
+  while((from_next < end) && (count < max))
+  {
+    size_t needed = 1;
+    if(*from_next > 0x7F)
+      ++needed;
+    if(*from_next > 0x7FF)
+      ++needed;
+
+    // count < max holds here, so max - count cannot wrap around
+    if(needed > (max - count))
+      break;
+
+    count += needed;
+    ++from_next;
+  } // while
+
+  return static_cast<int>(from_next-from);
+} // do_length
+
+// end of file
--- a/Utils/utf16utf8codecvt.h
+++ b/Utils/utf16utf8codecvt.h
@ -0,0 +1,61 @@
+#ifndef ARABICA_UTF16UTF8_CODECVT_H
+#define ARABICA_UTF16UTF8_CODECVT_H
+//---------------------------------------------------------------------------
+// class utf16utf8codecvt
+// This facet converts from Unicode (UCS-2) wchar_ts to
+// char using the UTF-8 encoding.
+//
+// For the full guff on codecvts see section 22.2.1.5 of
+// The C++ Standard (ISO/IEC 14882 to be pedantic).
+//
+// I got my information about UTF-8 from RFC 2044.
+//
+// $Id$
+//---------------------------------------------------------------------------
+#include <locale>
+
+#if(_MSC_VER < 1300)
+namespace std {
+  typedef ::mbstate_t mbstate_t;
+}
+#endif
+
+class utf16utf8codecvt : public std::codecvt<char, wchar_t, std::mbstate_t> // codecvt facet instantiated with char as internal type and wchar_t as external type
+{
+protected:
+  virtual ~utf16utf8codecvt() { } // empty, protected destructor
+
+  virtual result do_out(std::mbstate_t&, // char sequence in, wchar_t sequence out
+                       const char* from,
+                       const char* from_end,
+                       const char*& from_next,
+                       wchar_t* to,
+                       wchar_t* to_limit,
+                       wchar_t*& to_next) const;
+
+  virtual result do_in(std::mbstate_t&, // wchar_t sequence in, char sequence out
+                        const wchar_t* from,
+                        const wchar_t* from_end,
+                        const wchar_t*& from_next,
+                        char* to,
+                        char* to_limit,
+                        char*& to_next) const;
+
+  virtual result do_unshift(std::mbstate_t&, // operates on the wchar_t (external) buffer
+                            wchar_t*,
+                            wchar_t*,
+                            wchar_t*&) const;
+
+  virtual int do_encoding() const throw() { return 0; } // always reports 0; NOTE(review): in the std::codecvt contract 0 denotes a variable-width encoding - confirm that is the intent
+
+  virtual bool do_always_noconv() const throw() { return false; } // always reports that conversion is required
+
+  virtual int do_length(const std::mbstate_t&, // measures a wchar_t range against a limit of max
+                        const wchar_t* from,
+                        const wchar_t* end,
+                        size_t max) const;
+
+  virtual int do_max_length() const throw() { return 1; } // NOTE(review): presumably "at most one wchar_t is needed per char produced" - confirm
+}; // class utf16utf8codecvt
+
+#endif
--- a/Utils/utf8iso88591codecvt.h
+++ b/Utils/utf8iso88591codecvt.h
@ -43,7 +43,7 @@ protected:
                        const char* end,
                        size_t max) const;

-  virtual int do_max_length() const throw() { return 2; }
+  virtual int do_max_length() const throw() { return 3; }
 }; // class utf8iso88591codecvt

 #endif
--- a/Utils/utf8utf16codecvt.cpp
+++ b/Utils/utf8utf16codecvt.cpp
@ -0,0 +1,79 @@
+//---------------------------------------------------------------------------
+// $Id$
+//---------------------------------------------------------------------------
+#include "utf8utf16codecvt.h"
+#include "impl/utf16_utf8.h"
+//---------------------------------------------------------------------------
+
+std::codecvt_base::result utf8utf16codecvt::do_out(std::mbstate_t& /* state */,
+                        const wchar_t* from,
+                        const wchar_t* from_end,
+                        const wchar_t*& from_next,
+                        char* to,
+                        char* to_limit,
+                        char*& to_next) const
+{ // wchar_t input, char output; the state argument is unused
+  return ArabicaInternal::utf16_2_utf8(from, from_end, from_next, to, to_limit, to_next); // all arguments forwarded, helper's result returned unchanged
+} // do_out
+
+std::codecvt_base::result utf8utf16codecvt::do_in(std::mbstate_t& /* state */,
+                       const char* from,
+                       const char* from_end,
+                       const char*& from_next,
+                       wchar_t* to,
+                       wchar_t* to_limit,
+                       wchar_t*& to_next) const
+{ // char input, wchar_t output; the state argument is unused
+  return ArabicaInternal::utf8_2_utf16(from, from_end, from_next, to, to_limit, to_next); // all arguments forwarded, helper's result returned unchanged
+} // do_in
+
+std::codecvt_base::result utf8utf16codecvt::do_unshift(std::mbstate_t& /* state */,
+                            char* to,
+                            char* /* to_limit */,
+                            char*& to_next) const
+{ // writes nothing to the output buffer; state and to_limit are ignored
+  to_next = to; // output position is reported as the unchanged start of the buffer
+  return noconv;
+} // do_unshift
+
+// Reports how many chars of the UTF-8 range [from, end) make up at most max
+// complete characters.
+//
+//   from, end  the char range to measure
+//   max        upper bound on the number of complete characters to count
+//   returns    the number of chars consumed, i.e. from_next - from
+//
+// The lead byte selects the sequence length:
+//   0xxxxxxx -> 1 char, 110xxxxx -> 2 chars, 1110xxxx -> 3 chars.
+// Scanning stops, without consuming the offending byte, when
+//   - the sequence would run past end (truncated input),
+//   - the byte is a continuation byte (10xxxxxx) in lead position, or
+//   - the lead announces a sequence longer than 3 chars.
+// Stopping in the last two cases is what guarantees the loop always
+// terminates: every iteration either advances from_next or breaks.
+int utf8utf16codecvt::do_length(const std::mbstate_t&,
+                                const char* from,
+                                const char* end,
+                                size_t max) const
+{
+  size_t count(0);
+  const char* from_next = from;
+
+  while((from_next < end) && (count < max))
+  {
+    // test the unsigned value so the masks do not depend on char signedness
+    const unsigned char lead = static_cast<unsigned char>(*from_next);
+    int seq_len;
+
+    if(!(lead & 0x80))
+      seq_len = 1;                    // 0xxxxxxx
+    else if((lead & 0xe0) == 0xc0)
+      seq_len = 2;                    // 110xxxxx
+    else if((lead & 0xf0) == 0xe0)
+      seq_len = 3;                    // 1110xxxx
+    else
+      break;                          // not a lead byte this facet handles
+
+    // the whole sequence must lie inside [from, end); a sequence that ends
+    // exactly at end is complete and is counted
+    if((end - from_next) < seq_len)
+      break;
+
+    ++count;
+    from_next += seq_len;
+  } // while
+
+  return static_cast<int>(from_next-from);
+} // do_length
+
+// end of file
--- a/Utils/utf16utf8_codecvt.h
+++ b/Utils/utf16utf8_codecvt.h
@ -1,7 +1,7 @@
-#ifndef utf16utf8_codecvtH
-#define utf16utf8_codecvtH
+#ifndef ARABICA_UTF8UTF16_CODECVT_H
+#define ARABICA_UTF8UTF16_CODECVT_H
 //---------------------------------------------------------------------------
-// class utf16tf8_codecvt
+// class utf8utf16codecvt
 // This facet converts from Unicode (UCS-2) wchar_ts to
 // char using the UTF-8 encoding.
 //
@ -14,16 +14,16 @@
 //---------------------------------------------------------------------------
 #include <locale>

-#ifdef _MSC_VER
+#if(_MSC_VER < 1300)
 namespace std {
  typedef ::mbstate_t mbstate_t;
 }
 #endif

-class utf16utf8_codecvt : public std::codecvt<wchar_t, char, std::mbstate_t>
+class utf8utf16codecvt : public std::codecvt<wchar_t, char, std::mbstate_t>
 {
 protected:
-  virtual ~utf16utf8_codecvt();
+  virtual ~utf8utf16codecvt() { }

  virtual result do_out(std::mbstate_t&,
                        const wchar_t* from,
@ -46,16 +46,16 @@ protected:
                            char*,
                            char*&) const;

-  virtual int do_encoding() const throw();
+  virtual int do_encoding() const throw() { return 0; }

-  virtual bool do_always_noconv() const throw();
+  virtual bool do_always_noconv() const throw() { return false; }

  virtual int do_length(const std::mbstate_t&,
                        const char* from,
                        const char* end,
                        size_t max) const;

-  virtual int do_max_length() const throw();
-}; // class utf16utf8_codecvt
+  virtual int do_max_length() const throw() { return 3; }
+}; // class utf8utf16codecvt

 #endif
--- a/examples/SAX/SimpleHandler.vcproj
+++ b/examples/SAX/SimpleHandler.vcproj
@ -2,7 +2,7 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.00"
-	Name="SimpleHandler"
+	Name="example_SAXSimpleHandler"
 	SccProjectName=""
 	SccLocalPath="">
 	<Platforms>
@ -71,7 +71,7 @@
 		</Configuration>
 		<Configuration
 			Name="Release|Win32"
-			OutputDirectory=".\..\bin"
+			OutputDirectory=".\..\..\bin"
 			IntermediateDirectory=".\Release"
 			ConfigurationType="1"
 			UseOfMFC="0"
--- a/examples/SAX/Writer.vcproj
+++ b/examples/SAX/Writer.vcproj
@ -2,7 +2,7 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.00"
-	Name="Writer"
+	Name="example_SAXWriter"
 	SccProjectName=""
 	SccLocalPath="">
 	<Platforms>
--- a/examples/SAX/pyx.vcproj
+++ b/examples/SAX/pyx.vcproj
@ -2,7 +2,7 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.00"
-	Name="pyx"
+	Name="example_SAXPyx"
 	SccProjectName=""
 	SccLocalPath="">
 	<Platforms>
--- a/examples/SAX2DOM/DOMWriter.vcproj
+++ b/examples/SAX2DOM/DOMWriter.vcproj
@ -2,7 +2,7 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.00"
-	Name="DOMWriter"
+	Name="example_DOMWriter"
 	ProjectGUID="{C1CF7801-1681-4F15-8D71-BBC814805AF2}"
 	Keyword="Win32Proj">
 	<Platforms>
--- a/examples/SAX2DOM/SAX2DOM_test.vcproj
+++ b/examples/SAX2DOM/SAX2DOM_test.vcproj
@ -2,7 +2,7 @@
 <VisualStudioProject
 	ProjectType="Visual C++"
 	Version="7.00"
-	Name="SAX2DOM_test"
+	Name="example_SAX2DOMTests"
 	SccProjectName=""
 	SccLocalPath="">
 	<Platforms>
@ -70,7 +70,7 @@
 		</Configuration>
 		<Configuration
 			Name="Release|Win32"
-			OutputDirectory=".\Release"
+			OutputDirectory=".\..\..\bin"
 			IntermediateDirectory=".\Release"
 			ConfigurationType="1"
 			UseOfMFC="0"