diff --git a/Arabica.sln b/Arabica.sln
index ad8366de..17d87095 100644
--- a/Arabica.sln
+++ b/Arabica.sln
@@ -1,17 +1,19 @@
Microsoft Visual Studio Solution File, Format Version 7.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DOM", "DOM\DOM.vcproj", "{AFD0FD18-3D55-4CEC-A242-EA290EBBF171}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SAX2DOM_test", "EXAMPLES\SAX2DOM\SAX2DOM_test.vcproj", "{E5157BA4-96A1-4D7F-B895-8C9A32F26BB5}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAX2DOMTests", "EXAMPLES\SAX2DOM\SAX2DOM_test.vcproj", "{E5157BA4-96A1-4D7F-B895-8C9A32F26BB5}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SAXlib", "SAX\SAX.vcproj", "{884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ArabicaLib", "SAX\SAX.vcproj", "{884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SimpleHandler", "examples\SAX\SimpleHandler.vcproj", "{16475ED0-2906-429B-9E73-F2BF2929F6E9}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXSimpleHandler", "examples\SAX\SimpleHandler.vcproj", "{16475ED0-2906-429B-9E73-F2BF2929F6E9}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Writer", "EXAMPLES\SAX\Writer.vcproj", "{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXWriter", "EXAMPLES\SAX\Writer.vcproj", "{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pyx", "examples\SAX\pyx.vcproj", "{AE33D6D1-0F57-4E97-90AE-696854C5AE71}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_SAXPyx", "examples\SAX\pyx.vcproj", "{AE33D6D1-0F57-4E97-90AE-696854C5AE71}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DOMWriter", "examples\SAX2DOM\DOMWriter.vcproj", "{C1CF7801-1681-4F15-8D71-BBC814805AF2}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_DOMWriter", "examples\SAX2DOM\DOMWriter.vcproj", "{C1CF7801-1681-4F15-8D71-BBC814805AF2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example_UtilsTranscode", "examples\Utils\transcode.vcproj", "{436B423B-BF20-4B2E-A187-604AF391FBE2}"
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
@@ -23,6 +25,8 @@ Global
{16475ED0-2906-429B-9E73-F2BF2929F6E9}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
{3C6CBC24-07D4-4DE8-A1DF-592C3BC77C56}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
{AE33D6D1-0F57-4E97-90AE-696854C5AE71}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
+ {C1CF7801-1681-4F15-8D71-BBC814805AF2}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
+ {436B423B-BF20-4B2E-A187-604AF391FBE2}.0 = {884490E3-E4B3-43BE-A88B-7FA9EA4E16AB}
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{AFD0FD18-3D55-4CEC-A242-EA290EBBF171}.Debug.ActiveCfg = Debug|Win32
@@ -53,6 +57,10 @@ Global
{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Debug.Build.0 = Debug|Win32
{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Release.ActiveCfg = Release|Win32
{C1CF7801-1681-4F15-8D71-BBC814805AF2}.Release.Build.0 = Release|Win32
+ {436B423B-BF20-4B2E-A187-604AF391FBE2}.Debug.ActiveCfg = Debug|Win32
+ {436B423B-BF20-4B2E-A187-604AF391FBE2}.Debug.Build.0 = Debug|Win32
+ {436B423B-BF20-4B2E-A187-604AF391FBE2}.Release.ActiveCfg = Release|Win32
+ {436B423B-BF20-4B2E-A187-604AF391FBE2}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
diff --git a/SAX/SAX.vcproj b/SAX/SAX.vcproj
index cb2a126d..bf63422e 100644
--- a/SAX/SAX.vcproj
+++ b/SAX/SAX.vcproj
@@ -330,10 +330,10 @@
RelativePath="..\XML\XMLCharacterClasses.h">
+ RelativePath="..\Utils\base64codecvt.cpp">
+ RelativePath="..\Utils\base64codecvt.h">
@@ -342,26 +342,54 @@
RelativePath="..\Utils\convertstream.h">
+ RelativePath="..\Utils\iso88591utf8codecvt.cpp">
+ RelativePath="..\Utils\iso88591utf8codecvt.h">
+ RelativePath="..\Utils\rot13codecvt.cpp">
+ RelativePath="..\Utils\rot13codecvt.h">
+ RelativePath="..\Utils\utf16utf8codecvt.cpp">
+ RelativePath="..\Utils\utf16utf8codecvt.h">
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/SAX/helpers/StringAdaptor.h b/SAX/helpers/StringAdaptor.h
index 5aa54819..d996f5d1 100644
--- a/SAX/helpers/StringAdaptor.h
+++ b/SAX/helpers/StringAdaptor.h
@@ -6,7 +6,7 @@
#include
#include
-#include
+#include
namespace SAX
{
@@ -74,9 +74,9 @@ public:
default_string_adaptor() :
#if !(defined _MSC_VER) || !(_MSC_VER < 1300)
- loc_(std::locale(), new utf16utf8_codecvt()),
+ loc_(std::locale(), new utf8utf16codecvt()),
#else
- loc_(std::_Addfac(std::locale(), new utf16utf8_codecvt)),
+ loc_(std::_Addfac(std::locale(), new utf8utf16codecvt)),
#endif
n_(),
w_()
@@ -138,9 +138,9 @@ public:
default_string_adaptor() :
#if !(defined _MSC_VER) || !(_MSC_VER < 1300)
- loc_(std::locale(), new utf16utf8_codecvt()),
+ loc_(std::locale(), new utf8utf16codecvt()),
#else
- loc_(std::_Addfac(std::locale(), new utf16utf8_codecvt)),
+ loc_(std::_Addfac(std::locale(), new utf8utf16codecvt)),
#endif
n_(),
w_()
diff --git a/Utils/iso88591utf8codecvt.cpp b/Utils/iso88591utf8codecvt.cpp
index c3e6f396..bcc9ff18 100644
--- a/Utils/iso88591utf8codecvt.cpp
+++ b/Utils/iso88591utf8codecvt.cpp
@@ -48,7 +48,7 @@ int iso88591utf8codecvt::do_length(const std::mbstate_t&,
while((from_next < end) && (count < max))
{
unsigned char fn = static_cast(*from_next);
- if(fn && 0x80)
+ if(fn & 0x80)
++count;
++count;
++from_next;
diff --git a/Utils/iso8859_1utf8_codecvt.cpp b/Utils/iso8859_1utf8_codecvt.cpp
deleted file mode 100644
index 391ada2b..00000000
--- a/Utils/iso8859_1utf8_codecvt.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-//---------------------------------------------------------------------------
-// $Id$
-//---------------------------------------------------------------------------
-#include "iso8859_1utf8_codecvt.h"
-//---------------------------------------------------------------------------
-// This facet converts from ISO8859:1 (Latin 1) chars to UTF-8 encoded chars.
-//
-// Some of this code is derived from work done by Ken Thompson,
-// provided to the X/Open Group.
-
-struct Tab
-{
- char char_mask;
- char char_value;
- int shift;
- wchar_t wide_mask;
-};
-
-static const Tab tab[] =
-{
- { char(0x80), char(0x00), 0*6, 0x7F, }, // 1 byte sequence
- { char(0xE0), char(0xC0), 1*6, 0x7FF, }, // 2 byte sequence
- { char(0xF0), char(0xE0), 2*6, 0xFFFF, }, // 3 byte sequence
- { 0, 0, 0, 0, } // end of table
-};
-
-iso8859_1utf8_codecvt::~iso8859_1utf8_codecvt()
-{
-} // ~iso8859_1utf8_codecvt
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_in(std::mbstate_t& /* state */,
- const char* from,
- const char* from_end,
- const char*& from_next,
- char* to,
- char* to_limit,
- char*& to_next) const
-{
- from_next = from;
- to_next = to;
-
- while(from_next < from_end)
- {
- for(const Tab *t = tab; t->char_mask; t++)
- {
- unsigned char fn = static_cast(*from_next);
- if(fn > t->wide_mask )
- continue;
-
- // is there enough room in outbuffer?
- if(to_next + (t - tab) + 1 >= to_limit)
- return std::codecvt_base::partial;
-
- int c = t->shift;
- *to_next++ = static_cast(t->char_value | (fn >> c));
- while(c > 0)
- {
- c -= 6;
- *to_next++ = static_cast(0x80 | ((fn >> c) & 0x3F));
- } // while(c > 0)
- break;
- } // for(Tab *t = tab; t->char_mask; t++)
- ++from_next;
- } // while(from_next < from_end)
-
- return std::codecvt_base::ok;
-} // do_out
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_out(std::mbstate_t& /* state */,
- const char* from,
- const char* from_end,
- const char*& from_next,
- char* to,
- char* to_limit,
- char*& to_next) const
-{
- from_next = from;
- to_next = to;
-
- while((from_next < from_end) && (to_next < to_limit))
- {
- char start = *from_next;
- wchar_t next = static_cast(*from_next);
- for(const Tab *t = tab; t->char_mask; t++)
- {
- if((start & t->char_mask) == t->char_value)
- {
- next &= t->wide_mask;
- break;
- }
- from_next++;
- next = (next << 6) | ((*from_next ^ 0x80) & 0xff);
- } // for(Tab *t = tab; t->char_mask; t++)
-
- if(next <= 0xFF)
- *to_next = static_cast(next);
- else
- *to_next = '?'; // error state!
-
- ++from_next;
- ++to_next;
- } // while
-
- return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;
-} // do_in
-
-std::codecvt_base::result iso8859_1utf8_codecvt::do_unshift(std::mbstate_t& /* state */,
- char* to,
- char* /* to_limit */,
- char*& to_next) const
-{
- to_next = to;
- return noconv;
-} // do_unshift
-
-int iso8859_1utf8_codecvt::do_encoding() const throw()
-{
- return 0;
-} // do_encoding
-
-bool iso8859_1utf8_codecvt::do_always_noconv() const throw()
-{
- return false;
-} // do_always_noconv
-
-int iso8859_1utf8_codecvt::do_length(const std::mbstate_t&,
- const char* from,
- const char* end,
- size_t max) const
-{
- size_t count(0);
- const char* from_next = from;
-
- while((from_next < end) && (count < max))
- {
- if(!(*from_next & 0x80))
- {
- ++count;
- ++from_next;
- }
- else if((*from_next&0xc0) == 0xc0)
- {
- if(from_next+2 < end)
- {
- ++count;
- from_next += 2;
- }
- else
- break;
- }
- else if((*from_next&0xe0) == 0xe0)
- {
- if(from_next+3 < end)
- {
- ++count;
- from_next += 3;
- }
- else
- break;
- }
- } // while
-
- return (from_next-from);
-} // do_length
-
-int iso8859_1utf8_codecvt::do_max_length() const throw()
-{
- return 2;
-} // do_max_length
-
-// end of file
diff --git a/Utils/iso8859_1utf8_codecvt.h b/Utils/iso8859_1utf8_codecvt.h
deleted file mode 100644
index 78368a04..00000000
--- a/Utils/iso8859_1utf8_codecvt.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef ISO8859_1utf8_codecvtH
-#define ISO8859_1utf8_codecvtH
-#include
-
-#if(_MSC_VER < 1300)
-namespace std {
- typedef ::mbstate_t mbstate_t;
-}
-#endif
-
-class iso8859_1utf8_codecvt : public std::codecvt
-{
-protected:
- virtual ~iso8859_1utf8_codecvt();
-
- virtual result do_out(std::mbstate_t&,
- const char* from,
- const char* from_end,
- const char*& from_next,
- char* to,
- char* to_limit,
- char*& to_next) const;
-
- virtual result do_in(std::mbstate_t&,
- const char* from,
- const char* from_end,
- const char*& from_next,
- char* to,
- char* to_limit,
- char*& to_next) const;
-
- virtual result do_unshift(std::mbstate_t&,
- char*,
- char*,
- char*&) const;
-
- virtual int do_encoding() const throw();
-
- virtual bool do_always_noconv() const throw();
-
- virtual int do_length(const std::mbstate_t&,
- const char* from,
- const char* end,
- size_t max) const;
-
- virtual int do_max_length() const throw();
-}; // class iso8859_1utf8_codecvt
-
-#endif
diff --git a/Utils/utf16utf8_codecvt.cpp b/Utils/utf16utf8_codecvt.cpp
deleted file mode 100644
index 68cb66db..00000000
--- a/Utils/utf16utf8_codecvt.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//---------------------------------------------------------------------------
-// $Id$
-//---------------------------------------------------------------------------
-#include "utf16utf8_codecvt.h"
-//---------------------------------------------------------------------------
-// This facet converts from wide chars to char using the
-// FSS-UTF (UCS2) encoding.
-//
-// Some of this code is derived from work done by Ken Thompson,
-// provided to the X/Open Group.
-
-struct Tab
-{
- char char_mask;
- char char_value;
- int shift;
- wchar_t wide_mask;
-};
-
-static const Tab tab[] =
-{
- { char(0x80), char(0x00), 0*6, 0x7F, }, // 1 byte sequence
- { char(0xE0), char(0xC0), 1*6, 0x7FF, }, // 2 byte sequence
- { char(0xF0), char(0xE0), 2*6, 0xFFFF, }, // 3 byte sequence
- { 0, 0, 0, 0, } // end of table
-};
-
-utf16utf8_codecvt::~utf16utf8_codecvt()
-{
-} // ~utf16utf8_codecvt
-
-std::codecvt_base::result utf16utf8_codecvt::do_out(std::mbstate_t& /* state */,
- const wchar_t* from,
- const wchar_t* from_end,
- const wchar_t*& from_next,
- char* to,
- char* to_limit,
- char*& to_next) const
-{
- from_next = from;
- to_next = to;
-
- while(from_next < from_end)
- {
- for(const Tab *t = tab; t->char_mask; t++)
- {
- if(*from_next > t->wide_mask )
- continue;
-
- // is there enough room in outbuffer?
- if(to_next + (t - tab) + 1 >= to_limit)
- return std::codecvt_base::partial;
-
- int c = t->shift;
- *to_next++ = static_cast(t->char_value | (*from_next >> c));
- while(c > 0)
- {
- c -= 6;
- *to_next++ = static_cast(0x80 | ((*from_next >> c) & 0x3F));
- } // while(c > 0)
- break;
- } // for(Tab *t = tab; t->char_mask; t++)
- ++from_next;
- } // while(from_next < from_end)
-
- return std::codecvt_base::ok;
-} // do_out
-
-std::codecvt_base::result utf16utf8_codecvt::do_in(std::mbstate_t& /* state */,
- const char* from,
- const char* from_end,
- const char*& from_next,
- wchar_t* to,
- wchar_t* to_limit,
- wchar_t*& to_next) const
-{
- from_next = from;
- to_next = to;
-
- while((from_next < from_end) && (to_next < to_limit))
- {
- char start = *from_next;
- *to_next = static_cast(*from_next);
- for(const Tab *t = tab; t->char_mask; t++)
- {
- if((start & t->char_mask) == t->char_value)
- {
- *to_next &= t->wide_mask;
- break;
- }
- from_next++;
- *to_next = (*to_next << 6) | ((*from_next ^ 0x80) & 0xff);
- } // for(Tab *t = tab; t->char_mask; t++)
-
- ++from_next;
- ++to_next;
- } // while
-
- return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial;
-} // do_in
-
-std::codecvt_base::result utf16utf8_codecvt::do_unshift(std::mbstate_t& /* state */,
- char* to,
- char* /* to_limit */,
- char*& to_next) const
-{
- to_next = to;
- return noconv;
-} // do_unshift
-
-int utf16utf8_codecvt::do_encoding() const throw()
-{
- return 0;
-} // do_encoding
-
-bool utf16utf8_codecvt::do_always_noconv() const throw()
-{
- return false;
-} // do_always_noconv
-
-int utf16utf8_codecvt::do_length(const std::mbstate_t&,
- const char* from,
- const char* end,
- size_t max) const
-{
- size_t count(0);
- const char* from_next = from;
-
- while((from_next < end) && (count < max))
- {
- if(!(*from_next & 0x80))
- {
- ++count;
- ++from_next;
- }
- else if((*from_next&0xc0) == 0xc0)
- {
- if(from_next+2 < end)
- {
- ++count;
- from_next += 2;
- }
- else
- break;
- }
- else if((*from_next&0xe0) == 0xe0)
- {
- if(from_next+3 < end)
- {
- ++count;
- from_next += 3;
- }
- else
- break;
- }
- } // while
-
- return (from_next-from);
-} // do_length
-
-int utf16utf8_codecvt::do_max_length() const throw()
-{
- return 3;
-} // do_max_length
-
-// end of file
diff --git a/Utils/utf16utf8codecvt.cpp b/Utils/utf16utf8codecvt.cpp
new file mode 100644
index 00000000..5c083b8e
--- /dev/null
+++ b/Utils/utf16utf8codecvt.cpp
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------
+// $Id$
+//---------------------------------------------------------------------------
+#include "utf16utf8codecvt.h"
+#include "impl/utf16_utf8.h"
+//---------------------------------------------------------------------------
+// This facet converts between chars in the UTF-8 encoding
+// (formerly known as FSS-UTF) and UCS-2 wide chars.
+//
+
+std::codecvt_base::result utf16utf8codecvt::do_out(std::mbstate_t& /* state */, // state is never consulted
+ const char* from,
+ const char* from_end,
+ const char*& from_next,
+ wchar_t* to,
+ wchar_t* to_limit,
+ wchar_t*& to_next) const
+{
+ return ArabicaInternal::utf8_2_utf16(from, from_end, from_next, to, to_limit, to_next); // char -> wchar_t, all work is done by the helper
+} // do_out
+
+std::codecvt_base::result utf16utf8codecvt::do_in(std::mbstate_t& /* state */, // state is never consulted
+ const wchar_t* from,
+ const wchar_t* from_end,
+ const wchar_t*& from_next,
+ char* to,
+ char* to_limit,
+ char*& to_next) const
+{
+ return ArabicaInternal::utf16_2_utf8(from, from_end, from_next, to, to_limit, to_next); // wchar_t -> char, all work is done by the helper
+} // do_in
+
+std::codecvt_base::result utf16utf8codecvt::do_unshift(std::mbstate_t& /* state */,
+ wchar_t* to,
+ wchar_t* /* to_limit */,
+ wchar_t*& to_next) const
+{
+ to_next = to; // nothing is written, so the output position does not move
+ return noconv; // always reports that no conversion was required
+} // do_unshift
+
+// Counts the wchar_ts of [from, end) which encode to at most max UTF-8 bytes.
+int utf16utf8codecvt::do_length(const std::mbstate_t&,
+                        const wchar_t* from,
+                        const wchar_t* end,
+                        size_t max) const
+{
+  size_t count(0);
+  const wchar_t* from_next = from;
+
+  while(from_next < end)
+  {
+    const size_t needed = (*from_next > 0x7FF) ? 3 : ((*from_next > 0x7F) ? 2 : 1);
+    if(needed > (max - count))  // taking this one would exceed max
+      break;
+    count += needed;
+    ++from_next;
+  } // while
+
+  return (from_next-from);
+} // do_length
+
+// end of file
diff --git a/Utils/utf16utf8codecvt.h b/Utils/utf16utf8codecvt.h
new file mode 100644
index 00000000..fc5186ac
--- /dev/null
+++ b/Utils/utf16utf8codecvt.h
@@ -0,0 +1,61 @@
+#ifndef ARABICA_UTF16UTF8_CODECVT_H
+#define ARABICA_UTF16UTF8_CODECVT_H
+//---------------------------------------------------------------------------
+// class utf16utf8codecvt
+// This facet converts between chars using the UTF-8 encoding and
+// Unicode (UCS-2) wchar_ts; its do_out goes from char to wchar_t.
+//
+// For the full guff on codecvts see section 22.2.1.5 of
+// The C++ Standard (ISO/IEC 14882 to be pedantic).
+//
+// I got my information about UTF-8 from RFC 2044.
+//
+// $Id$
+//---------------------------------------------------------------------------
+#include
+
+#if(_MSC_VER < 1300)
+namespace std {
+ typedef ::mbstate_t mbstate_t;
+}
+#endif
+
+class utf16utf8codecvt : public std::codecvt<char, wchar_t, std::mbstate_t>
+{
+protected:
+  virtual ~utf16utf8codecvt() { }
+  // internal char (UTF-8) sequence -> external wchar_t sequence
+  virtual result do_out(std::mbstate_t&,
+                        const char* from,
+                        const char* from_end,
+                        const char*& from_next,
+                        wchar_t* to,
+                        wchar_t* to_limit,
+                        wchar_t*& to_next) const;
+  // external wchar_t sequence -> internal char (UTF-8) sequence
+  virtual result do_in(std::mbstate_t&,
+                       const wchar_t* from,
+                       const wchar_t* from_end,
+                       const wchar_t*& from_next,
+                       char* to,
+                       char* to_limit,
+                       char*& to_next) const;
+  // would emit whatever is needed to return to the initial shift state
+  virtual result do_unshift(std::mbstate_t&,
+                            wchar_t*,
+                            wchar_t*,
+                            wchar_t*&) const;
+  // 0: the external sequence has no fixed width per internal char
+  virtual int do_encoding() const throw() { return 0; }
+  // false: the two sequences differ, so converting is never a no-op
+  virtual bool do_always_noconv() const throw() { return false; }
+  // how many wchar_ts of [from, end) yield no more than max chars
+  virtual int do_length(const std::mbstate_t&,
+                        const wchar_t* from,
+                        const wchar_t* end,
+                        size_t max) const;
+  // at most one wchar_t has to be consumed to produce a char
+  virtual int do_max_length() const throw() { return 1; }
+}; // class utf16utf8codecvt
+
+#endif
diff --git a/Utils/utf8iso88591codecvt.h b/Utils/utf8iso88591codecvt.h
index a2582d57..01ba93f8 100644
--- a/Utils/utf8iso88591codecvt.h
+++ b/Utils/utf8iso88591codecvt.h
@@ -43,7 +43,7 @@ protected:
const char* end,
size_t max) const;
- virtual int do_max_length() const throw() { return 2; }
+ virtual int do_max_length() const throw() { return 3; }
}; // class utf8iso88591codecvt
#endif
diff --git a/Utils/utf8utf16codecvt.cpp b/Utils/utf8utf16codecvt.cpp
new file mode 100644
index 00000000..b4ed2b24
--- /dev/null
+++ b/Utils/utf8utf16codecvt.cpp
@@ -0,0 +1,79 @@
+//---------------------------------------------------------------------------
+// $Id$
+//---------------------------------------------------------------------------
+#include "utf8utf16codecvt.h"
+#include "impl/utf16_utf8.h"
+//---------------------------------------------------------------------------
+
+std::codecvt_base::result utf8utf16codecvt::do_out(std::mbstate_t& /* state */, // state is never consulted
+ const wchar_t* from,
+ const wchar_t* from_end,
+ const wchar_t*& from_next,
+ char* to,
+ char* to_limit,
+ char*& to_next) const
+{
+ return ArabicaInternal::utf16_2_utf8(from, from_end, from_next, to, to_limit, to_next); // wchar_t -> char, all work is done by the helper
+} // do_out
+
+std::codecvt_base::result utf8utf16codecvt::do_in(std::mbstate_t& /* state */, // state is never consulted
+ const char* from,
+ const char* from_end,
+ const char*& from_next,
+ wchar_t* to,
+ wchar_t* to_limit,
+ wchar_t*& to_next) const
+{
+ return ArabicaInternal::utf8_2_utf16(from, from_end, from_next, to, to_limit, to_next); // char -> wchar_t, all work is done by the helper
+} // do_in
+
+std::codecvt_base::result utf8utf16codecvt::do_unshift(std::mbstate_t& /* state */,
+ char* to,
+ char* /* to_limit */,
+ char*& to_next) const
+{
+ to_next = to; // nothing is written, so the output position does not move
+ return noconv; // always reports that no conversion was required
+} // do_unshift
+
+// Counts the bytes of [from, end) holding at most max complete 1-, 2- or
+// 3-byte UTF-8 sequences.  Scanning stops at a truncated sequence or at a
+// byte which cannot begin one (a continuation byte or a longer form).
+int utf8utf16codecvt::do_length(const std::mbstate_t&,
+                        const char* from,
+                        const char* end,
+                        size_t max) const
+{
+  size_t count(0);
+  const char* from_next = from;
+
+  while((from_next < end) && (count < max))
+  {
+    // go via unsigned char so that bytes >= 0x80 compare as expected
+    const unsigned char lead = static_cast<unsigned char>(*from_next);
+
+    int seq_length = 0;
+    if(lead < 0x80)
+      seq_length = 1;   // 0xxxxxxx
+    else if((lead & 0xe0) == 0xc0)
+      seq_length = 2;   // 110xxxxx
+    else if((lead & 0xf0) == 0xe0)
+      seq_length = 3;   // 1110xxxx
+
+    // not a usable lead byte: stop here, because from_next cannot be
+    // advanced past it and the loop would otherwise never terminate
+    if(seq_length == 0)
+      break;
+
+    // a sequence which finishes exactly at end is still complete
+    if((end - from_next) < seq_length)
+      break;
+
+    ++count;
+    from_next += seq_length;
+  } // while
+
+  return (from_next-from);
+} // do_length
+
+// end of file
diff --git a/Utils/utf16utf8_codecvt.h b/Utils/utf8utf16codecvt.h
similarity index 78%
rename from Utils/utf16utf8_codecvt.h
rename to Utils/utf8utf16codecvt.h
index c10ebb86..1c8ca723 100644
--- a/Utils/utf16utf8_codecvt.h
+++ b/Utils/utf8utf16codecvt.h
@@ -1,7 +1,7 @@
-#ifndef utf16utf8_codecvtH
-#define utf16utf8_codecvtH
+#ifndef ARABICA_UTF8UTF16_CODECVT_H
+#define ARABICA_UTF8UTF16_CODECVT_H
//---------------------------------------------------------------------------
-// class utf16tf8_codecvt
+// class utf8utf16codecvt
// This facet converts from Unicode (UCS-2) wchar_ts to
// char using the UTF-8 encoding.
//
@@ -14,16 +14,16 @@
//---------------------------------------------------------------------------
#include
-#ifdef _MSC_VER
+#if(_MSC_VER < 1300)
namespace std {
typedef ::mbstate_t mbstate_t;
}
#endif
-class utf16utf8_codecvt : public std::codecvt
+class utf8utf16codecvt : public std::codecvt
{
protected:
- virtual ~utf16utf8_codecvt();
+ virtual ~utf8utf16codecvt() { }
virtual result do_out(std::mbstate_t&,
const wchar_t* from,
@@ -46,16 +46,16 @@ protected:
char*,
char*&) const;
- virtual int do_encoding() const throw();
+ virtual int do_encoding() const throw() { return 0; }
- virtual bool do_always_noconv() const throw();
+ virtual bool do_always_noconv() const throw() { return false; }
virtual int do_length(const std::mbstate_t&,
const char* from,
const char* end,
size_t max) const;
- virtual int do_max_length() const throw();
-}; // class utf16utf8_codecvt
+ virtual int do_max_length() const throw() { return 3; }
+}; // class utf8utf16codecvt
#endif
diff --git a/examples/SAX/SimpleHandler.vcproj b/examples/SAX/SimpleHandler.vcproj
index 5d43571a..3187e004 100644
--- a/examples/SAX/SimpleHandler.vcproj
+++ b/examples/SAX/SimpleHandler.vcproj
@@ -2,7 +2,7 @@
@@ -71,7 +71,7 @@
diff --git a/examples/SAX/pyx.vcproj b/examples/SAX/pyx.vcproj
index 48464724..a9c119f5 100644
--- a/examples/SAX/pyx.vcproj
+++ b/examples/SAX/pyx.vcproj
@@ -2,7 +2,7 @@
diff --git a/examples/SAX2DOM/DOMWriter.vcproj b/examples/SAX2DOM/DOMWriter.vcproj
index 1d22caf8..1e4b44bf 100644
--- a/examples/SAX2DOM/DOMWriter.vcproj
+++ b/examples/SAX2DOM/DOMWriter.vcproj
@@ -2,7 +2,7 @@
diff --git a/examples/SAX2DOM/SAX2DOM_test.vcproj b/examples/SAX2DOM/SAX2DOM_test.vcproj
index 4a5f75cd..7da72428 100644
--- a/examples/SAX2DOM/SAX2DOM_test.vcproj
+++ b/examples/SAX2DOM/SAX2DOM_test.vcproj
@@ -2,7 +2,7 @@
@@ -70,7 +70,7 @@