From f356c2cf10307186ea4e451d737e679b535bf4d0 Mon Sep 17 00:00:00 2001 From: jez_higgins <> Date: Wed, 10 Sep 2003 10:39:01 +0000 Subject: [PATCH] codecvt workarounds - some implementations don't provide function bodies for the codecvt template class - I need to check spec on this some implementations have an opaque definition of std::mbstate_t which means it's actually exceedingly difficult to use :) --- SAX/ArabicaConfig.S | 2 ++ SAX/Makefile | 22 +++++++++++++--------- Utils/base64codecvt.cpp | 10 +++++----- Utils/base64codecvt.h | 23 +++++++++++++---------- Utils/utf16utf8codecvt.h | 6 +++++- 5 files changed, 38 insertions(+), 25 deletions(-) diff --git a/SAX/ArabicaConfig.S b/SAX/ArabicaConfig.S index 52d9dacb..ec5baab9 100644 --- a/SAX/ArabicaConfig.S +++ b/SAX/ArabicaConfig.S @@ -23,6 +23,8 @@ define pasty(ARABICA, _NO_WCHAR_T) 1 #if (_MSC_VER < 1300) define ARABICA_VS6_WORKAROUND #endif + +define ARABICA_NO_CODECVT_SPECIALISATIONS #endif #ifdef _WIN32 diff --git a/SAX/Makefile b/SAX/Makefile index 9ae471e8..592cc4ba 100644 --- a/SAX/Makefile +++ b/SAX/Makefile @@ -32,18 +32,22 @@ LDFLAGS += $(DYNAMIC_LIBS) # SOURCE FILES SRCS = saxlib.cpp \ helpers/InputSourceResolver.cpp \ + ../Utils/rot13codecvt.cpp \ ../Utils/utf8ucs2codecvt.cpp \ ../Utils/impl/ucs2_utf8.cpp \ + ../Utils/utf16beucs2codecvt.cpp \ + ../Utils/utf16leucs2codecvt.cpp \ + ../Utils/impl/ucs2_utf16.cpp \ + ../Utils/iso88591utf8codecvt.cpp \ + ../Utils/utf8iso88591codecvt.cpp \ + ../Utils/impl/iso88591_utf8.cpp \ + ../Utils/utf16utf8codecvt.cpp \ + ../Utils/base64codecvt.cpp \ ../XML/XMLCharacterClasses.cpp -# ../Utils/base64codecvt.cpp \ -# ../Utils/iso88591utf8codecvt.cpp \ -# ../Utils/rot13codecvt.cpp \ -# ../Utils/utf16beucs2codecvt.cpp \ -# ../Utils/utf16leucs2codecvt.cpp \ -# ../Utils/utf16utf8codecvt.cpp \ -# ../Utils/utf8iso88591codecvt.cpp \ -# ../Utils/impl/iso88591_utf8.cpp \ -# ../Utils/impl/ucs2_utf16.cpp \ + + + + HDRS = $(patsubst %.c,%.h,$(patsubst %.cpp,%.h,$(SRCS))) diff --git 
a/Utils/base64codecvt.cpp b/Utils/base64codecvt.cpp index b4577d0c..aa7ac666 100644 --- a/Utils/base64codecvt.cpp +++ b/Utils/base64codecvt.cpp @@ -9,7 +9,7 @@ static const std::string base64_charset("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); static const int NO_MORE = 256; -std::codecvt_base::result base64codecvt::do_out(state_t& state, +std::codecvt_base::result base64codecvt::do_out(std::mbstate_t& state, const char* from, const char* from_end, const char*& from_next, @@ -67,7 +67,7 @@ std::codecvt_base::result base64codecvt::do_out(state_t& state, return ((getState() == 0) && (getCurrentOutChar() == NO_MORE) && (getPreviousChar() == 0)) ? std::codecvt_base::ok : std::codecvt_base::partial; } // do_out -std::codecvt_base::result base64codecvt::do_in(state_t& state, +std::codecvt_base::result base64codecvt::do_in(std::mbstate_t& state, const char* from, const char* from_end, const char*& from_next, @@ -109,7 +109,7 @@ std::codecvt_base::result base64codecvt::do_in(state_t& state, return (from_next == from_end) ? std::codecvt_base::ok : std::codecvt_base::partial; } // do_in -std::codecvt_base::result base64codecvt::do_unshift(state_t& state, +std::codecvt_base::result base64codecvt::do_unshift(std::mbstate_t& state, char* to, char* to_limit, char*& to_next) const @@ -126,7 +126,7 @@ std::codecvt_base::result base64codecvt::do_unshift(state_t& state, return (to_next == to_limit) ? 
codecvt_base::ok : codecvt_base::partial; } // do_unshift -int base64codecvt::do_length(const state_t&, +int base64codecvt::do_length(const std::mbstate_t&, const char* from, const char* end, size_t max) const @@ -158,7 +158,7 @@ int base64codecvt::getState() const void base64codecvt::nextState() const { - state_t s = getState(); + int s = getState(); s = s + 1; if(s == 4) s = 0; diff --git a/Utils/base64codecvt.h b/Utils/base64codecvt.h index 0aacbd60..a2e79b70 100644 --- a/Utils/base64codecvt.h +++ b/Utils/base64codecvt.h @@ -8,15 +8,18 @@ #include -class base64codecvt : public std::codecvt -{ -public: - typedef int state_t; +#ifdef ARABICA_VS6_WORKAROUND +namespace std { + typedef ::mbstate_t mbstate_t; +} +#endif +class base64codecvt : public std::codecvt +{ protected: virtual ~base64codecvt() { } - virtual result do_out(state_t& state, + virtual result do_out(std::mbstate_t& state, const char* from, const char* from_end, const char*& from_next, @@ -24,7 +27,7 @@ protected: char* to_limit, char*& to_next) const; - virtual result do_in(state_t& state, + virtual result do_in(std::mbstate_t& state, const char* from, const char* from_end, const char*& from_next, @@ -32,7 +35,7 @@ protected: char* to_limit, char*& to_next) const; - virtual result do_unshift(state_t& state, + virtual result do_unshift(std::mbstate_t& state, char* to, char* to_limit, char*& to_next) const; @@ -41,7 +44,7 @@ protected: virtual bool do_always_noconv() const throw() { return false; } - virtual int do_length(const state_t&, + virtual int do_length(const std::mbstate_t&, const char* from, const char* end, size_t max) const; @@ -53,10 +56,10 @@ private: // the state counter, and in some case we need to "pad" the input // strings. 
I use these helper functions to mungle them // together and keep the details neater (or try to anyway) - mutable state_t* state_; + mutable int* state_; mutable const char** from_next_; mutable const char** from_end_; - void grabState(state_t& state) const { state_ = &state; } + void grabState(std::mbstate_t& state) const { state_ = reinterpret_cast(&state); } void grabFromNext(const char*& from_next) const { from_next_ = &from_next; } void grabFromEnd(const char*& from_end) const { from_end_ = &from_end; } diff --git a/Utils/utf16utf8codecvt.h b/Utils/utf16utf8codecvt.h index fc5186ac..3837b55d 100644 --- a/Utils/utf16utf8codecvt.h +++ b/Utils/utf16utf8codecvt.h @@ -14,7 +14,10 @@ //--------------------------------------------------------------------------- #include -#if(_MSC_VER < 1300) +#ifndef ARABICA_NO_CODECVT_SPECIALISATIONS +#include +#endif +#ifdef ARABICA_VS6_WORKAROUND namespace std { typedef ::mbstate_t mbstate_t; } @@ -59,3 +62,4 @@ protected: }; // class utf16utf8codecvt #endif +