From ffd2a819db2f53d7f23dd5108b365fd649c9b7f6 Mon Sep 17 00:00:00 2001 From: jez <> Date: Thu, 18 Jan 2007 20:49:19 +0000 Subject: [PATCH] When ARABICA_NO_WCHAR_T defined, transcode to UTF-8 not native code page Fix for http://sourceforge.net/tracker/index.php?func=detail&aid=1636010&group_id=56163&atid=479571 --- include/SAX/wrappers/saxxerces.h | 81 ++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/include/SAX/wrappers/saxxerces.h b/include/SAX/wrappers/saxxerces.h index e7e5a245..b7ca3925 100644 --- a/include/SAX/wrappers/saxxerces.h +++ b/include/SAX/wrappers/saxxerces.h @@ -142,6 +142,7 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser typedef SAX::basic_Attributes AttributesT; typedef SAX::basic_DeclHandler DeclHandlerT; typedef SAX::basic_ErrorHandler ErrorHandlerT; + typedef typename ErrorHandlerT::SAXParseExceptionT SAXParseExceptionT; typedef SAX::XercesFeatureNames featuresT; @@ -244,9 +245,7 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser // We want the trailing 0 character. *inserter = *str; while (*str != 0) // str points to the character we've just copied - { *++inserter = *++str; - } return base::construct_from_utf16(&buffer[0]); } else @@ -273,36 +272,57 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser } // makeStringT #else // alternative version for the wchar_t impaired + static string_type makeStringT(const XMLCh* str, unsigned int inputLength) + { + static XMLByte outBuff[4096]; + + string_type out; + unsigned int outputLength; + unsigned int eaten = 0; + unsigned int offset = 0; + + while(inputLength) + { + outputLength = transcoder_->transcodeTo(str+offset, + inputLength, + outBuff, 1024, + eaten, + XERCES_CPP_NAMESPACE::XMLTranscoder::UnRep_RepChar); + base::append(out, construct_from_XMLByte(outBuff, outputLength)); + offset += eaten; + inputLength -= eaten; + } + + return out; + } // makeStringT + static string_type makeStringT(const XMLCh* str) { - if(str) - { - xerces_string_janitor cstr(XERCES_CPP_NAMESPACE::XMLString::transcode(str)); - return base::construct_from_utf8(cstr.get()); - } - return string_type(); + return makeStringT(str, XERCES_CPP_NAMESPACE::XMLString::stringLen(str)); } // makeStringT - static string_type makeStringT(const XMLCh* str, int length) + static string_type construct_from_XMLByte(const XMLByte* bytes, int length) { - // this isn't pretty, but Xerces doesn't provide a transcode with takes - // a length - if(str && length) - { - std::vector wv(length + 1); - std::copy(str, str+length, std::insert_iterator >(wv, wv.begin())); - wv.push_back(0); + return base::construct_from_utf8(reinterpret_cast(bytes), length); + } // construct_from_XMLByte - return makeStringT(&wv[0]); - } - return string_type(); - } // makeStringT + static std::auto_ptr transcoder_; + static kickoff() + { + XERCES_CPP_NAMESPACE::XMLTransService::Codes res; + transcoder_.reset(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewTranscoderFor(XERCES_CPP_NAMESPACE::XMLRecognizer::UTF_8, res, 4096, XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager)); + } + static shutdown() + { + transcoder_.reset(0); + } // shutdown #endif static XMLCh* asXMLChString(const string_type& s) { std::string str = base::asStdString(s); return XERCES_CPP_NAMESPACE::XMLString::transcode(str.c_str()); } // asXMLChString + }; // class xerces_string_adaptor typedef xerces_string_adaptor SA; @@ -645,7 +665,7 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser } // resetErrors private: - typedef void(ErrorHandlerT::* ErrorFn)(const typename ErrorHandlerT::SAXParseExceptionT&); + typedef void(ErrorHandlerT::* ErrorFn)(const SAXParseExceptionT&); void handleError(const XERCES_CPP_NAMESPACE::SAXParseException& exception, ErrorFn fn) { @@ -655,7 +675,7 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser string_type errorMsg(SA::makeStringT(exception.getMessage())); string_type publicId(SA::makeStringT(exception.getPublicId())); string_type systemId(SA::makeStringT(exception.getSystemId())); - ErrorHandlerT::SAXParseExceptionT sp(SA::asStdString(errorMsg), + SAXParseExceptionT sp(SA::asStdString(errorMsg), publicId, systemId, exception.getLineNumber(), @@ -858,6 +878,11 @@ class xerces_wrapper : public SAX::basic_ProgressiveParser string_type externalNoNamespaceSchemaLocation_; }; // class xerces_wrapper +#ifdef ARABICA_NO_WCHAR_T +template +std::auto_ptr xerces_wrapper::xerces_string_adaptor::transcoder_; +#endif + template xerces_wrapper::xerces_wrapper() { @@ -865,6 +890,9 @@ xerces_wrapper::xerces_wrapper() { std::auto_ptr init(new XercesImpl::xerces_initializer()); initializer_ = init; +#ifdef ARABICA_NO_WCHAR_T + xerces_string_adaptor::kickoff(); +#endif } catch(const XERCES_CPP_NAMESPACE::XMLException& toCatch) { @@ -885,6 +913,9 @@ xerces_wrapper::xerces_wrapper() template xerces_wrapper::~xerces_wrapper() { +#ifdef ARABICA_NO_WCHAR_T + xerces_string_adaptor::shutdown(); +#endif delete xerces_; } // ~xerces_wrapper @@ -1001,7 +1032,8 @@ void xerces_wrapper::doSetProperty(const string_type& name, { if(name == properties_.lexicalHandler) { - Property&>* prop = dynamic_cast&>*>(value.get()); + typedef typename SAX::basic_XMLReader::template Property Prop; + Prop* prop = dynamic_cast(value.get()); if(!prop) throw std::runtime_error("bad_cast: Property LexicalHandler is wrong type, should be SAX::LexicalHandler&"); @@ -1012,7 +1044,8 @@ void xerces_wrapper::doSetProperty(const string_type& name, if(name == properties_.declHandler) { - Property&>* prop = dynamic_cast&>*>(value.get()); + typedef typename SAX::basic_XMLReader::template Property Prop; + Prop* prop = dynamic_cast(value.get()); if(!prop) throw std::runtime_error("bad_cast: Property DeclHandler is wrong type, should be SAX::DeclHandler&");