#ifndef ARABICA_SAX_WRITER_H #define ARABICA_SAX_WRITER_H #include #include #include #include #include #include #include #include #include namespace SAX { template class basic_Writer : public basic_XMLFilterImpl, private basic_LexicalHandler, private basic_DeclHandler { public: typedef string_type stringT; typedef basic_Writer WriterT; typedef typename string_type::value_type charT; typedef typename string_type::traits_type traitsT; typedef std::basic_ostream ostreamT; typedef basic_XMLReader XMLReaderT; typedef basic_XMLFilterImpl XMLFilterT; typedef SAX::basic_DeclHandler declHandlerT; typedef SAX::basic_LexicalHandler lexicalHandlerT; typedef typename basic_XMLFilterImpl::AttributesT AttributesT; typedef Arabica::Unicode UnicodeT; private: typedef basic_LexicalHandler LexicalHandlerT; typedef basic_DeclHandler DeclHandlerT; typedef typename XMLReaderT::InputSourceT InputSourceT; typedef typename XMLReaderT::PropertyBase PropertyBaseT; typedef typename XMLReaderT::template Property getLexicalHandlerT; typedef typename XMLReaderT::template Property setLexicalHandlerT; typedef typename XMLReaderT::template Property getDeclHandlerT; typedef typename XMLReaderT::template Property setDeclHandlerT; using XMLFilterT::getParent; public: basic_Writer(ostreamT& stream, unsigned int indent = 2) : inCDATA_(false), inDTD_(false), internalSubset_(true), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), declHandler_(0), encoding_(), lastTag_(startTag) { } // basic_Writer basic_Writer(ostreamT& stream, XMLReaderT& parent, unsigned int indent = 2) : XMLFilterT(parent), inCDATA_(false), inDTD_(false), internalSubset_(true), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), declHandler_(0), encoding_(), lastTag_(startTag) { } // basic_Writer basic_Writer(ostreamT& stream, const stringT& encoding, unsigned int indent = 2) : inCDATA_(false), inDTD_(false), internalSubset_(true), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), declHandler_(0), encoding_(encoding), lastTag_(startTag) { } // basic_Writer basic_Writer(ostreamT& stream, XMLReaderT& parent, const stringT& encoding, unsigned int indent = 2) : XMLFilterT(parent), inCDATA_(false), inDTD_(false), internalSubset_(true), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), declHandler_(0), lastTag_(startTag), encoding_(encoding) { } // basic_Writer virtual void parse(InputSourceT& input); // setEncoding // Sets the encoding included in the XML declaration. If not set, then the encoding // declaration will be omitted. // NOTE: This is merely a label. The writer will not perform any transcoding. It is // your responsibility to ensure that the data is already properly encoded, or that the // destination stream will perform any necessary transcoding. void setEncoding(const stringT& encoding) { encoding_ = encoding; } protected: // Parser virtual std::auto_ptr doGetProperty(const stringT& name); virtual void doSetProperty(const stringT& name, std::auto_ptr value); // ContentHandler virtual void startDocument(); virtual void endDocument(); virtual void startElement(const stringT& namespaceURI, const stringT& localName, const stringT& qName, const AttributesT& atts); virtual void endElement(const stringT& namespaceURI, const stringT& localName, const stringT& qName); virtual void characters(const stringT& ch); virtual void ignorableWhitespace(const stringT& ch); virtual void processingInstruction(const stringT& target, const stringT& data); virtual void skippedEntity(const stringT& name); // Lexical Handler virtual void startDTD(const stringT& name, const stringT& publicId, const stringT& systemId); virtual void endDTD(); virtual void startEntity(const stringT& name); virtual void endEntity(const stringT& name); virtual void startCDATA(); virtual void endCDATA(); virtual void comment(const stringT& text); // DTD Handler virtual void notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId); virtual void unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName); // Decl Handler virtual void elementDecl(const stringT& name, const stringT& model); virtual void attributeDecl(const stringT& elementName, const stringT& attributeName, const stringT& type, const stringT& valueDefault, const stringT& value); virtual void internalEntityDecl(const stringT& name, const stringT& value); virtual void externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId); ///////////////// void startEntityDecl(const stringT& name); void publicAndSystem(const stringT& publicId, const stringT& systemId); void doIndent(); bool isDtd(const stringT& name); private: stringT encoding_; bool inCDATA_; bool inDTD_; bool internalSubset_; int indent_; int depth_; ostreamT* stream_; LexicalHandlerT* lexicalHandler_; DeclHandlerT* declHandler_; enum { startTag, endTag, docTag } lastTag_; const SAX::PropertyNames properties_; template class escaper { private: typedef char_type charT; typedef traits_type traitsT; typedef std::basic_ostream ostreamT; typedef Arabica::Unicode UnicodeT; public: escaper(ostreamT* stream) : stream_(stream) { } void operator()(charT ch) { if(ch == UnicodeT::LESS_THAN_SIGN) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::LESS_THAN_SIGN) if(ch == UnicodeT::GREATER_THAN_SIGN) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_G << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::GREATER_THAN_SIGN) if(ch == UnicodeT::AMPERSAND) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_A << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_P << UnicodeT::SEMI_COLON; return; } // if(ch == case UnicodeT::AMPERSAND) if(ch == UnicodeT::QUOTATION_MARK) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_Q << UnicodeT::LOWERCASE_U << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::QUOTATION_MARK) *stream_ << ch; } // operator() private: ostreamT* stream_; }; // escaper }; // class basic_Writer template void basic_Writer::startDocument() { *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::SPACE << UnicodeT::LOWERCASE_V << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_R << UnicodeT::LOWERCASE_S << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_N << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << UnicodeT::NUMBER_1 << UnicodeT::FULL_STOP << UnicodeT::NUMBER_0 << UnicodeT::QUOTATION_MARK; if(encoding_.size()) *stream_ << UnicodeT::SPACE << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_N << UnicodeT::LOWERCASE_C << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_D << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_N << UnicodeT::LOWERCASE_G << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << encoding_ << UnicodeT::QUOTATION_MARK; *stream_ << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN << UnicodeT::LINE_FEED; depth_ = 0; inCDATA_ = false; XMLFilterT::startDocument(); lastTag_ = docTag; } // startDocument template void basic_Writer::endDocument() { XMLFilterT::endDocument(); lastTag_ = endTag; } // endDocument template void basic_Writer::startElement( const stringT& namespaceURI, const stringT& localName, const stringT& qName, const AttributesT& atts) { if((lastTag_ == startTag) && (indent_ > 0)) *stream_ << UnicodeT::LINE_FEED; doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << (!qName.empty() ? qName : localName); for(int i = 0; i < atts.getLength(); ++i) { *stream_ << UnicodeT::SPACE << atts.getQName(i) << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stringT value = atts.getValue(i); std::for_each(value.begin(), value.end(), escaper(stream_)); *stream_ << UnicodeT::QUOTATION_MARK; } *stream_ << UnicodeT::GREATER_THAN_SIGN; depth_ += indent_; lastTag_ = startTag; XMLFilterT::startElement(namespaceURI, localName, qName, atts); } // startElement template void basic_Writer::endElement( const stringT& namespaceURI, const stringT& localName, const stringT& qName) { depth_ -= indent_; if(lastTag_ == endTag) doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH << (!qName.empty() ? qName : localName) << UnicodeT::GREATER_THAN_SIGN; if(indent_ != 0) *stream_ << UnicodeT::LINE_FEED; lastTag_ = endTag; XMLFilterT::endElement(namespaceURI, localName, qName); } // endElement template void basic_Writer::characters(const stringT& ch) { if(!inCDATA_) std::for_each(ch.begin(), ch.end(), escaper(stream_)); else *stream_ << ch; XMLFilterT::characters(ch); } // characters template void basic_Writer::ignorableWhitespace(const stringT& ch) { *stream_ << ch; XMLFilterT::ignorableWhitespace(ch); } // ignorableWhitespace template void basic_Writer::processingInstruction(const stringT& target, const stringT& data) { if((!inDTD_) || (inDTD_ && internalSubset_)) { *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << target; if(data.length()) *stream_ << UnicodeT::SPACE << data; *stream_ << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN; } } // processingInstruction template void basic_Writer::skippedEntity(const stringT& name) { if(!isDtd(name)) *stream_ << UnicodeT::AMPERSAND << name << UnicodeT::SEMI_COLON; } // skippedEntity template void basic_Writer::parse(InputSourceT& input) { try { XMLReaderT* parent = getParent(); if(parent) parent->setProperty(properties_.lexicalHandler, static_cast(*this)); } catch(...) { } try { XMLReaderT* parent = getParent(); if(parent) parent->setProperty(properties_.declHandler, static_cast(*this)); } catch(...) { } XMLFilterT::parse(input); } // parse template void basic_Writer::doIndent() { for(int i = 0; i < depth_; ++i) *stream_ << UnicodeT::SPACE; } // doIndent template bool basic_Writer::isDtd(const string_type& name) { return (name.length() == 5 && name[0] == UnicodeT::LEFT_SQUARE_BRACKET && name[1] == UnicodeT::LOWERCASE_D && name[2] == UnicodeT::LOWERCASE_T && name[3] == UnicodeT::LOWERCASE_D && name[4] == UnicodeT::RIGHT_SQUARE_BRACKET); } // isDtd #ifndef ARABICA_VS6_WORKAROUND template std::auto_ptr::PropertyBaseT> basic_Writer::doGetProperty(const string_type& name) #else template std::auto_ptr::PropertyBaseT> basic_Writer::doGetProperty(const string_type& name) #endif { if(name == properties_.lexicalHandler) { getLexicalHandlerT* prop = new getLexicalHandlerT(lexicalHandler_); return std::auto_ptr(prop); } if(name == properties_.declHandler) { getDeclHandlerT* prop = new getDeclHandlerT(declHandler_); return std::auto_ptr(prop); } return XMLFilterT::doGetProperty(name); } // doGetProperty #ifndef ARABICA_VS6_WORKAROUND template void basic_Writer::doSetProperty(const string_type& name, std::auto_ptr::PropertyBaseT> value) #else template void basic_Writer::doSetProperty(const string_type& name, std::auto_ptr::PropertyBaseT> value) #endif { if(name == properties_.lexicalHandler) { setLexicalHandlerT* prop = dynamic_cast(value.get()); if(!prop) throw std::bad_cast(); lexicalHandler_ = &(prop->get()); } else if(name == properties_.declHandler) { setDeclHandlerT* prop = dynamic_cast(value.get()); if(!prop) throw std::bad_cast(); declHandler_ = &(prop->get()); } XMLFilterT::doSetProperty(name, value); } // doSetProperty template void basic_Writer::startDTD(const stringT& name, const stringT& publicId, const stringT& systemId) { inDTD_ = true; depth_ += indent_; *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_O << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_Y << UnicodeT::CAPITAL_P << UnicodeT::CAPITAL_E << UnicodeT::SPACE << name; publicAndSystem(publicId, systemId); *stream_ << UnicodeT::SPACE << UnicodeT::LEFT_SQUARE_BRACKET << std::endl; if(lexicalHandler_) lexicalHandler_->startDTD(name, publicId, systemId); } // startDTD template void basic_Writer::endDTD() { *stream_ << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN << std::endl; inDTD_ = false; depth_ -= indent_; if(lexicalHandler_) lexicalHandler_->endDTD(); } // endDTD template void basic_Writer::startEntity(const stringT& name) { if(isDtd(name)) internalSubset_ = false; if(lexicalHandler_) lexicalHandler_->startEntity(name); } // startEntity template void basic_Writer::endEntity(const stringT& name) { if(isDtd(name)) internalSubset_ = true; if(lexicalHandler_) lexicalHandler_->endEntity(name); } // endEntity template void basic_Writer::startCDATA() { inCDATA_ = true; *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::LEFT_SQUARE_BRACKET << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::LEFT_SQUARE_BRACKET; if(lexicalHandler_) lexicalHandler_->startCDATA(); } // startCDATA template void basic_Writer::endCDATA() { *stream_ << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN; inCDATA_ = false; if(lexicalHandler_) lexicalHandler_->endCDATA(); } // endCDATA template void basic_Writer::comment(const stringT& text) { if((!inDTD_) || (inDTD_ && internalSubset_)) *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << text << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << UnicodeT::GREATER_THAN_SIGN; if(lexicalHandler_) lexicalHandler_->comment(text); } // comment template void basic_Writer::notationDecl(const stringT& name, const stringT& publicId, const stringT& systemId) { if(internalSubset_) { doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_N << UnicodeT::CAPITAL_O << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_I << UnicodeT::CAPITAL_O << UnicodeT::CAPITAL_N << UnicodeT::SPACE << name; publicAndSystem(publicId, systemId); *stream_ << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... XMLFilterT::notationDecl(name, publicId, systemId); } // notationDecl template void basic_Writer::unparsedEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId, const stringT& notationName) { if(internalSubset_) { doIndent(); startEntityDecl(name); publicAndSystem(publicId, systemId); *stream_ << UnicodeT::SPACE << UnicodeT::CAPITAL_N << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::SPACE << notationName << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... XMLFilterT::unparsedEntityDecl(name, publicId, systemId, notationName); } // unparsedEntityDecl template void basic_Writer::elementDecl(const stringT& name, const stringT& model) { if(internalSubset_) { doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_L << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_M << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_N << UnicodeT::CAPITAL_T << UnicodeT::SPACE << name << UnicodeT::SPACE << model << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... if(declHandler_) declHandler_->elementDecl(name, model); } // elementDecl template void basic_Writer::attributeDecl(const stringT& elementName, const stringT& attributeName, const stringT& type, const stringT& valueDefault, const stringT& value) { if(internalSubset_) { doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_L << UnicodeT::CAPITAL_I << UnicodeT::CAPITAL_S << UnicodeT::CAPITAL_T << UnicodeT::SPACE << elementName << UnicodeT::SPACE << attributeName << UnicodeT::SPACE << type; if(!valueDefault.empty()) *stream_ << UnicodeT::SPACE << valueDefault; if(!value.empty()) *stream_ << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << value << UnicodeT::QUOTATION_MARK; *stream_ << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... if(declHandler_) declHandler_->attributeDecl(elementName, attributeName, type, valueDefault, value); } // attributeDecl template void basic_Writer::internalEntityDecl(const stringT& name, const stringT& value) { if(internalSubset_) { doIndent(); startEntityDecl(name); *stream_ << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << value << UnicodeT::QUOTATION_MARK << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... if(declHandler_) declHandler_->internalEntityDecl(name, value); } // internalEntityDecl template void basic_Writer::externalEntityDecl(const stringT& name, const stringT& publicId, const stringT& systemId) { if(internalSubset_) { doIndent(); startEntityDecl(name); publicAndSystem(publicId, systemId); *stream_ << UnicodeT::GREATER_THAN_SIGN << std::endl; } // if ... if(declHandler_) declHandler_->externalEntityDecl(name, publicId, systemId); } // externalEntityDecl template void basic_Writer::startEntityDecl(const stringT& name) { *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_N << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_I << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_Y << UnicodeT::SPACE << name; } // startEntityDecl template void basic_Writer::publicAndSystem(const stringT& publicId, const stringT& systemId) { *stream_ << UnicodeT::SPACE; if(!publicId.empty()) *stream_ << UnicodeT::CAPITAL_P << UnicodeT::CAPITAL_U << UnicodeT::CAPITAL_B << UnicodeT::CAPITAL_L << UnicodeT::CAPITAL_I << UnicodeT::CAPITAL_C << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << publicId << UnicodeT::QUOTATION_MARK; if(!systemId.empty()) { if(publicId.empty()) *stream_ << UnicodeT::CAPITAL_S << UnicodeT::CAPITAL_Y << UnicodeT::CAPITAL_S << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_M; *stream_ << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << systemId << UnicodeT::QUOTATION_MARK; } // if ... } // publicAndSystem typedef basic_Writer Writer; #ifndef ARABICA_NO_WCHAR_T typedef basic_Writer wWriter; #endif } // namespace SAX #endif