#ifndef ARABICA_SAX_WRITER_H #define ARABICA_SAX_WRITER_H #include #include #include #include #include #include #include #include namespace SAX { template class basic_Writer : public basic_XMLFilterImpl, private basic_LexicalHandler { public: typedef string_type stringT; typedef basic_Writer WriterT; typedef typename string_type::value_type charT; typedef typename string_type::traits_type traitsT; typedef std::basic_ostream ostreamT; typedef basic_XMLReader XMLReaderT; typedef basic_XMLFilterImpl XMLFilterT; typedef typename basic_XMLFilterImpl::AttributesT AttributesT; typedef Arabica::Unicode UnicodeT; private: typedef basic_LexicalHandler LexicalHandlerT; typedef typename XMLReaderT::InputSourceT InputSourceT; typedef typename XMLReaderT::PropertyBase PropertyBase; public: basic_Writer(ostreamT& stream, unsigned int indent = 2) : inCDATA_(false), inDTD_(false), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), lastTag_(startTag) { } // basic_Writer basic_Writer(ostreamT& stream, XMLReaderT& parent, unsigned int indent = 2) : XMLFilterT(parent), inCDATA_(false), inDTD_(false), indent_(indent), depth_(0), stream_(&stream), lexicalHandler_(0), lastTag_(startTag) { } // basic_Writer virtual void startDocument(); virtual void endDocument(); virtual void startElement(const stringT& namespaceURI, const stringT& localName, const stringT& qName, const AttributesT& atts); virtual void endElement(const stringT& namespaceURI, const stringT& localName, const stringT& qName); virtual void characters(const stringT& ch); virtual void processingInstruction(const stringT& target, const stringT& data); virtual void skippedEntity(const stringT& name); virtual void parse(InputSourceT& input); protected: virtual std::auto_ptr doGetProperty(const stringT& name); virtual void doSetProperty(const stringT& name, std::auto_ptr value); private: virtual void startDTD(const stringT& name, const stringT& publicId, const stringT& systemId); virtual void endDTD(); virtual void startEntity(const stringT& name); virtual void endEntity(const stringT& name); virtual void startCDATA(); virtual void endCDATA(); virtual void comment(const stringT& text); void doIndent(); bool isDtd(const stringT& name); bool inCDATA_; bool inDTD_; int indent_; int depth_; ostreamT* stream_; LexicalHandlerT* lexicalHandler_; enum { startTag, endTag, docTag } lastTag_; const SAX::PropertyNames properties_; template class escaper { private: typedef char_type charT; typedef traits_type traitsT; typedef std::basic_ostream ostreamT; typedef Arabica::Unicode UnicodeT; public: escaper(ostreamT* stream) : stream_(stream) { } void operator()(charT ch) { if(ch == UnicodeT::LESS_THAN_SIGN) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_L << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::LESS_THAN_SIGN) if(ch == UnicodeT::GREATER_THAN_SIGN) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_G << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::GREATER_THAN_SIGN) if(ch == UnicodeT::AMPERSAND) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_A << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_P << UnicodeT::SEMI_COLON; return; } // if(ch == case UnicodeT::AMPERSAND) if(ch == UnicodeT::QUOTATION_MARK) { *stream_ << UnicodeT::AMPERSAND << UnicodeT::LOWERCASE_Q << UnicodeT::LOWERCASE_U << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_T << UnicodeT::SEMI_COLON; return; } // if(ch == UnicodeT::QUOTATION_MARK) *stream_ << ch; } // operator() private: ostreamT* stream_; }; // escaper }; // class basic_Writer template void basic_Writer::startDocument() { *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << UnicodeT::LOWERCASE_X << UnicodeT::LOWERCASE_M << UnicodeT::LOWERCASE_L << UnicodeT::SPACE << UnicodeT::LOWERCASE_V << UnicodeT::LOWERCASE_E << UnicodeT::LOWERCASE_R << UnicodeT::LOWERCASE_S << UnicodeT::LOWERCASE_I << UnicodeT::LOWERCASE_O << UnicodeT::LOWERCASE_N << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK << UnicodeT::NUMBER_1 << UnicodeT::FULL_STOP << UnicodeT::NUMBER_0 << UnicodeT::QUOTATION_MARK << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN << UnicodeT::LINE_FEED; depth_ = 0; inCDATA_ = false; XMLFilterT::startDocument(); lastTag_ = docTag; } // startDocument template void basic_Writer::endDocument() { XMLFilterT::endDocument(); lastTag_ = endTag; } // endDocument template void basic_Writer::startElement( const stringT& namespaceURI, const stringT& localName, const stringT& qName, const AttributesT& atts) { if((lastTag_ == startTag) && (indent_ > 0)) *stream_ << UnicodeT::LINE_FEED; doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << (!qName.empty() ? qName : localName); for(int i = 0; i < atts.getLength(); ++i) { *stream_ << UnicodeT::SPACE << atts.getQName(i) << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stringT value = atts.getValue(i); std::for_each(value.begin(), value.end(), escaper(stream_)); *stream_ << UnicodeT::QUOTATION_MARK; } *stream_ << UnicodeT::GREATER_THAN_SIGN; depth_ += indent_; lastTag_ = startTag; XMLFilterT::startElement(namespaceURI, localName, qName, atts); } // startElement template void basic_Writer::endElement( const stringT& namespaceURI, const stringT& localName, const stringT& qName) { depth_ -= indent_; if(lastTag_ == endTag) doIndent(); *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::SLASH << (!qName.empty() ? qName : localName) << UnicodeT::GREATER_THAN_SIGN; if(indent_ != 0) *stream_ << UnicodeT::LINE_FEED; lastTag_ = endTag; XMLFilterT::endElement(namespaceURI, localName, qName); } // endElement template void basic_Writer::characters(const stringT& ch) { if(!inCDATA_) std::for_each(ch.begin(), ch.end(), escaper(stream_)); else *stream_ << ch; XMLFilterT::characters(ch); } // characters template void basic_Writer::processingInstruction(const stringT& target, const stringT& data) { *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::QUESTION_MARK << target << UnicodeT::SPACE << data << UnicodeT::QUESTION_MARK << UnicodeT::GREATER_THAN_SIGN; } // processingInstruction template void basic_Writer::skippedEntity(const stringT& name) { if(!isDtd(name)) *stream_ << UnicodeT::AMPERSAND << name << UnicodeT::SEMI_COLON; } // skippedEntity template void basic_Writer::parse(InputSourceT& input) { try { XMLReaderT* parent = getParent(); if(parent) parent->setProperty(properties_.lexicalHandler, static_cast(*this)); } catch(...) { } XMLFilterT::parse(input); } // parse template void basic_Writer::doIndent() { for(int i = 0; i < depth_; ++i) *stream_ << UnicodeT::SPACE; } // doIndent template bool basic_Writer::isDtd(const string_type& name) { return (name.length() == 5 && name[0] == UnicodeT::LEFT_SQUARE_BRACKET && name[1] == UnicodeT::LOWERCASE_D && name[2] == UnicodeT::LOWERCASE_T && name[3] == UnicodeT::LOWERCASE_D && name[4] == UnicodeT::RIGHT_SQUARE_BRACKET); } // isDtd #ifndef ARABICA_VS6_WORKAROUND template std::auto_ptr::PropertyBase> basic_Writer::doGetProperty(const string_type& name) #else template std::auto_ptr::PropertyBase> basic_Writer::doGetProperty(const string_type& name) #endif { if(name == properties_.lexicalHandler) { XMLReaderT::Property* prop = new XMLReaderT::Property(lexicalHandler_); return std::auto_ptr(prop); } return XMLFilterT::doGetProperty(name); } // doGetProperty #ifndef ARABICA_VS6_WORKAROUND template void basic_Writer::doSetProperty(const string_type& name, std::auto_ptr::PropertyBase> value) #else template void basic_Writer::doSetProperty(const string_type& name, std::auto_ptr::PropertyBase> value) #endif { if(name == properties_.lexicalHandler) { XMLReaderT::Property* prop = dynamic_cast*>(value.get()); if(!prop) throw std::bad_cast(); lexicalHandler_ = &(prop->get()); } XMLFilterT::doSetProperty(name, value); } // doSetProperty template void basic_Writer::startDTD(const stringT& name, const stringT& publicId, const stringT& systemId) { inDTD_ = true; *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_O << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_Y << UnicodeT::CAPITAL_P << UnicodeT::CAPITAL_E << UnicodeT::SPACE << name << UnicodeT::SPACE; if(publicId != stringT()) *stream_ << UnicodeT::CAPITAL_P << UnicodeT::CAPITAL_U << UnicodeT::CAPITAL_B << UnicodeT::CAPITAL_L << UnicodeT::CAPITAL_I << UnicodeT::CAPITAL_C << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << publicId << UnicodeT::QUOTATION_MARK << UnicodeT::SPACE; else *stream_ << UnicodeT::CAPITAL_S << UnicodeT::CAPITAL_Y << UnicodeT::CAPITAL_S << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_E << UnicodeT::CAPITAL_M; *stream_ << UnicodeT::SPACE << UnicodeT::QUOTATION_MARK << systemId << UnicodeT::QUOTATION_MARK << UnicodeT::GREATER_THAN_SIGN << std::endl; if(lexicalHandler_) lexicalHandler_->startDTD(name, publicId, systemId); } // startDTD template void basic_Writer::endDTD() { inDTD_ = false; if(lexicalHandler_) lexicalHandler_->endDTD(); } // endDTD template void basic_Writer::startEntity(const stringT& name) { if(lexicalHandler_) lexicalHandler_->startEntity(name); } // startEntity template void basic_Writer::endEntity(const stringT& name) { if(lexicalHandler_) lexicalHandler_->endEntity(name); } // endEntity template void basic_Writer::startCDATA() { inCDATA_ = true; *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::LEFT_SQUARE_BRACKET << UnicodeT::CAPITAL_C << UnicodeT::CAPITAL_D << UnicodeT::CAPITAL_A << UnicodeT::CAPITAL_T << UnicodeT::CAPITAL_A << UnicodeT::LEFT_SQUARE_BRACKET; if(lexicalHandler_) lexicalHandler_->startCDATA(); } // startCDATA template void basic_Writer::endCDATA() { *stream_ << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::RIGHT_SQUARE_BRACKET << UnicodeT::GREATER_THAN_SIGN; inCDATA_ = false; if(lexicalHandler_) lexicalHandler_->endCDATA(); } // endCDATA template void basic_Writer::comment(const stringT& text) { if(!inDTD_) *stream_ << UnicodeT::LESS_THAN_SIGN << UnicodeT::EXCLAMATION_MARK << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << text << UnicodeT::HYPHEN_MINUS << UnicodeT::HYPHEN_MINUS << UnicodeT::GREATER_THAN_SIGN; if(lexicalHandler_) lexicalHandler_->comment(text); } // comment typedef basic_Writer Writer; #ifndef ARABICA_NO_WCHAR_T typedef basic_Writer wWriter; #endif } // namespace SAX #endif