Factored out the XML escaping stuff

This commit is contained in:
jez_higgins 2005-12-14 14:49:57 +00:00
parent f8f3af0a76
commit 071e62b2dd
4 changed files with 82 additions and 119 deletions

View file

@ -5,7 +5,7 @@
// DOM/Utils/Stream.h // DOM/Utils/Stream.h
// //
// Written by Jez Higgins <jez@jezuk.co.uk> // Written by Jez Higgins <jez@jezuk.co.uk>
// Copyright 2003 Jez UK Ltd, http://www.jezuk.co.uk // Copyright 2003-2005 Jez UK Ltd, http://www.jezuk.co.uk
// //
// Provides streaming operator<< for DOM::Nodes. Fully parameterised so // Provides streaming operator<< for DOM::Nodes. Fully parameterised so
// will work with wide and narrow char types, so long as an operator<< // will work with wide and narrow char types, so long as an operator<<
@ -27,67 +27,13 @@
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
#include <XML/UnicodeCharacters.h> #include <XML/UnicodeCharacters.h>
#include <XML/escaper.hpp>
namespace DOM namespace DOM
{ {
namespace StreamImpl namespace StreamImpl
{ {
// Function object that writes one character at a time to an output stream,
// substituting an entity reference for the four characters tested below
// (less-than, greater-than, ampersand and quotation mark).  Every other
// character is written to the stream unchanged.
// It is used with std::for_each over the characters of a node or
// attribute value.
//   char_type   - character type of the target stream
//   traits_type - character traits of the target stream
template<typename char_type, typename traits_type>
class escaper
{
private:
typedef char_type charT;
typedef traits_type traitsT;
typedef std::basic_ostream<charT, traitsT> ostreamT;
typedef Arabica::Unicode<charT> UnicodeT;
public:
// Stores a reference to stream; the stream must therefore remain valid
// for as long as this escaper (or any copy of it) is used.
escaper(ostreamT& stream) : stream_(stream) { }
// Writes ch to the stream, or its entity reference if ch is one of the
// four escaped characters.
void operator()(charT ch)
{
if(ch == UnicodeT::LESS_THAN_SIGN)
{
// emits AMPERSAND, l, t, SEMI_COLON - i.e. &lt;
stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_L
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::LESS_THAN_SIGN)
if(ch == UnicodeT::GREATER_THAN_SIGN)
{
// emits AMPERSAND, g, t, SEMI_COLON - i.e. &gt;
stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_G
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::GREATER_THAN_SIGN)
if(ch == UnicodeT::AMPERSAND)
{
// emits AMPERSAND, a, m, p, SEMI_COLON - i.e. &amp;
stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_A
<< UnicodeT::LOWERCASE_M
<< UnicodeT::LOWERCASE_P
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::AMPERSAND)
if(ch == UnicodeT::QUOTATION_MARK)
{
// emits AMPERSAND, q, u, o, t, SEMI_COLON - i.e. &quot;
stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_Q
<< UnicodeT::LOWERCASE_U
<< UnicodeT::LOWERCASE_O
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::QUOTATION_MARK)
// not a character that needs escaping - pass it straight through
stream_ << ch;
} // operator()
private:
// the stream every character is written to
ostreamT& stream_;
}; // escaper
template<class stringT, class charT, class traitsT> template<class stringT, class charT, class traitsT>
void streamChildren(std::basic_ostream<charT, traitsT>& stream, DOM::Node<stringT>& node) void streamChildren(std::basic_ostream<charT, traitsT>& stream, DOM::Node<stringT>& node)
{ {
@ -195,7 +141,7 @@ int prefix_mapper(std::basic_ostream<charT, traitsT>& stream,
stream << UnicodeT::EQUALS_SIGN stream << UnicodeT::EQUALS_SIGN
<< UnicodeT::QUOTATION_MARK; << UnicodeT::QUOTATION_MARK;
stringT value = attr.getNodeValue(); stringT value = attr.getNodeValue();
std::for_each(value.begin(), value.end(), StreamImpl::escaper<charT, traitsT>(stream)); std::for_each(value.begin(), value.end(), Arabica::XML::escaper<charT, traitsT>(stream));
stream << UnicodeT::QUOTATION_MARK; stream << UnicodeT::QUOTATION_MARK;
} }
@ -212,7 +158,7 @@ int prefix_mapper(std::basic_ostream<charT, traitsT>& stream,
if(!(i->second.empty())) if(!(i->second.empty()))
stream << UnicodeT::COLON << i->second; stream << UnicodeT::COLON << i->second;
stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK; stream << UnicodeT::EQUALS_SIGN << UnicodeT::QUOTATION_MARK;
std::for_each(i->first.begin(), i->first.end(), StreamImpl::escaper<charT, traitsT>(stream)); std::for_each(i->first.begin(), i->first.end(), Arabica::XML::escaper<charT, traitsT>(stream));
stream << UnicodeT::QUOTATION_MARK; stream << UnicodeT::QUOTATION_MARK;
} // for ... } // for ...
@ -285,7 +231,7 @@ operator<<(std::basic_ostream<charT, traitsT>& stream,
case DOM::Node<stringT>::TEXT_NODE: case DOM::Node<stringT>::TEXT_NODE:
{ {
stringT value = node.getNodeValue(); stringT value = node.getNodeValue();
std::for_each(value.begin(), value.end(), StreamImpl::escaper<charT, traitsT>(stream)); std::for_each(value.begin(), value.end(), Arabica::XML::escaper<charT, traitsT>(stream));
} }
break; break;
case DOM::Node<stringT>::ENTITY_REFERENCE_NODE: case DOM::Node<stringT>::ENTITY_REFERENCE_NODE:

View file

@ -362,6 +362,9 @@
<File <File
RelativePath="..\Utils\convertstream.h"> RelativePath="..\Utils\convertstream.h">
</File> </File>
<File
RelativePath="..\Xml\escaper.hpp">
</File>
<File <File
RelativePath=".\Utils\getparam.hpp"> RelativePath=".\Utils\getparam.hpp">
</File> </File>
@ -754,8 +757,8 @@
Name="Debug|Win32"> Name="Debug|Win32">
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="cl /TC /D USE_XERCES /EP ParserConfig.S &gt; ParserConfig.h CommandLine="cl /TC /D USE_EXPAT /EP ParserConfig.S &gt; ParserConfig.h
cl /TC /D USE_XERCES /EP saxlib.S &gt; saxlib.cpp cl /TC /D USE_EXPAT /EP saxlib.S &gt; saxlib.cpp
" "
Outputs="ParserConfig.h;saxlib.cpp"/> Outputs="ParserConfig.h;saxlib.cpp"/>
</FileConfiguration> </FileConfiguration>

View file

@ -7,6 +7,7 @@
#include <SAX/ext/DeclHandler.h> #include <SAX/ext/DeclHandler.h>
#include <XML/UnicodeCharacters.h> #include <XML/UnicodeCharacters.h>
#include <SAX/helpers/PropertyNames.h> #include <SAX/helpers/PropertyNames.h>
#include <XML/escaper.hpp>
#include <ostream> #include <ostream>
#include <algorithm> #include <algorithm>
#include <typeinfo> #include <typeinfo>
@ -30,6 +31,7 @@ class basic_Writer : public basic_XMLFilterImpl<string_type>,
typedef SAX::basic_LexicalHandler<stringT> lexicalHandlerT; typedef SAX::basic_LexicalHandler<stringT> lexicalHandlerT;
typedef typename basic_XMLFilterImpl<stringT>::AttributesT AttributesT; typedef typename basic_XMLFilterImpl<stringT>::AttributesT AttributesT;
typedef Arabica::Unicode<charT> UnicodeT; typedef Arabica::Unicode<charT> UnicodeT;
typedef Arabica::XML::escaper<charT, traitsT> escaperT;
private: private:
typedef basic_LexicalHandler<stringT> LexicalHandlerT; typedef basic_LexicalHandler<stringT> LexicalHandlerT;
typedef basic_DeclHandler<stringT> DeclHandlerT; typedef basic_DeclHandler<stringT> DeclHandlerT;
@ -166,62 +168,6 @@ private:
enum { startTag, endTag, docTag } lastTag_; enum { startTag, endTag, docTag } lastTag_;
const SAX::PropertyNames<stringT> properties_; const SAX::PropertyNames<stringT> properties_;
// Function object that writes one character at a time to an output stream,
// substituting an entity reference for the four characters tested below
// (less-than, greater-than, ampersand and quotation mark).  Every other
// character is written to the stream unchanged.
// It is used with std::for_each over the characters of attribute values
// and character data.
//   char_type   - character type of the target stream
//   traits_type - character traits of the target stream
template<typename char_type, typename traits_type>
class escaper
{
private:
typedef char_type charT;
typedef traits_type traitsT;
typedef std::basic_ostream<charT, traitsT> ostreamT;
typedef Arabica::Unicode<charT> UnicodeT;
public:
// Stores the stream pointer as given.  operator() dereferences it without
// a null check, so stream must point at a live stream whenever the
// escaper is invoked.
escaper(ostreamT* stream) : stream_(stream) { }
// Writes ch to the stream, or its entity reference if ch is one of the
// four escaped characters.
void operator()(charT ch)
{
if(ch == UnicodeT::LESS_THAN_SIGN)
{
// emits AMPERSAND, l, t, SEMI_COLON - i.e. &lt;
*stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_L
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::LESS_THAN_SIGN)
if(ch == UnicodeT::GREATER_THAN_SIGN)
{
// emits AMPERSAND, g, t, SEMI_COLON - i.e. &gt;
*stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_G
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::GREATER_THAN_SIGN)
if(ch == UnicodeT::AMPERSAND)
{
// emits AMPERSAND, a, m, p, SEMI_COLON - i.e. &amp;
*stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_A
<< UnicodeT::LOWERCASE_M
<< UnicodeT::LOWERCASE_P
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::AMPERSAND)
if(ch == UnicodeT::QUOTATION_MARK)
{
// emits AMPERSAND, q, u, o, t, SEMI_COLON - i.e. &quot;
*stream_ << UnicodeT::AMPERSAND
<< UnicodeT::LOWERCASE_Q
<< UnicodeT::LOWERCASE_U
<< UnicodeT::LOWERCASE_O
<< UnicodeT::LOWERCASE_T
<< UnicodeT::SEMI_COLON;
return;
} // if(ch == UnicodeT::QUOTATION_MARK)
// not a character that needs escaping - pass it straight through
*stream_ << ch;
} // operator()
private:
// the stream every character is written to (not owned, never deleted here)
ostreamT* stream_;
}; // escaper
}; // class basic_Writer }; // class basic_Writer
template<class string_type> template<class string_type>
@ -298,7 +244,7 @@ void basic_Writer<string_type>::startElement(
<< UnicodeT::EQUALS_SIGN << UnicodeT::EQUALS_SIGN
<< UnicodeT::QUOTATION_MARK; << UnicodeT::QUOTATION_MARK;
stringT value = atts.getValue(i); stringT value = atts.getValue(i);
std::for_each(value.begin(), value.end(), escaper<charT, traitsT>(stream_)); std::for_each(value.begin(), value.end(), escaperT(*stream_));
*stream_ << UnicodeT::QUOTATION_MARK; *stream_ << UnicodeT::QUOTATION_MARK;
} }
@ -332,7 +278,7 @@ template<class string_type>
void basic_Writer<string_type>::characters(const stringT& ch) void basic_Writer<string_type>::characters(const stringT& ch)
{ {
if(!inCDATA_) if(!inCDATA_)
std::for_each(ch.begin(), ch.end(), escaper<charT, traitsT>(stream_)); std::for_each(ch.begin(), ch.end(), escaperT(*stream_));
else else
*stream_ << ch; *stream_ << ch;

68
XML/escaper.hpp Normal file
View file

@ -0,0 +1,68 @@
#ifndef ARABICA_UTILS_ESCAPER_HPP
#define ARABICA_UTILS_ESCAPER_HPP
#include <iostream>
#include <XML/UnicodeCharacters.h>
namespace Arabica {
namespace XML {
// XML character escaper.
//
// A unary function object: each call writes a single character to the
// wrapped output stream.  The four characters <, >, & and " are replaced by
// the entity references &lt; &gt; &amp; and &quot; respectively; anything
// else is written through untouched.  Typical use is
//   std::for_each(s.begin(), s.end(), escaper<charT, traitsT>(stream));
//
// The escaper keeps a reference to the stream it was constructed with, so
// that stream has to outlive the escaper and any copies made of it.
template<typename char_type, typename traits_type = std::char_traits<char_type> >
class escaper
{
  private:
    typedef char_type charT;
    typedef traits_type traitsT;
    typedef std::basic_ostream<charT, traitsT> ostreamT;
    typedef Arabica::Unicode<charT> UnicodeT;

  public:
    escaper(ostreamT& stream) : stream_(stream) { }

    // Write ch, or the matching entity reference when ch must be escaped.
    void operator()(charT ch)
    {
      if(ch == UnicodeT::LESS_THAN_SIGN)
      {
        // &lt;
        open_entity();
        stream_ << UnicodeT::LOWERCASE_L;
        stream_ << UnicodeT::LOWERCASE_T;
        close_entity();
      }
      else if(ch == UnicodeT::GREATER_THAN_SIGN)
      {
        // &gt;
        open_entity();
        stream_ << UnicodeT::LOWERCASE_G;
        stream_ << UnicodeT::LOWERCASE_T;
        close_entity();
      }
      else if(ch == UnicodeT::AMPERSAND)
      {
        // &amp;
        open_entity();
        stream_ << UnicodeT::LOWERCASE_A;
        stream_ << UnicodeT::LOWERCASE_M;
        stream_ << UnicodeT::LOWERCASE_P;
        close_entity();
      }
      else if(ch == UnicodeT::QUOTATION_MARK)
      {
        // &quot;
        open_entity();
        stream_ << UnicodeT::LOWERCASE_Q;
        stream_ << UnicodeT::LOWERCASE_U;
        stream_ << UnicodeT::LOWERCASE_O;
        stream_ << UnicodeT::LOWERCASE_T;
        close_entity();
      }
      else
      {
        // nothing special about this one
        stream_ << ch;
      }
    } // operator()

  private:
    // Every entity reference starts with an ampersand ...
    void open_entity() { stream_ << UnicodeT::AMPERSAND; }
    // ... and finishes with a semi-colon.
    void close_entity() { stream_ << UnicodeT::SEMI_COLON; }

    ostreamT& stream_;
}; // class escaper
} // namespace XML
} // namespace Arabica
#endif // ARABICA_UTILS_ESCAPER_HPP