mirror of
https://github.com/jezhiggins/arabica
synced 2024-11-17 07:48:50 +01:00
first pass at a functional whitespace normalising filter
This commit is contained in:
parent
fce1ba265a
commit
1d277df200
2 changed files with 87 additions and 3 deletions
|
@ -2,11 +2,95 @@
|
|||
#pragma warning(disable : 4786)
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <SAX/XMLReader.h>
|
||||
#include <SAX/InputSource.h>
|
||||
#include <SAX/filter/Writer.h>
|
||||
#include <SAX/helpers/CatchErrorHandler.h>
|
||||
#include <XML/XMLCharacterClasses.h>
|
||||
#include <sstream>
|
||||
|
||||
template<class string_type, class string_adaptor>
|
||||
string_type normalise_whitespace(const string_type& ch)
|
||||
{
|
||||
std::string value = string_adaptor::asStdString(ch);
|
||||
|
||||
return string_adaptor::construct_from_utf8(normalise_whitespace<std::string, Arabica::default_string_adaptor<std::string> >(ch));
|
||||
} // normalise_whitespace
|
||||
|
||||
template<>
|
||||
std::string normalise_whitespace<std::string, Arabica::default_string_adaptor<std::string> >(const std::string& ch)
|
||||
{
|
||||
std::string value(ch);
|
||||
std::string::const_iterator i = value.begin(), ie = value.end();
|
||||
std::string::iterator p = value.begin(), pe = value.end();
|
||||
|
||||
// string leading space
|
||||
while((i != ie) && (Arabica::XML::is_space(static_cast<char>(*i))))
|
||||
++i;
|
||||
|
||||
while(i != ie)
|
||||
{
|
||||
while((i != ie) && (!Arabica::XML::is_space(static_cast<char>(*i))))
|
||||
*p++ = *i++;
|
||||
while((i != ie) && (Arabica::XML::is_space(static_cast<char>(*i))))
|
||||
++i;
|
||||
if(i != ie)
|
||||
*p++ = Arabica::Unicode<char>::SPACE;
|
||||
} // while ...
|
||||
if(p != ie)
|
||||
*p++ = 0;
|
||||
|
||||
return value;
|
||||
} // normalise_whitespace
|
||||
|
||||
template<class string_type, class string_adaptor = Arabica::default_string_adaptor<string_type> >
|
||||
class basic_WhitespaceStripper : public SAX::basic_XMLFilterImpl<string_type>
|
||||
{
|
||||
public:
|
||||
typedef string_type stringT;
|
||||
typedef SAX::basic_XMLFilterImpl<stringT> XMLFilterT;
|
||||
|
||||
basic_WhitespaceStripper() { }
|
||||
|
||||
virtual void characters(const stringT& ch)
|
||||
{
|
||||
XMLFilterT::characters(normalise_whitespace<string_type, string_adaptor>(ch));
|
||||
} // characters
|
||||
|
||||
virtual void ignorableWhitespace(const stringT& ch)
|
||||
{
|
||||
} // ignorableWhitespace
|
||||
}; // class basic_WhitespaceStripper
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "tests go here" << std::endl;
|
||||
|
||||
std::ostringstream output;
|
||||
std::ostringstream output2;
|
||||
SAX::Writer writer2(output2, 2);
|
||||
basic_WhitespaceStripper<std::string> stripper;
|
||||
SAX::Writer writer(output, 4);
|
||||
|
||||
SAX::XMLReader<std::string> parser;
|
||||
SAX::CatchErrorHandler<std::string> eh;
|
||||
writer.setParent(parser);
|
||||
writer.setErrorHandler(eh);
|
||||
stripper.setParent(writer);
|
||||
writer2.setParent(stripper);
|
||||
writer2.setErrorHandler(eh);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << "<test><p>Woo </p><!-- yea --></test>";
|
||||
|
||||
SAX::InputSource is(ss);
|
||||
|
||||
writer2.parse(is);
|
||||
|
||||
std::cout << "1: " << output.str() << std::endl;
|
||||
std::cout << "2: " << output2.str() << std::endl;
|
||||
|
||||
|
||||
return 0;
|
||||
} // main
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="odbc32.lib odbccp32.lib wsock32.lib"
|
||||
OutputFile="$(OutDir)/Writer.exe"
|
||||
OutputFile="$(OutDir)/sax_filter_test.exe"
|
||||
LinkIncremental="2"
|
||||
SuppressStartupBanner="TRUE"
|
||||
GenerateDebugInformation="TRUE"
|
||||
|
@ -106,7 +106,7 @@
|
|||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="wsock32.lib"
|
||||
OutputFile="$(OutDir)/Writer.exe"
|
||||
OutputFile="$(OutDir)/sax_filter_test.exe"
|
||||
LinkIncremental="1"
|
||||
SuppressStartupBanner="TRUE"
|
||||
ProgramDatabaseFile=".\Release/Writer.pdb"
|
||||
|
|
Loading…
Reference in a new issue