// arabica/include/text/normalize_whitespace.hpp
// (51 lines, 1.4 KiB, C++)

#ifndef ARABICA_UTILS_NORMALIZE_WHITESPACE_HPP
#define ARABICA_UTILS_NORMALIZE_WHITESPACE_HPP
#include <Arabica/StringAdaptor.hpp>
2007-09-05 00:55:47 +02:00
#include <XML/XMLCharacterClasses.hpp>
#include <text/UnicodeCharacters.hpp>
2007-07-19 19:01:57 +02:00
namespace Arabica
{
namespace text
2007-07-19 19:01:57 +02:00
{
template<class string_type, class string_adaptor>
inline string_type normalize_whitespace(const string_type& ch)
2007-07-19 19:01:57 +02:00
{
std::string value = string_adaptor::asStdString(ch);
std::string stripped = normalize_whitespace<std::string, Arabica::default_string_adaptor<std::string> >(value);
return string_adaptor::construct_from_utf8(stripped.c_str());
} // normalize_whitespace
// std::string specialization: does the actual whitespace normalization.
//
// Every run of characters for which Arabica::XML::is_space is true is
// collapsed to a single Arabica::text::Unicode<char>::SPACE. Runs at the
// very start or very end of the string are dropped entirely.
//
// ch       the string to normalize; it is not modified
// returns  a normalized copy of ch
template<>
inline std::string normalize_whitespace<std::string, Arabica::default_string_adaptor<std::string> >(const std::string& ch)
{
  // Work on a copy and compact it in place: 'write' never overtakes 'read',
  // because each whitespace run (at least one character) yields at most one
  // output character.
  std::string result(ch);
  std::string::const_iterator read = result.begin();
  const std::string::const_iterator stop = result.end();
  std::string::iterator write = result.begin();

  // True when a whitespace run has been seen after at least one written
  // character; the separator is only emitted once another word follows,
  // which is what discards trailing whitespace.
  bool separator_pending = false;

  for(; read != stop; ++read)
  {
    if(Arabica::XML::is_space(static_cast<char>(*read)))
    {
      // Whitespace before the first word never produces a separator.
      separator_pending = (write != result.begin());
      continue;
    }

    if(separator_pending)
    {
      *write++ = Arabica::text::Unicode<char>::SPACE;
      separator_pending = false;
    }
    *write++ = *read;
  } // for ...

  // Drop whatever is left over behind the compacted text.
  result.erase(write, result.end());
  return result;
} // normalize_whitespace
} // namespace text
2007-07-19 19:01:57 +02:00
} // namespace Arabica
#endif