arabica/examples/Utils/transcode.cpp

230 lines
5.6 KiB
C++
Raw Permalink Normal View History

2003-09-02 13:23:52 +02:00
// transcode.cpp
//
// Warning! Contains ifdef hackery
//
///////////////////////////////////
#ifdef _MSC_VER
2007-08-31 12:59:50 +02:00
#pragma warning(disable: 4244 4996)
#endif
2003-09-02 13:23:52 +02:00
#include <iostream>
#include <io/convert_adaptor.hpp>
#include <convert/utf8iso88591codecvt.hpp>
#include <convert/iso88591utf8codecvt.hpp>
#include <convert/rot13codecvt.hpp>
#include <convert/base64codecvt.hpp>
#include <convert/utf8ucs2codecvt.hpp>
#include <convert/utf16utf8codecvt.hpp>
#include <convert/utf16beucs2codecvt.hpp>
#include <convert/utf16leucs2codecvt.hpp>
2003-09-02 13:23:52 +02:00
#include <fstream>
// File streams backing the -i/--input and -o/--output options. They stay
// closed unless processArgs opens them and attaches them to the byte
// convertors.
std::ifstream inputFile;
std::ofstream outputFile;
2003-09-18 10:25:27 +02:00
using namespace Arabica::io;
using namespace Arabica::convert;
// Byte-level adaptors. They wrap std::cin/std::cout by default; processArgs
// re-points them at inputFile/outputFile via set_stream, and
// imbueInput/imbueOutput install the rot13/base64 facets on them.
iconvert_adaptor<char> iByteConvertor(std::cin);
oconvert_adaptor<char> oByteConvertor(std::cout);
// Set to true by imbueInput/imbueOutput when a utf8/utf16be/utf16le encoding
// is selected; main then runs wchar_transcode instead of byte_transcode.
bool needWCharIntermediary = false;
#ifndef ARABICA_NO_WCHAR_T
2003-09-18 10:25:27 +02:00
// wchar_t-facing adaptors layered on top of the byte convertors declared
// above: Widener reads wchar_t from iByteConvertor, Narrower writes wchar_t
// to oByteConvertor. The utf8/utf16 facets are imbued on these.
typedef iconvert_adaptor<wchar_t, std::char_traits<wchar_t>, char, std::char_traits<char> > Widener;
typedef oconvert_adaptor<wchar_t, std::char_traits<wchar_t>, char, std::char_traits<char> > Narrower;
Widener iCharAdaptor(iByteConvertor);
Narrower oCharAdaptor(oByteConvertor);
#endif
2003-09-18 10:25:27 +02:00
// Copies input to output through the wchar_t adaptors
// (a no-op when ARABICA_NO_WCHAR_T is defined).
void wchar_transcode();
// Copies input to output through the byte convertors only.
void byte_transcode();
// Walks the command line; returns false if it is not understood.
bool processArgs(int argc, const char* argv[]);
// Consume the encoding name following argv[argn] (advancing argn) and imbue
// the matching facet on the input / output side. Return false on a missing
// or unrecognised encoding name.
bool imbueInput(int& argn, int argc, const char* argv[]);
bool imbueOutput(int& argn, int argc, const char* argv[]);
2005-08-22 17:03:41 +02:00
int main(int argc, const char* argv[])
2003-09-02 13:23:52 +02:00
{
if(!processArgs(argc, argv))
{
std::cerr << argv[0] << "\n"
<< " [(-ie|--input-encoding) input-encoding]\n"
<< " [(-oe|--output-encoding) output-encoding]\n"
<< " [(-i|--input) input-filename]\n"
<< " [(-o|--output) output-filename]"
<< std::endl;
return 0;
}
2003-09-02 13:23:52 +02:00
if(needWCharIntermediary)
wchar_transcode();
else
byte_transcode();
return 0;
} // main
// Copies every character from iCharAdaptor to oCharAdaptor until the input
// reports end-of-file, then flushes both output layers.
//
// Both output layers are also flushed after every 1024 characters written.
// The counter has to be advanced for that to happen: previously it stayed at
// zero, so the periodic flush branch could never be reached.
//
// Compiles to an empty function when ARABICA_NO_WCHAR_T is defined.
void wchar_transcode()
{
#ifndef ARABICA_NO_WCHAR_T
  int count = 0;
  wchar_t c = iCharAdaptor.get();
  while(!iCharAdaptor.eof())
  {
    oCharAdaptor << c;

    if(++count == 1024)
    {
      // flush the wide layer first so its bytes reach the byte convertor
      oCharAdaptor.flush();
      oByteConvertor.flush();
      count = 0;
    } // if ...

    c = iCharAdaptor.get();
  } // while

  oCharAdaptor.flush();
  oByteConvertor.flush();
#endif
} // wchar_transcode
// Copies every char from iByteConvertor to oByteConvertor until the input
// reports end-of-file, then flushes the output.
//
// The output is also flushed after every 1024 chars written. The counter has
// to be advanced for that to happen: previously it stayed at zero, so the
// periodic flush branch could never be reached.
void byte_transcode()
{
  int count = 0;
  char c = iByteConvertor.get();
  while(!iByteConvertor.eof())
  {
    oByteConvertor << c;

    if(++count == 1024)
    {
      oByteConvertor.flush();
      count = 0;
    } // if ...

    c = iByteConvertor.get();
  } // while

  oByteConvertor.flush();
} // byte_transcode
2003-09-02 13:23:52 +02:00
bool processArgs(int argc, const char* argv[])
2003-09-18 10:25:27 +02:00
{
for(int i = 1; i < argc; ++i)
{
std::string io(argv[i]);
if(io == "-ie" || io == "--input-encoding")
2003-09-18 10:25:27 +02:00
{
if(!(imbueInput(i, argc, argv)))
return false;
}
else if(io == "-oe" || io == "--output-encoding")
2003-09-18 10:25:27 +02:00
{
if(!(imbueOutput(i, argc, argv)))
return false;
}
else if(io == "-i" || io == "--input")
{
++i;
if(i >= argc)
return false;
inputFile.open(argv[i], std::ios_base::in | std::ios_base::binary);
iByteConvertor.set_stream(inputFile);
}
else if(io == "-o" || io == "--output")
2003-09-18 10:25:27 +02:00
{
++i;
if(i >= argc)
return false;
outputFile.open(argv[i], std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
oByteConvertor.set_stream(outputFile);
}
else
return false;
}
return true;
} // processArgs
2005-03-30 13:04:30 +02:00
// Builds a locale that is a copy of base with facet installed in it.
//
// When ARABICA_VS6_WORKAROUND is defined the locale is obtained through
// std::_Addfac instead of the two-argument std::locale constructor
// (presumably because that compiler's library lacks the constructor -
// confirm against the build configuration).
template<class Facet>
std::locale addFacet(const std::locale& base, Facet* facet)
{
#ifdef ARABICA_VS6_WORKAROUND
  std::locale extended(std::_Addfac(base, facet));
#else
  std::locale extended(base, facet);
#endif
  return extended;
} // addFacet
bool imbueInput(int& argn, int argc, const char* argv[])
{
++argn;
if(argn >= argc)
return false;
std::string cvt(argv[argn]);
2005-03-30 13:04:30 +02:00
if(cvt == "rot13")
2005-03-30 13:04:30 +02:00
iByteConvertor.imbue(addFacet(iByteConvertor.getloc(), new rot13codecvt()));
else if(cvt == "base64")
2005-03-30 13:04:30 +02:00
iByteConvertor.imbue(addFacet(iByteConvertor.getloc(), new base64codecvt()));
#ifndef ARABICA_NO_WCHAR_T
else if(cvt == "utf8")
2005-03-30 13:04:30 +02:00
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf8ucs2codecvt()));
else if(cvt == "utf16be")
2005-03-30 13:04:30 +02:00
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf16beucs2codecvt()));
else if(cvt == "utf16le")
2005-03-30 13:04:30 +02:00
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf16leucs2codecvt()));
#endif
else
{
std::cerr << cvt << " is not a valid input encoding." << std::endl;
return false;
}
#ifndef ARABICA_NO_WCHAR_T
if((cvt == "utf8") ||
(cvt == "utf16be") ||
(cvt == "utf16le"))
needWCharIntermediary = true;
#endif
return true;
} // imbueInput
2005-03-30 13:04:30 +02:00
bool imbueOutput(int& argn, int argc, const char* argv[])
{
++argn;
if(argn >= argc)
return false;
std::string cvt(argv[argn]);
if(cvt == "rot13")
2005-03-30 13:04:30 +02:00
oByteConvertor.imbue(addFacet(oByteConvertor.getloc(), new rot13codecvt()));
else if(cvt == "base64")
2005-03-30 13:04:30 +02:00
oByteConvertor.imbue(addFacet(oByteConvertor.getloc(), new base64codecvt()));
#ifndef ARABICA_NO_WCHAR_T
else if(cvt == "utf8")
2005-03-30 13:04:30 +02:00
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf8ucs2codecvt()));
else if(cvt == "utf16be")
2005-03-30 13:04:30 +02:00
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf16beucs2codecvt()));
else if(cvt == "utf16le")
2005-03-30 13:04:30 +02:00
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf16leucs2codecvt()));
#endif
else
{
std::cerr << cvt << " is not a valid output encoding." << std::endl;
return false;
}
#ifndef ARABICA_NO_WCHAR_T
if((cvt == "utf8") ||
(cvt == "utf16be") ||
(cvt == "utf16le"))
needWCharIntermediary = true;
#endif
return true;
} // imbueOutputs
2003-09-18 10:25:27 +02:00