mirror of
https://github.com/jezhiggins/arabica
synced 2025-01-01 06:20:38 +01:00
227 lines
5.5 KiB
C++
227 lines
5.5 KiB
C++
// transcode.cpp
|
|
//
|
|
// Warning! Contains ifdef hackery
|
|
//
|
|
///////////////////////////////////
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable: 4244 4996)
|
|
#endif
|
|
|
|
#include <iostream>
|
|
#include <Utils/convert_adaptor.hpp>
|
|
#include <Utils/utf8iso88591codecvt.hpp>
|
|
#include <Utils/iso88591utf8codecvt.hpp>
|
|
#include <Utils/rot13codecvt.hpp>
|
|
#include <Utils/base64codecvt.hpp>
|
|
#include <Utils/utf8ucs2codecvt.hpp>
|
|
#include <Utils/utf16utf8codecvt.hpp>
|
|
#include <Utils/utf16beucs2codecvt.hpp>
|
|
#include <Utils/utf16leucs2codecvt.hpp>
|
|
|
|
#include <fstream>
|
|
// File streams behind the -i/--input and -o/--output options.  They are
// only opened by processArgs when the corresponding option is given.
std::ifstream inputFile;
std::ofstream outputFile;
|
|
|
|
using namespace Arabica::convert;
|
|
iconvert_adaptor<char> iByteConvertor(std::cin);
|
|
oconvert_adaptor<char> oByteConvertor(std::cout);
|
|
bool needWCharIntermediary = false;
|
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
typedef iconvert_adaptor<wchar_t, std::char_traits<wchar_t>, char, std::char_traits<char> > Widener;
|
|
typedef oconvert_adaptor<wchar_t, std::char_traits<wchar_t>, char, std::char_traits<char> > Narrower;
|
|
|
|
Widener iCharAdaptor(iByteConvertor);
|
|
Narrower oCharAdaptor(oByteConvertor);
|
|
#endif
|
|
|
|
// Copy loops: one working in wchar_t, one working in plain bytes.
void wchar_transcode();
void byte_transcode();

// Command line handling.  imbueInput/imbueOutput take the current argument
// index by reference and advance it past the encoding name they consume.
bool processArgs(int argc, const char* argv[]);
bool imbueInput(int& argn, int argc, const char* argv[]);
bool imbueOutput(int& argn, int argc, const char* argv[]);
|
|
|
|
int main(int argc, const char* argv[])
|
|
{
|
|
if(!processArgs(argc, argv))
|
|
{
|
|
std::cerr << argv[0] << "\n"
|
|
<< " [(-ie|--input-encoding) input-encoding]\n"
|
|
<< " [(-oe|--output-encoding) output-encoding]\n"
|
|
<< " [(-i|--input) input-filename]\n"
|
|
<< " [(-o|--output) output-filename]"
|
|
<< std::endl;
|
|
return 0;
|
|
}
|
|
|
|
if(needWCharIntermediary)
|
|
wchar_transcode();
|
|
else
|
|
byte_transcode();
|
|
|
|
return 0;
|
|
} // main
|
|
|
|
void wchar_transcode()
|
|
{
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
int count = 0;
|
|
wchar_t c = iCharAdaptor.get();
|
|
while(!iCharAdaptor.eof())
|
|
{
|
|
oCharAdaptor << c;
|
|
|
|
if(count == 1024)
|
|
{
|
|
oCharAdaptor.flush();
|
|
oByteConvertor.flush();
|
|
count = 0;
|
|
} // if ...
|
|
|
|
c = iCharAdaptor.get();
|
|
}
|
|
oCharAdaptor.flush();
|
|
oByteConvertor.flush();
|
|
#endif
|
|
} // wchar_transcode
|
|
|
|
void byte_transcode()
|
|
{
|
|
int count = 0;
|
|
char c = iByteConvertor.get();
|
|
while(!iByteConvertor.eof())
|
|
{
|
|
oByteConvertor << c;
|
|
|
|
if(count == 1024)
|
|
{
|
|
oByteConvertor.flush();
|
|
count = 0;
|
|
} // if ...
|
|
|
|
c = iByteConvertor.get();
|
|
} // while
|
|
|
|
oByteConvertor.flush();
|
|
} // byte_transcode
|
|
|
|
bool processArgs(int argc, const char* argv[])
|
|
{
|
|
for(int i = 1; i < argc; ++i)
|
|
{
|
|
std::string io(argv[i]);
|
|
|
|
if(io == "-ie" || io == "--input-encoding")
|
|
{
|
|
if(!(imbueInput(i, argc, argv)))
|
|
return false;
|
|
}
|
|
else if(io == "-oe" || io == "--output-encoding")
|
|
{
|
|
if(!(imbueOutput(i, argc, argv)))
|
|
return false;
|
|
}
|
|
else if(io == "-i" || io == "--input")
|
|
{
|
|
++i;
|
|
if(i >= argc)
|
|
return false;
|
|
inputFile.open(argv[i], std::ios_base::in | std::ios_base::binary);
|
|
iByteConvertor.set_stream(inputFile);
|
|
}
|
|
else if(io == "-o" || io == "--output")
|
|
{
|
|
++i;
|
|
if(i >= argc)
|
|
return false;
|
|
outputFile.open(argv[i], std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
|
|
oByteConvertor.set_stream(outputFile);
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
return true;
|
|
} // processArgs
|
|
|
|
// Builds a locale that is a copy of base with facet installed in it.
//
// When ARABICA_VS6_WORKAROUND is defined the locale is produced through
// std::_Addfac instead of the two-argument std::locale constructor.
template<class Facet>
std::locale addFacet(const std::locale& base, Facet* facet)
{
#ifdef ARABICA_VS6_WORKAROUND
  std::locale extended(std::_Addfac(base, facet));
#else
  std::locale extended(base, facet);
#endif
  return extended;
} // addFacet
|
|
|
|
bool imbueInput(int& argn, int argc, const char* argv[])
|
|
{
|
|
++argn;
|
|
if(argn >= argc)
|
|
return false;
|
|
|
|
std::string cvt(argv[argn]);
|
|
|
|
if(cvt == "rot13")
|
|
iByteConvertor.imbue(addFacet(iByteConvertor.getloc(), new rot13codecvt()));
|
|
else if(cvt == "base64")
|
|
iByteConvertor.imbue(addFacet(iByteConvertor.getloc(), new base64codecvt()));
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
else if(cvt == "utf8")
|
|
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf8ucs2codecvt()));
|
|
else if(cvt == "utf16be")
|
|
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf16beucs2codecvt()));
|
|
else if(cvt == "utf16le")
|
|
iCharAdaptor.imbue(addFacet(iCharAdaptor.getloc(), new utf16leucs2codecvt()));
|
|
#endif
|
|
else
|
|
{
|
|
std::cerr << cvt << " is not a valid input encoding." << std::endl;
|
|
return false;
|
|
}
|
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
if((cvt == "utf8") ||
|
|
(cvt == "utf16be") ||
|
|
(cvt == "utf16le"))
|
|
needWCharIntermediary = true;
|
|
#endif
|
|
|
|
return true;
|
|
} // imbueInput
|
|
|
|
bool imbueOutput(int& argn, int argc, const char* argv[])
|
|
{
|
|
++argn;
|
|
if(argn >= argc)
|
|
return false;
|
|
|
|
std::string cvt(argv[argn]);
|
|
if(cvt == "rot13")
|
|
oByteConvertor.imbue(addFacet(oByteConvertor.getloc(), new rot13codecvt()));
|
|
else if(cvt == "base64")
|
|
oByteConvertor.imbue(addFacet(oByteConvertor.getloc(), new base64codecvt()));
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
else if(cvt == "utf8")
|
|
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf8ucs2codecvt()));
|
|
else if(cvt == "utf16be")
|
|
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf16beucs2codecvt()));
|
|
else if(cvt == "utf16le")
|
|
oCharAdaptor.imbue(addFacet(oCharAdaptor.getloc(), new utf16leucs2codecvt()));
|
|
#endif
|
|
else
|
|
{
|
|
std::cerr << cvt << " is not a valid output encoding." << std::endl;
|
|
return false;
|
|
}
|
|
|
|
#ifndef ARABICA_NO_WCHAR_T
|
|
if((cvt == "utf8") ||
|
|
(cvt == "utf16be") ||
|
|
(cvt == "utf16le"))
|
|
needWCharIntermediary = true;
|
|
#endif
|
|
|
|
return true;
|
|
} // imbueOutputs
|
|
|