mirror of
https://github.com/mamedev/mame.git
synced 2024-11-18 10:06:19 +01:00
Vas Crabb feedback: Fixed Latin-1 C1 space, changed charconverter prototype to not dictate std::string and fixed invalid encoding issue
This commit is contained in:
parent
34048b54f6
commit
46ddd46f25
2 changed files with 45 additions and 36 deletions
|
@ -13,44 +13,50 @@
|
|||
#include "unicode.h"
|
||||
#include "coretmpl.h"
|
||||
|
||||
static const char32_t iso_8859_1_code_page[128] =
|
||||
{
|
||||
// 0x80 - 0x8F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// 0x90 - 0x9F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// 0xA0 - 0xAF
|
||||
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
|
||||
// 0xB0 - 0xBF
|
||||
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
|
||||
// 0xC0 - 0xCF
|
||||
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
|
||||
// 0xD0 - 0xDF
|
||||
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
|
||||
// 0xE0 - 0xEF
|
||||
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
|
||||
// 0xF0 - 0xFF
|
||||
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
|
||||
};
|
||||
|
||||
imgtool::simple_charconverter imgtool::charconverter_iso_8859_1(nullptr, iso_8859_1_code_page);
|
||||
imgtool::simple_charconverter imgtool::charconverter_iso_8859_1(nullptr, nullptr);
|
||||
|
||||
|
||||
//-------------------------------------------------
|
||||
// from_utf8
|
||||
//-------------------------------------------------
|
||||
|
||||
void imgtool::simple_charconverter::from_utf8(std::ostream &dest, const std::string &src) const
|
||||
void imgtool::charconverter::from_utf8(std::ostream &dest, const std::string &src) const
|
||||
{
|
||||
from_utf8(dest, src.c_str(), src.size());
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------
|
||||
// to_utf8
|
||||
//-------------------------------------------------
|
||||
|
||||
void imgtool::charconverter::to_utf8(std::ostream &dest, const std::string &src) const
|
||||
{
|
||||
to_utf8(dest, src.c_str(), src.size());
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------
|
||||
// from_utf8
|
||||
//-------------------------------------------------
|
||||
|
||||
void imgtool::simple_charconverter::from_utf8(std::ostream &dest, const char *src, size_t src_length) const
|
||||
{
|
||||
// normalize the incoming unicode
|
||||
std::string normalized_src = normalize_unicode(src, m_norm);
|
||||
std::string normalized_src = normalize_unicode(src, src_length, m_norm);
|
||||
|
||||
auto iter = normalized_src.begin();
|
||||
while(iter != normalized_src.end())
|
||||
{
|
||||
// get the next character
|
||||
char32_t ch;
|
||||
iter += uchar_from_utf8(&ch, &*iter, normalized_src.end() - iter);
|
||||
int rc = uchar_from_utf8(&ch, &*iter, normalized_src.end() - iter);
|
||||
if (rc < 0)
|
||||
{
|
||||
ch = 0xFFFD;
|
||||
rc = 1;
|
||||
}
|
||||
iter += rc;
|
||||
|
||||
// look in all pages
|
||||
const char32_t *pages[2];
|
||||
|
@ -93,24 +99,24 @@ void imgtool::simple_charconverter::from_utf8(std::ostream &dest, const std::str
|
|||
// to_utf8
|
||||
//-------------------------------------------------
|
||||
|
||||
void imgtool::simple_charconverter::to_utf8(std::ostream &dest, const std::string &src) const
|
||||
void imgtool::simple_charconverter::to_utf8(std::ostream &dest, const char *src, size_t src_length) const
|
||||
{
|
||||
for (auto iter = src.begin(); iter != src.end(); iter++)
|
||||
for (size_t i = 0; i < src_length; i++)
|
||||
{
|
||||
// which page is this in?
|
||||
const char32_t *page = ((*iter & 0x80) == 0) ? m_lowpage : m_highpage;
|
||||
const char32_t *page = ((src[i] & 0x80) == 0) ? m_lowpage : m_highpage;
|
||||
|
||||
// is this page present?
|
||||
if ((*iter & 0x80) == 0)
|
||||
if ((src[i] & 0x80) == 0)
|
||||
{
|
||||
// no - pass it on
|
||||
dest << *iter;
|
||||
dest << src[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
// yes - we need to do a lookup
|
||||
size_t base = ((*iter & 0x80) == 0) ? 0x00 : 0x80;
|
||||
char32_t ch = page[((unsigned char)(*iter)) - base];
|
||||
size_t base = ((src[i] & 0x80) == 0) ? 0x00 : 0x80;
|
||||
char32_t ch = page[((unsigned char)(src[i])) - base];
|
||||
if (ch == 0)
|
||||
throw charconverter_exception();
|
||||
|
||||
|
|
|
@ -22,9 +22,12 @@ namespace imgtool
|
|||
class charconverter
|
||||
{
|
||||
public:
|
||||
virtual void from_utf8(std::ostream &dest, const std::string &src) const = 0;
|
||||
virtual void to_utf8(std::ostream &dest, const std::string &src) const = 0;
|
||||
|
||||
virtual void from_utf8(std::ostream &dest, const char *src, size_t src_length) const = 0;
|
||||
virtual void to_utf8(std::ostream &dest, const char *src, size_t src_length) const = 0;
|
||||
|
||||
void from_utf8(std::ostream &dest, const std::string &src) const;
|
||||
void to_utf8(std::ostream &dest, const std::string &src) const;
|
||||
|
||||
std::string from_utf8(const std::string &src) const
|
||||
{
|
||||
// inlining so that the return value can potentially be removed by return value optimization
|
||||
|
@ -59,8 +62,8 @@ namespace imgtool
|
|||
{
|
||||
}
|
||||
|
||||
virtual void from_utf8(std::ostream &dest, const std::string &src) const override;
|
||||
virtual void to_utf8(std::ostream &dest, const std::string &src) const override;
|
||||
virtual void from_utf8(std::ostream &dest, const char *src, size_t src_length) const override;
|
||||
virtual void to_utf8(std::ostream &dest, const char *src, size_t src_length) const override;
|
||||
|
||||
private:
|
||||
unicode_normalization_form m_norm;
|
||||
|
|
Loading…
Reference in a new issue