Return libdxfrw internal text codec converter.

This commit is contained in:
Roman Telezhynskyi 2023-08-21 19:13:47 +03:00
parent c2fea54864
commit 5f091344fa
9 changed files with 62614 additions and 127 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,25 +1,26 @@
#include "drw_textcodec.h"
#include <sstream>
#include <iomanip>
#include "../drw_base.h"
#include "drw_cptable932.h"
#include "drw_cptable936.h"
#include "drw_cptable949.h"
#include "drw_cptable950.h"
#include "drw_cptables.h"
#include <QDebug>
#include <QString>
#include <algorithm>
#include <cstring>
#include <QString>
#include <QDebug>
#include "../drw_base.h"
#include "../vmisc/vabstractvalapplication.h"
#include "../ifc/exception/vexception.h"
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
#include "../vmisc/vtextcodec.h"
#else
#include <QTextCodec>
#endif
#include <iomanip>
#include <memory>
#include <sstream>
DRW_TextCodec::DRW_TextCodec()
: version(DRW::AC1021)
{}
: version(DRW::AC1021),
conv(new DRW_Converter(nullptr, 0))
{
}
void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat)
{
switch (v)
{
case DRW::UNKNOWNV:
@ -47,10 +48,10 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
case DRW::AC1018:
{
version = DRW::AC1015;
// if (cp.empty()) { //codepage not set, initialize
cp = "ANSI_1252";
setCodePage(cp, dxfFormat);
// }
// if (cp.empty()) { //codepage not set, initialize
cp = "ANSI_1252";
setCodePage(cp, dxfFormat);
// }
break;
}
case DRW::AC1021:
@ -61,11 +62,11 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
version = DRW::AC1021;
if (dxfFormat)
{
cp = "UTF-8";//RLZ: can be UCS2 or UTF-16 16bits per char
cp = "UTF-8"; // RLZ: can be UCS2 or UTF-16 16bits per char
}
else
{
cp = "UTF-16";//RLZ: can be UCS2 or UTF-16 16bits per char
cp = "UTF-16"; // RLZ: can be UCS2 or UTF-16 16bits per char
}
setCodePage(cp, dxfFormat);
break;
@ -75,134 +76,609 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
}
}
void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat){
void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat)
{
version = DRW::UNKNOWNV;
for (auto dwgVersionString : DRW::dwgVersionStrings)
{
if (std::strcmp( v.c_str(), dwgVersionString.first ) == 0)
if (std::strcmp(v.c_str(), dwgVersionString.first) == 0)
{
version = dwgVersionString.second;
setVersion( dwgVersionString.second, dxfFormat);
setVersion(dwgVersionString.second, dxfFormat);
break;
}
}
}
void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat){
void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat)
{
cp = correctCodePage(c);
if (version < DRW::AC1021)
conv.reset();
if (version == DRW::AC1009 || version == DRW::AC1015)
{
if (cp == "UTF-8")
if (cp == "ANSI_874")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table874, CPLENGTHCOMMON);
}
else if (cp == "ANSI_932")
{
conv = std::make_unique<DRW_Conv932Table>();
}
else if (cp == "ANSI_936")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table936, DRW_LeadTable936, DRW_DoubleTable936, CPLENGTH936);
}
else if (cp == "ANSI_949")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table949, DRW_LeadTable949, DRW_DoubleTable949, CPLENGTH949);
}
else if (cp == "ANSI_950")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table950, DRW_LeadTable950, DRW_DoubleTable950, CPLENGTH950);
}
else if (cp == "ANSI_1250")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1250, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1251")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1251, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1253")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1253, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1254")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1254, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1255")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1255, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1256")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1256, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1257")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1257, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1258")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1258, CPLENGTHCOMMON);
}
else if (cp == "UTF-8")
{ // DXF older than 2007 are write in win codepages
cp = "ANSI_1252";
conv = std::make_unique<DRW_Converter>(nullptr, 0);
}
else
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1252, CPLENGTHCOMMON);
}
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp));
}
else
{
conv = DRW_TextCodec::CodecForName(dxfFormat ? QStringLiteral("UTF-8") : QStringLiteral("UTF-16"));
}
if (conv == nullptr)
{
const QString errorMsg = QCoreApplication::translate("DRW_TextCodec", "No available codec for code page '%1'.")
.arg(cp.c_str());
VAbstractApplication::VApp()->IsPedantic()
? throw VException(errorMsg) : qWarning() << VAbstractValApplication::warningMessageSignature + errorMsg;
if (version < DRW::AC1021 && cp == "UTF-8")
if (dxfFormat)
{
cp = "ANSI_1252";
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp)); // Fallback to latin
conv = std::make_unique<DRW_Converter>(nullptr, 0); // utf16 to utf8
}
else
{
conv = std::make_unique<DRW_ConvUTF16>(); // utf16 to utf8
}
}
}
auto DRW_TextCodec::DXFCodePageMap() -> QMap<QString, QStringList>
{
static auto map = QMap<QString, QStringList>
{
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Latin/Thai
{"ANSI_932", {"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932",
"X-SJIS", "EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "CP932",
"JIS7"}}, // Japanese
{"ANSI_936", {"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280",
"GB_2312-80", "GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Chinese PRC GBK (XGB) simplified
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Korean
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5",
"BIG5-HKSCS"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, //Central Europe and Eastern Europe
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Cyrillic script
{"ANSI_1252", {"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "L1",
"ISO_8859-1", "APPLE ROMAN", "ISO8859-1", "ISO8859-15", "ISO-IR-100", "IBM 850"}}, // Western Europe
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Greek
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "iso8859-3"}}, // Turkish
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Hebrew
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Arabic
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Baltic
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Vietnamese
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}},
};
return map;
}
auto DRW_TextCodec::CodecForName(const QString &name) -> VTextCodec *
{
QMap<QString, QStringList> knownCodecs = DXFCodePageMap();
if (knownCodecs.contains(name))
{
QStringList aliases = knownCodecs.value(name);
for (auto &alias : aliases)
{
if (VTextCodec *codec = VTextCodec::codecForName(alias.toLatin1()))
{
return codec;
}
}
}
return nullptr;
}
auto DRW_TextCodec::toUtf8(const std::string &s) -> std::string
{
if (conv == nullptr)
{
return s;
}
const QString encodedString = conv->toUnicode(s.c_str());
return encodedString.toStdString();
return conv->toUtf8(s);
}
auto DRW_TextCodec::fromUtf8(const std::string &s) -> std::string
{
if (conv == nullptr)
{
return s;
}
const QByteArray encodedString = conv->fromUnicode(QString::fromStdString(s));
return std::string(encodedString.constData());
return conv->fromUtf8(s);
}
auto DRW_TextCodec::correctCodePage(const std::string& s) -> std::string {
//stringstream cause crash in OS/X, bug#3597944
QString codePage = QString::fromStdString(s);
codePage = codePage.toUpper();
QMap<QString, QStringList> codeMap = DRW_TextCodec::DXFCodePageMap();
auto DRW_TextCodec::correctCodePage(const std::string &s) -> std::string
{
// stringstream cause crash in OS/X, bug#3597944
std::string cp = s;
transform(cp.begin(), cp.end(), cp.begin(), toupper);
auto i = codeMap.constBegin();
while (i != codeMap.constEnd())
static auto map = QMap<std::string, QSet<std::string>>{
// Latin/Thai
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Central Europe and Eastern Europe
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, // Cyrillic script
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Western Europe
{"ANSI_1252",
{"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "ISO_8859-1", "APPLE ROMAN",
"ISO8859-1", "ISO8859-15", "ISO-IR-100", "L1", "IBM 850"}}, // Greek
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Turkish
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "ISO8859-3"}}, // Hebrew
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Arabic
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Baltic
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Vietnamese
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Japanese
{"ANSI_932",
{"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932", "X-SJIS",
"EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "JIS7"}}, // Chinese PRC GBK (XGB) simplified
{"ANSI_936",
{"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280", "GB_2312-80",
"GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Korean
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5", "BIG5-HKSCS"}}, // celtic
// {"ISO8859-14", {"ISO8859-14"}},
// {"TSCII", {"TSCII"}},//tamil
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}}};
auto i = map.constBegin();
while (i != map.constEnd())
{
if (i.value().contains(codePage))
if (i.value().contains(cp))
{
return i.key().toStdString();
return i.key();
}
++i;
}
return "ANSI_1252";
}
/**
 * @brief Expand DXF "\U+XXXX" escapes in a string that is otherwise already UTF-8.
 *
 * Every byte is copied through unchanged, except for 7-character "\U+XXXX"
 * escapes, which are replaced by the output of encodeText(). Multi-byte
 * sequences are stepped over as a whole (based on the value of their first
 * byte) so that their trailing bytes are never inspected as a possible
 * escape start.
 *
 * @param s input string.
 * @return copy of @p s with the escapes expanded.
 */
auto DRW_Converter::toUtf8(const std::string &s) -> std::string
{
    std::string out;
    const std::size_t len = s.length();
    std::size_t copiedUpTo = 0; // first byte not yet appended to 'out'
    std::size_t pos = 0;

    while (pos < len)
    {
        const auto byte = static_cast<unsigned char>(s[pos]);
        std::size_t step = 1;

        if (byte >= 0xF8)
        {
            // not a recognised first byte: advance one position
        }
        else if (byte >= 0xF0)
        {
            step = 4; // 4 byte sequence
        }
        else if (byte >= 0xE0)
        {
            step = 3; // 3 byte sequence
        }
        else if (byte >= 0x80)
        {
            step = 2; // 2 byte sequence
        }
        else if (byte == '\\' && pos + 6 < len && s[pos + 1] == 'U' && s[pos + 2] == '+')
        {
            // flush the untouched run, then the decoded escape
            out.append(s, copiedUpTo, pos - copiedUpTo);
            out += encodeText(s.substr(pos, 7));
            step = 7;
            copiedUpTo = pos + 7;
        }

        pos += step;
    }

    out.append(s, copiedUpTo, std::string::npos);
    return out;
}
/**
 * @brief Convert a UTF-8 string to the single-byte code page described by `table`.
 *
 * ASCII bytes are copied unchanged. Each multi-byte UTF-8 sequence is decoded
 * with decodeNum() and searched in `table`; a hit at index k is written as the
 * single byte CPOFFSET + k, a miss is written as a "\U+XXXX" escape produced
 * by decodeText().
 *
 * Bytes above 0x7F that cannot start a UTF-8 sequence (0x80-0xBF and
 * 0xF8-0xFF) are copied unchanged. decodeNum() does not report a length for
 * them, so the previous code advanced by an uninitialized amount.
 *
 * @param s UTF-8 encoded input.
 * @return the converted string.
 * @note A sequence cut short by the end of @p s still makes decodeNum() throw
 *       std::out_of_range through std::string::at().
 */
auto DRW_ConvTable::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the run that has not been copied to 'result' yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c < 0xC0 || c > 0xF7)
        {
            // ASCII, or a byte that is not a UTF-8 lead byte: stays in the pending run
            continue;
        }

        // need to decode
        result += s.substr(j, i - j);
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1; // the loop increment moves on to the byte after the sequence

        bool notFound = true;
        for (int k = 0; k < cpLength; k++)
        {
            if (table[k] == code)
            {
                result += static_cast<char>(CPOFFSET + k); // translate from table
                notFound = false;
                break;
            }
        }
        if (notFound)
        {
            result += decodeText(code);
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * @brief Convert a string in the single-byte code page described by `table` to UTF-8.
 *
 * Bytes below 0x80 are copied as they are, except for 7-character "\U+XXXX"
 * escapes, which are expanded with encodeText(). Bytes from 0x80 upwards are
 * looked up in `table` (index = byte - 0x80) and the table value is written
 * with encodeNum().
 *
 * @param s input in the code page of this converter.
 * @return UTF-8 encoded string.
 */
auto DRW_ConvTable::toUtf8(const std::string &s) -> std::string
{
    std::string out;
    const std::size_t len = s.size();

    for (std::size_t pos = 0; pos < len; ++pos)
    {
        const auto byte = static_cast<unsigned char>(s[pos]);

        if (byte >= 0x80)
        {
            out += encodeNum(table[byte - 0x80]); // translate from table
            continue;
        }

        // an escape needs the backslash plus six more characters
        const bool isEscape = byte == '\\' && len - pos > 6 && s[pos + 1] == 'U' && s[pos + 2] == '+';
        if (isEscape)
        {
            out += encodeText(s.substr(pos, 7));
            pos += 6; // the loop increment skips the seventh character
        }
        else
        {
            out += static_cast<char>(byte); // plain ascii char
        }
    }

    return out;
}
/**
 * @brief Convert a "\U+XXXX" escape to the UTF-8 encoding of code point XXXX.
 *
 * Up to four characters starting at offset 3 of @p stmp are parsed as a
 * hexadecimal number and passed to encodeNum(). If nothing can be parsed the
 * code point is 0.
 *
 * @param stmp the escape text; std::string::substr throws std::out_of_range
 *             when it is shorter than 3 characters.
 * @return the UTF-8 bytes produced by encodeNum().
 */
auto DRW_Converter::encodeText(const std::string &stmp) -> std::string
{
    int code = 0; // stays 0 when no hexadecimal digits can be read
    const std::string hex = stmp.substr(3, 4);
#if defined(__APPLE__)
    // "%x" requires an unsigned int destination
    unsigned int parsed = 0;
    if (sscanf(hex.c_str(), "%x", &parsed) == 1)
    {
        code = static_cast<int>(parsed);
    }
#else
    std::istringstream sd(hex);
    sd >> std::hex >> code;
#endif
    return encodeNum(code);
}
/**
 * @brief Build the "\U+XXXX" escape for a code point.
 *
 * The value is written as upper-case hexadecimal, zero padded to at least four
 * digits. Values above 0xFFFF produce more than four digits.
 *
 * @param c code point to escape.
 * @return "\U+" followed by the hexadecimal digits.
 */
auto DRW_Converter::decodeText(int c) -> std::string
{
    std::string res = "\\U+";
#if defined(__APPLE__)
    // Append only the formatted digits. Assigning a whole 16 character
    // std::string buffer also appended its '\0' padding to the result.
    char buf[16] = {};
    snprintf(buf, sizeof(buf), "%04X", static_cast<unsigned int>(c));
    res += buf;
#else
    std::stringstream ss;
    ss << std::uppercase << std::setfill('0') << std::setw(4) << std::hex << c;
    res += ss.str();
#endif
    return res;
}
/**
 * @brief Encode a code point as a UTF-8 byte sequence.
 *
 * Values below 128 give one byte, below 0x800 two bytes, below 0x10000 three
 * bytes and everything else four bytes. A value whose single output byte would
 * be 0 gives an empty string.
 *
 * @param c code point.
 * @return the encoded bytes.
 */
auto DRW_Converter::encodeNum(int c) -> std::string
{
    std::string utf8;
    // append the low 8 bits of 'value' as one byte
    const auto put = [&utf8](int value) { utf8 += static_cast<char>(static_cast<unsigned char>(value)); };
    // append a continuation byte carrying the low 6 bits of 'bits'
    const auto tail = [&put](int bits) { put(0x80 | (bits & 0x3f)); };

    if (c < 128)
    { // 0-7F US-ASCII 7 bits
        if (static_cast<unsigned char>(c) != 0)
        {
            put(c);
        }
    }
    else if (c < 0x800)
    { // 80-07FF 2 bytes
        put(0xC0 | (c >> 6));
        tail(c);
    }
    else if (c < 0x10000)
    { // 800-FFFF 3 bytes
        put(0xe0 | (c >> 12));
        tail(c >> 6);
        tail(c);
    }
    else
    { // 10000-10FFFF 4 bytes
        put(0xf0 | (c >> 18));
        tail(c >> 12);
        tail(c >> 6);
        tail(c);
    }
    return utf8;
}
/**
 * @brief Decode the UTF-8 sequence at the start of a string.
 *
 * @param s bytes starting at the lead byte of the sequence. It has to contain
 *          the whole sequence: an empty or too short string makes
 *          std::string::at() throw std::out_of_range.
 * @param b receives the byte length of the sequence: 2, 3 or 4. When the first
 *          byte is not the lead byte of a 2, 3 or 4 byte sequence it receives
 *          1, so that a caller always advances.
 * @return the decoded code point, or 0 when the first byte is not such a lead
 *         byte.
 */
auto DRW_Converter::decodeNum(const std::string &s, unsigned int *b) -> int
{
    int code = 0;
    *b = 1; // defined result for bytes that match none of the branches below
    auto c = static_cast<unsigned char>(s.at(0));
    if ((c & 0xE0) == 0xC0)
    { // 2 bytes
        code = (c & 0x1F) << 6;
        code = (s.at(1) & 0x3F) | code;
        *b = 2;
    }
    else if ((c & 0xF0) == 0xE0)
    { // 3 bytes
        code = (c & 0x0F) << 12;
        code = ((s.at(1) & 0x3F) << 6) | code;
        code = (s.at(2) & 0x3F) | code;
        *b = 3;
    }
    else if ((c & 0xF8) == 0xF0)
    { // 4 bytes
        code = (c & 0x07) << 18;
        code = ((s.at(1) & 0x3F) << 12) | code;
        code = ((s.at(2) & 0x3F) << 6) | code;
        code = (s.at(3) & 0x3F) | code;
        *b = 4;
    }
    return code;
}
/**
 * @brief Convert a UTF-8 string to the double-byte code page described by `doubleTable`.
 *
 * ASCII bytes are copied unchanged. Each multi-byte UTF-8 sequence is decoded
 * with decodeNum() and searched in the second column of `doubleTable`; on a
 * hit the first column is written as two bytes (high byte first), on a miss a
 * "\U+XXXX" escape from decodeText() is written.
 *
 * Bytes above 0x7F that cannot start a UTF-8 sequence (0x80-0xBF and
 * 0xF8-0xFF) are copied unchanged. decodeNum() does not report a length for
 * them, so the previous code advanced by an uninitialized amount.
 *
 * @param s UTF-8 encoded input.
 * @return the converted string.
 * @note A sequence cut short by the end of @p s still makes decodeNum() throw
 *       std::out_of_range through std::string::at().
 */
auto DRW_ConvDBCSTable::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the run that has not been copied to 'result' yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c < 0xC0 || c > 0xF7)
        {
            // ASCII, or a byte that is not a UTF-8 lead byte: stays in the pending run
            continue;
        }

        // need to decode
        result += s.substr(j, i - j);
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1; // the loop increment moves on to the byte after the sequence

        bool notFound = true;
        for (int k = 0; k < cpLength; k++)
        {
            if (doubleTable[k][1] == code)
            {
                int data = doubleTable[k][0];
                char d[3];
                d[0] = static_cast<char>(data >> 8);
                d[1] = static_cast<char>(data & 0xFF);
                d[2] = '\0';
                result += d; // translate from table
                notFound = false;
                break;
            }
        }
        if (notFound)
        {
            result += decodeText(code);
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * @brief Convert a string in the double-byte code page of this converter to UTF-8.
 *
 * - Bytes below 0x80 are copied, with "\U+XXXX" escapes expanded by encodeText().
 * - Byte 0x80 is written as U+20AC (euro sign).
 * - Any other byte is the first byte of a two byte character. `leadTable`
 *   gives the range of `doubleTable` rows to search for it; characters that
 *   are not found are written as NOTFOUND936.
 *
 * A first byte that is the last byte of @p s has no second byte to read. It is
 * written as NOTFOUND936 and the conversion stops, instead of dereferencing
 * the end iterator.
 *
 * @param s input in the code page of this converter.
 * @return UTF-8 encoded string.
 */
auto DRW_ConvDBCSTable::toUtf8(const std::string &s) -> std::string
{
    std::string res;
    for (auto it = s.begin(); it < s.end(); ++it)
    {
        bool notFound = true;
        auto c = static_cast<unsigned char>(*it);
        if (c < 0x80)
        {
            notFound = false;
            // check for \U+ encoded text
            if (c == '\\')
            {
                if (s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+')
                {
                    res += encodeText(std::string(it, it + 7));
                    it += 6;
                }
                else
                {
                    res += static_cast<char>(c); // no \U+ encoded text write
                }
            }
            else
            {
                res += static_cast<char>(c); // c!='\' ascii char write
            }
        }
        else if (c == 0x80)
        { // 1 byte table
            notFound = false;
            res += encodeNum(0x20AC); // euro sign
        }
        else
        { // 2 bytes
            if (it + 1 == s.end())
            {
                // truncated character: there is no second byte
                res += encodeNum(NOTFOUND936);
                break;
            }
            ++it;
            int code = (c << 8) | static_cast<unsigned char>(*it);
            // NOTE(review): assumes leadTable has an entry for every c - 0x81 and c - 0x80
            // with c up to 0xFF - confirm against the table definitions.
            int sta = leadTable[c - 0x81];
            int end = leadTable[c - 0x80];
            for (int k = sta; k < end; k++)
            {
                if (doubleTable[k][0] == code)
                {
                    res += encodeNum(doubleTable[k][1]); // translate from table
                    notFound = false;
                    break;
                }
            }
        }
        // not found
        if (notFound)
        {
            res += encodeNum(NOTFOUND936);
        }
    } // end for
    return res;
}
// Creates the code page 932 converter: the base class receives DRW_Table932 as
// table and CPLENGTH932 as cpLength.
DRW_Conv932Table::DRW_Conv932Table()
: DRW_Converter(DRW_Table932, CPLENGTH932)
{
}
/**
 * @brief Convert a UTF-8 string to code page 932.
 *
 * ASCII bytes are copied unchanged. Each multi-byte UTF-8 sequence is decoded
 * with decodeNum() and then:
 * - code points between 0xFF60 and 0xFFA0 (exclusive) are written as the
 *   single byte code - CPOFFSET932;
 * - code points inside the ranges tested below are searched in
 *   DRW_DoubleTable932 and written as two bytes (high byte first);
 * - everything else is written as a "\U+XXXX" escape from decodeText().
 *
 * Bytes above 0x7F that cannot start a UTF-8 sequence (0x80-0xBF and
 * 0xF8-0xFF) are copied unchanged. decodeNum() does not report a length for
 * them, so the previous code advanced by an uninitialized amount.
 *
 * @param s UTF-8 encoded input.
 * @return the converted string.
 * @note A sequence cut short by the end of @p s still makes decodeNum() throw
 *       std::out_of_range through std::string::at().
 */
auto DRW_Conv932Table::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the run that has not been copied to 'result' yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c < 0xC0 || c > 0xF7)
        {
            // ASCII, or a byte that is not a UTF-8 lead byte: stays in the pending run
            continue;
        }

        // need to decode
        result += s.substr(j, i - j);
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1; // the loop increment moves on to the byte after the sequence

        bool notFound = true;
        // 1 byte table
        if (code > 0xff60 && code < 0xFFA0)
        {
            result += static_cast<char>(code - CPOFFSET932); // translate from table
            notFound = false;
        }
        // the table is only searched for code points inside these ranges
        if (notFound &&
            (code < 0xF8 || (code > 0x390 && code < 0x542) || (code > 0x200F && code < 0x9FA1) || code > 0xF928))
        {
            for (int k = 0; k < cpLength; k++)
            {
                if (DRW_DoubleTable932[k][1] == code)
                {
                    int data = DRW_DoubleTable932[k][0];
                    char d[3];
                    d[0] = static_cast<char>(data >> 8);
                    d[1] = static_cast<char>(data & 0xFF);
                    d[2] = '\0';
                    result += d; // translate from table
                    notFound = false;
                    break;
                }
            }
        }
        if (notFound)
        {
            result += decodeText(code);
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * @brief Convert a code page 932 string to UTF-8.
 *
 * - Bytes below 0x80 are copied, with "\U+XXXX" escapes expanded by encodeText().
 * - Bytes between 0xA0 and 0xE0 (exclusive) are single byte characters and
 *   are written as code point byte + CPOFFSET932.
 * - Any other byte is treated as the first byte of a two byte character.
 *   For first bytes 0x81-0x9F and 0xE0-0xFC, DRW_LeadTable932 gives the range
 *   of DRW_DoubleTable932 rows to search. Characters that are not found are
 *   written as NOTFOUND932.
 *
 * A first byte that is the last byte of @p s has no second byte to read. It is
 * written as NOTFOUND932 and the conversion stops, instead of dereferencing
 * the end iterator.
 *
 * @param s input in code page 932.
 * @return UTF-8 encoded string.
 */
auto DRW_Conv932Table::toUtf8(const std::string &s) -> std::string
{
    std::string res;
    for (auto it = s.begin(); it < s.end(); ++it)
    {
        bool notFound = true;
        auto c = static_cast<unsigned char>(*it);
        if (c < 0x80)
        {
            notFound = false;
            // check for \U+ encoded text
            if (c == '\\')
            {
                if (s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+')
                {
                    res += encodeText(std::string(it, it + 7));
                    it += 6;
                }
                else
                {
                    res += static_cast<char>(c); // no \U+ encoded text write
                }
            }
            else
            {
                res += static_cast<char>(c); // c!='\' ascii char write
            }
        }
        else if (c > 0xA0 && c < 0xE0)
        { // 1 byte table
            notFound = false;
            res += encodeNum(c + CPOFFSET932); // translate from table
        }
        else
        { // 2 bytes
            if (it + 1 == s.end())
            {
                // truncated character: there is no second byte
                res += encodeNum(NOTFOUND932);
                break;
            }
            ++it;
            int code = (c << 8) | static_cast<unsigned char>(*it);
            int sta = 0;
            int end = 0;
            if (c > 0x80 && c < 0xA0)
            {
                sta = DRW_LeadTable932[c - 0x81];
                end = DRW_LeadTable932[c - 0x80];
            }
            else if (c > 0xDF && c < 0xFD)
            {
                sta = DRW_LeadTable932[c - 0xC1];
                end = DRW_LeadTable932[c - 0xC0];
            }
            if (end > 0)
            {
                for (int k = sta; k < end; k++)
                {
                    if (DRW_DoubleTable932[k][0] == code)
                    {
                        res += encodeNum(DRW_DoubleTable932[k][1]); // translate from table
                        notFound = false;
                        break;
                    }
                }
            }
        }
        // not found
        if (notFound)
        {
            res += encodeNum(NOTFOUND932);
        }
    } // end for
    return res;
}
/**
 * @brief Placeholder for the UTF-8 to UTF-16 conversion.
 *
 * RLZ: to be written (only needed for write dwg 2007+)
 *
 * @param s ignored.
 * @return always an empty string.
 */
auto DRW_ConvUTF16::fromUtf8(const std::string &s) -> std::string
{
    DRW_UNUSED(s);
    return std::string();
}
/**
 * @brief Convert UTF-16 text stored as bytes (low byte first) to UTF-8.
 *
 * The input is read two bytes at a time. A high surrogate (0xD800-0xDBFF)
 * directly followed by a low surrogate (0xDC00-0xDFFF) is combined into one
 * code point before encoding; a surrogate without its partner is encoded as
 * it is. A single trailing byte of an odd-length input is ignored, where the
 * previous loop read one element past the end of the string.
 *
 * @param s UTF-16 code units as raw bytes.
 * @return UTF-8 encoded string.
 */
auto DRW_ConvUTF16::toUtf8(const std::string &s) -> std::string
{
    std::string res;
    const std::size_t len = s.size();

    // code unit made of the bytes at pos (low) and pos + 1 (high)
    const auto unitAt = [&s](std::size_t pos) -> int
    {
        const auto low = static_cast<unsigned char>(s[pos]);
        const auto high = static_cast<unsigned char>(s[pos + 1]);
        return (high << 8) | low;
    };

    for (std::size_t i = 0; i + 1 < len; i += 2)
    {
        int code = unitAt(i);
        if (code >= 0xD800 && code <= 0xDBFF && i + 3 < len)
        {
            const int next = unitAt(i + 2);
            if (next >= 0xDC00 && next <= 0xDFFF)
            {
                // surrogate pair: one code point in the range 0x10000-0x10FFFF
                code = 0x10000 + ((code - 0xD800) << 10) + (next - 0xDC00);
                i += 2; // the low surrogate has been consumed
            }
        }
        res += encodeNum(code);
    }
    return res;
}

View file

@ -1,26 +1,23 @@
#ifndef DRW_TEXTCODEC_H
#define DRW_TEXTCODEC_H
#include <string>
#include <QtGlobal>
#include "../drw_base.h"
#include <QtCore/qcontainerfwd.h>
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
#include "../vmisc/vtextcodec.h"
#else
#include "../vmisc/defglobal.h"
#include <QTextCodec>
#endif
#include <QtGlobal>
#include <memory>
#include <string>
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
class QStringList;
#endif
class DRW_Converter;
class DRW_TextCodec
{
public:
DRW_TextCodec();
~DRW_TextCodec() = default;
auto fromUtf8(const std::string &s) -> std::string;
auto toUtf8(const std::string &s) -> std::string;
auto getVersion() const -> int { return version; }
@ -29,18 +26,84 @@ public:
void setCodePage(const std::string &c, bool dxfFormat);
auto getCodePage() const -> std::string { return cp; }
static auto DXFCodePageMap() -> QMap<QString, QStringList>;
static auto CodecForName(const QString &name) -> VTextCodec *;
private:
static auto correctCodePage(const std::string& s) -> std::string;
static auto correctCodePage(const std::string &s) -> std::string;
private:
// cppcheck-suppress unknownMacro
Q_DISABLE_COPY_MOVE(DRW_TextCodec) // NOLINT
DRW::Version version{DRW::UNKNOWNV};
std::string cp{};
VTextCodec *conv{nullptr};
std::unique_ptr<DRW_Converter> conv;
};
// Base class of the text converters held by DRW_TextCodec.
// fromUtf8() returns its argument unchanged unless a subclass overrides it;
// the static helpers implement the "\U+XXXX" escape and UTF-8 primitives.
class DRW_Converter
{
public:
// t: code page table, stored in 'table' (may be nullptr); l: stored in 'cpLength'.
DRW_Converter(const int *t, int l)
: table{t},
cpLength{l}
{
}
virtual ~DRW_Converter() = default;
// UTF-8 -> target encoding; this base version returns s as it is.
virtual auto fromUtf8(const std::string &s) -> std::string { return s; }
// source encoding -> UTF-8.
virtual auto toUtf8(const std::string &s) -> std::string;
// "\U+XXXX" escape text -> UTF-8 bytes of code point XXXX.
static auto encodeText(const std::string &stmp) -> std::string;
// code point -> "\U+XXXX" escape text.
static auto decodeText(int c) -> std::string;
// code point -> UTF-8 bytes.
static auto encodeNum(int c) -> std::string;
// UTF-8 sequence at the start of s -> code point; *b receives the sequence length.
static auto decodeNum(const std::string &s, unsigned int *b) -> int;
// Table passed to the constructor; not owned by this object.
const int *table{nullptr};
// Length value passed to the constructor.
int cpLength;
};
// Converter for UTF-16 text; it uses no code page table (nullptr, length 0).
class DRW_ConvUTF16 : public DRW_Converter
{
public:
DRW_ConvUTF16()
: DRW_Converter(nullptr, 0)
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
};
// Converter for single-byte code pages driven by one lookup table.
class DRW_ConvTable : public DRW_Converter
{
public:
// t: code page table; l: number of entries searched in it. Both go to the base class.
DRW_ConvTable(const int *t, int l)
: DRW_Converter(t, l)
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
};
// Converter for double-byte code pages driven by a lead table and a table of
// value pairs.
class DRW_ConvDBCSTable : public DRW_Converter
{
public:
// t and l go to the base class (table, cpLength); lt is stored in 'leadTable'
// and dt in 'doubleTable'. None of the tables is copied.
DRW_ConvDBCSTable(const int *t, const int *lt, const int dt[][2], int l)
: DRW_Converter(t, l),
leadTable{lt},
doubleTable{dt}
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
private:
// Indexed by the first byte of a character; gives a row range of 'doubleTable'.
const int *leadTable{nullptr};
// Rows of two values: [0] is the code page value, [1] the code point.
const int (*doubleTable)[2];
};
// Converter for code page 932; the constructor is defined in the source file.
class DRW_Conv932Table : public DRW_Converter
{
public:
DRW_Conv932Table();
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
};
#endif // DRW_TEXTCODEC_H

View file

@ -9,6 +9,11 @@ SOURCES += \
$$PWD/libdxfrw/intern/drw_textcodec.cpp \
$$PWD/libdxfrw/intern/dxfreader.cpp \
$$PWD/libdxfrw/intern/dxfwriter.cpp \
$$PWD/libdxfrw/intern/drw_cptable932.h \
$$PWD/libdxfrw/intern/drw_cptable936.h \
$$PWD/libdxfrw/intern/drw_cptable949.h \
$$PWD/libdxfrw/intern/drw_cptable950.h \
$$PWD/libdxfrw/intern/drw_cptables.h \
$$PWD/libdxfrw/drw_classes.cpp \
$$PWD/libdxfrw/drw_entities.cpp \
$$PWD/libdxfrw/drw_header.cpp \

View file

@ -21,6 +21,11 @@ VLib {
prefix: "libdxfrw/"
files: [
"drw_base.cpp",
"intern/drw_cptable932.h",
"intern/drw_cptable936.h",
"intern/drw_cptable949.h",
"intern/drw_cptable950.h",
"intern/drw_cptables.h",
"intern/drw_dbg.cpp",
"intern/drw_textcodec.cpp",
"intern/dxfreader.cpp",