Return libdxfrw internal text codec converter.

This commit is contained in:
Roman Telezhynskyi 2023-08-21 19:13:47 +03:00
parent c2fea54864
commit 5f091344fa
9 changed files with 62614 additions and 127 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,25 +1,26 @@
#include "drw_textcodec.h" #include "drw_textcodec.h"
#include <sstream> #include "../drw_base.h"
#include <iomanip> #include "drw_cptable932.h"
#include "drw_cptable936.h"
#include "drw_cptable949.h"
#include "drw_cptable950.h"
#include "drw_cptables.h"
#include <QDebug>
#include <QString>
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include <QString> #include <iomanip>
#include <QDebug> #include <memory>
#include "../drw_base.h" #include <sstream>
#include "../vmisc/vabstractvalapplication.h"
#include "../ifc/exception/vexception.h"
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
#include "../vmisc/vtextcodec.h"
#else
#include <QTextCodec>
#endif
DRW_TextCodec::DRW_TextCodec() DRW_TextCodec::DRW_TextCodec()
: version(DRW::AC1021) : version(DRW::AC1021),
{} conv(new DRW_Converter(nullptr, 0))
{
}
void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat)
{
switch (v) switch (v)
{ {
case DRW::UNKNOWNV: case DRW::UNKNOWNV:
@ -47,10 +48,10 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
case DRW::AC1018: case DRW::AC1018:
{ {
version = DRW::AC1015; version = DRW::AC1015;
// if (cp.empty()) { //codepage not set, initialize // if (cp.empty()) { //codepage not set, initialize
cp = "ANSI_1252"; cp = "ANSI_1252";
setCodePage(cp, dxfFormat); setCodePage(cp, dxfFormat);
// } // }
break; break;
} }
case DRW::AC1021: case DRW::AC1021:
@ -61,11 +62,11 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
version = DRW::AC1021; version = DRW::AC1021;
if (dxfFormat) if (dxfFormat)
{ {
cp = "UTF-8";//RLZ: can be UCS2 or UTF-16 16bits per char cp = "UTF-8"; // RLZ: can be UCS2 or UTF-16 16bits per char
} }
else else
{ {
cp = "UTF-16";//RLZ: can be UCS2 or UTF-16 16bits per char cp = "UTF-16"; // RLZ: can be UCS2 or UTF-16 16bits per char
} }
setCodePage(cp, dxfFormat); setCodePage(cp, dxfFormat);
break; break;
@ -75,134 +76,609 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
} }
} }
void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat){ void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat)
{
version = DRW::UNKNOWNV; version = DRW::UNKNOWNV;
for (auto dwgVersionString : DRW::dwgVersionStrings) for (auto dwgVersionString : DRW::dwgVersionStrings)
{ {
if (std::strcmp( v.c_str(), dwgVersionString.first ) == 0) if (std::strcmp(v.c_str(), dwgVersionString.first) == 0)
{ {
version = dwgVersionString.second; version = dwgVersionString.second;
setVersion( dwgVersionString.second, dxfFormat); setVersion(dwgVersionString.second, dxfFormat);
break; break;
} }
} }
} }
void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat){ void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat)
{
cp = correctCodePage(c); cp = correctCodePage(c);
if (version < DRW::AC1021) conv.reset();
if (version == DRW::AC1009 || version == DRW::AC1015)
{ {
if (cp == "UTF-8") if (cp == "ANSI_874")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table874, CPLENGTHCOMMON);
}
else if (cp == "ANSI_932")
{
conv = std::make_unique<DRW_Conv932Table>();
}
else if (cp == "ANSI_936")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table936, DRW_LeadTable936, DRW_DoubleTable936, CPLENGTH936);
}
else if (cp == "ANSI_949")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table949, DRW_LeadTable949, DRW_DoubleTable949, CPLENGTH949);
}
else if (cp == "ANSI_950")
{
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table950, DRW_LeadTable950, DRW_DoubleTable950, CPLENGTH950);
}
else if (cp == "ANSI_1250")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1250, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1251")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1251, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1253")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1253, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1254")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1254, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1255")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1255, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1256")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1256, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1257")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1257, CPLENGTHCOMMON);
}
else if (cp == "ANSI_1258")
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1258, CPLENGTHCOMMON);
}
else if (cp == "UTF-8")
{ // DXF older than 2007 are write in win codepages { // DXF older than 2007 are write in win codepages
cp = "ANSI_1252"; cp = "ANSI_1252";
conv = std::make_unique<DRW_Converter>(nullptr, 0);
}
else
{
conv = std::make_unique<DRW_ConvTable>(DRW_Table1252, CPLENGTHCOMMON);
} }
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp));
} }
else else
{ {
conv = DRW_TextCodec::CodecForName(dxfFormat ? QStringLiteral("UTF-8") : QStringLiteral("UTF-16")); if (dxfFormat)
}
if (conv == nullptr)
{
const QString errorMsg = QCoreApplication::translate("DRW_TextCodec", "No available codec for code page '%1'.")
.arg(cp.c_str());
VAbstractApplication::VApp()->IsPedantic()
? throw VException(errorMsg) : qWarning() << VAbstractValApplication::warningMessageSignature + errorMsg;
if (version < DRW::AC1021 && cp == "UTF-8")
{ {
cp = "ANSI_1252"; conv = std::make_unique<DRW_Converter>(nullptr, 0); // utf16 to utf8
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp)); // Fallback to latin }
else
{
conv = std::make_unique<DRW_ConvUTF16>(); // utf16 to utf8
} }
} }
} }
auto DRW_TextCodec::DXFCodePageMap() -> QMap<QString, QStringList>
{
static auto map = QMap<QString, QStringList>
{
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Latin/Thai
{"ANSI_932", {"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932",
"X-SJIS", "EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "CP932",
"JIS7"}}, // Japanese
{"ANSI_936", {"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280",
"GB_2312-80", "GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Chinese PRC GBK (XGB) simplified
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Korean
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5",
"BIG5-HKSCS"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, //Central Europe and Eastern Europe
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Cyrillic script
{"ANSI_1252", {"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "L1",
"ISO_8859-1", "APPLE ROMAN", "ISO8859-1", "ISO8859-15", "ISO-IR-100", "IBM 850"}}, // Western Europe
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Greek
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "iso8859-3"}}, // Turkish
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Hebrew
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Arabic
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Baltic
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Vietnamese
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}},
};
return map;
}
auto DRW_TextCodec::CodecForName(const QString &name) -> VTextCodec *
{
QMap<QString, QStringList> knownCodecs = DXFCodePageMap();
if (knownCodecs.contains(name))
{
QStringList aliases = knownCodecs.value(name);
for (auto &alias : aliases)
{
if (VTextCodec *codec = VTextCodec::codecForName(alias.toLatin1()))
{
return codec;
}
}
}
return nullptr;
}
auto DRW_TextCodec::toUtf8(const std::string &s) -> std::string auto DRW_TextCodec::toUtf8(const std::string &s) -> std::string
{ {
if (conv == nullptr) return conv->toUtf8(s);
{
return s;
}
const QString encodedString = conv->toUnicode(s.c_str());
return encodedString.toStdString();
} }
auto DRW_TextCodec::fromUtf8(const std::string &s) -> std::string auto DRW_TextCodec::fromUtf8(const std::string &s) -> std::string
{ {
if (conv == nullptr) return conv->fromUtf8(s);
{
return s;
}
const QByteArray encodedString = conv->fromUnicode(QString::fromStdString(s));
return std::string(encodedString.constData());
} }
auto DRW_TextCodec::correctCodePage(const std::string& s) -> std::string { auto DRW_TextCodec::correctCodePage(const std::string &s) -> std::string
//stringstream cause crash in OS/X, bug#3597944 {
QString codePage = QString::fromStdString(s); // stringstream cause crash in OS/X, bug#3597944
codePage = codePage.toUpper(); std::string cp = s;
QMap<QString, QStringList> codeMap = DRW_TextCodec::DXFCodePageMap(); transform(cp.begin(), cp.end(), cp.begin(), toupper);
auto i = codeMap.constBegin(); static auto map = QMap<std::string, QSet<std::string>>{
while (i != codeMap.constEnd()) // Latin/Thai
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Central Europe and Eastern Europe
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, // Cyrillic script
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Western Europe
{"ANSI_1252",
{"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "ISO_8859-1", "APPLE ROMAN",
"ISO8859-1", "ISO8859-15", "ISO-IR-100", "L1", "IBM 850"}}, // Greek
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Turkish
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "ISO8859-3"}}, // Hebrew
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Arabic
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Baltic
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Vietnamese
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Japanese
{"ANSI_932",
{"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932", "X-SJIS",
"EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "JIS7"}}, // Chinese PRC GBK (XGB) simplified
{"ANSI_936",
{"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280", "GB_2312-80",
"GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Korean
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5", "BIG5-HKSCS"}}, // celtic
// {"ISO8859-14", {"ISO8859-14"}},
// {"TSCII", {"TSCII"}},//tamil
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}}};
auto i = map.constBegin();
while (i != map.constEnd())
{ {
if (i.value().contains(codePage)) if (i.value().contains(cp))
{ {
return i.key().toStdString(); return i.key();
} }
++i; ++i;
} }
return "ANSI_1252"; return "ANSI_1252";
} }
/**
 * Pass-through conversion for text that is already UTF-8 encoded.
 *
 * The only transformation is the expansion of "\U+XXXX" escape sequences
 * (backslash, 'U', '+', four hex digits) into the UTF-8 encoding of the
 * code point. Multi-byte UTF-8 sequences are stepped over untouched so that
 * their trailing bytes are never inspected as potential escape starts.
 *
 * @param s text to convert
 * @return copy of @p s with every complete "\U+XXXX" escape expanded
 */
auto DRW_Converter::toUtf8(const std::string &s) -> std::string
{
    std::string out;
    unsigned int copied = 0; // first byte of the run that still has to be copied verbatim
    unsigned int pos = 0;
    while (pos < s.length())
    {
        const auto byte = static_cast<unsigned char>(s.at(pos));
        if (byte >= 0xF8)
        {
            // not a lead byte this scanner knows about: advance by one like plain ascii
        }
        else if (byte >= 0xF0)
        { // 4 bytes
            pos += 3;
        }
        else if (byte >= 0xE0)
        { // 3 bytes
            pos += 2;
        }
        else if (byte >= 0x80)
        { // 2 bytes
            pos += 1;
        }
        else if (byte == '\\' && pos + 6 < s.length() && s.at(pos + 1) == 'U' && s.at(pos + 2) == '+')
        { // ascii: a complete \U+???? escape starts here
            out += s.substr(copied, pos - copied);
            out += encodeText(s.substr(pos, 7));
            pos += 6;
            copied = pos + 1;
        }
        ++pos;
    }
    out += s.substr(copied);
    return out;
}
/**
 * Converts UTF-8 text to the single-byte code page described by `table`.
 *
 * ASCII bytes are copied unchanged. Every multi-byte UTF-8 sequence is
 * decoded to a code point and looked up in `table`; a hit is written as the
 * single byte `CPOFFSET + index`, a miss is written as a "\U+XXXX" escape.
 *
 * @param s UTF-8 encoded text
 * @return text encoded in the code page of this converter
 */
auto DRW_ConvTable::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the ASCII run that has not been copied yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c <= 0x7F)
        {
            continue; // plain ASCII, copied later as part of the pending run
        }

        // need to decode
        result += s.substr(j, i - j);

        // Default to one byte so the scan always advances by a defined amount,
        // even when decodeNum reports no length for an invalid lead byte.
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1;

        const int *const tableEnd = table + cpLength;
        const int *const hit = std::find(table, tableEnd, code);
        if (hit != tableEnd)
        {
            result += static_cast<char>(CPOFFSET + (hit - table)); // translate from table
        }
        else
        {
            result += decodeText(code); // not representable: keep it as an escape
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * Converts text in the single-byte code page described by `table` to UTF-8.
 *
 * Bytes below 0x80 are copied as they are, except that a complete "\U+XXXX"
 * escape is expanded to the UTF-8 encoding of its code point. Bytes from
 * 0x80 upwards are mapped through `table[byte - 0x80]`.
 *
 * @param s text encoded in the code page of this converter
 * @return UTF-8 encoded text
 */
auto DRW_ConvTable::toUtf8(const std::string &s) -> std::string
{
    std::string out;
    const std::string::size_type len = s.size();
    for (std::string::size_type pos = 0; pos < len; ++pos)
    {
        const auto byte = static_cast<unsigned char>(s[pos]);
        if (byte >= 0x80)
        {
            out += encodeNum(table[byte - 0x80]); // translate from table
            continue;
        }

        // an escape needs seven characters: '\', 'U', '+' and four digits
        const bool escape = byte == '\\' && len - pos > 6 && s[pos + 1] == 'U' && s[pos + 2] == '+';
        if (escape)
        {
            out += encodeText(s.substr(pos, 7));
            pos += 6; // the loop increment steps over the seventh character
        }
        else
        {
            out += static_cast<char>(byte); // ordinary ascii char, or a lone backslash
        }
    }
    return out;
}
/**
 * Turns a "\U+XXXX" escape into the UTF-8 encoding of its code point.
 *
 * Only characters 3..6 of @p stmp (the hex digits after "\U+") are read.
 * Digits that cannot be parsed yield code point 0.
 *
 * @param stmp escape sequence, at least three characters long
 * @return result of encodeNum() for the parsed code point
 */
auto DRW_Converter::encodeText(const std::string &stmp) -> std::string
{
    const std::string hexDigits = stmp.substr(3, 4);
    int code = 0;
#if defined(__APPLE__)
    // separate code path for Apple platforms: plain C parsing instead of a stream
    const int matched = sscanf(hexDigits.c_str(), "%x", &code);
    if (matched == 0 || matched == EOF)
    {
        code = 0;
    }
#else
    std::istringstream parser(hexDigits);
    parser >> std::hex >> code;
#endif
    return encodeNum(code);
}
/**
 * Formats a code point as a "\U+XXXX" escape sequence.
 *
 * The number is written as upper-case hexadecimal, zero padded to at least
 * four digits.
 *
 * @param c code point to format
 * @return the escape sequence, e.g. "\U+20AC"
 */
auto DRW_Converter::decodeText(int c) -> std::string
{
    std::string res = "\\U+";
#if defined(__APPLE__)
    // Format into a plain buffer and append it as a C string, so only the
    // digits are added. Appending a std::string pre-sized to 16 characters
    // would also append the unused trailing NUL bytes.
    char digits[16] = {};
    snprintf(digits, sizeof(digits), "%04X", static_cast<unsigned int>(c));
    res += digits;
#else
    std::stringstream ss;
    ss << std::uppercase << std::setfill('0') << std::setw(4) << std::hex << c;
    res += ss.str();
#endif
    return res;
}
/**
 * Encodes a code point as a UTF-8 byte sequence.
 *
 * Values below 128 produce one byte, below 0x800 two bytes, below 0x10000
 * three bytes and everything else four bytes. A value whose single byte would
 * be NUL produces an empty string.
 *
 * @param c code point to encode
 * @return the UTF-8 bytes for @p c
 */
auto DRW_Converter::encodeNum(int c) -> std::string
{
    // first byte of a multi-byte sequence: marker bits plus the top payload bits
    const auto lead = [](int marker, int bits)
    { return static_cast<char>(static_cast<unsigned char>(marker | bits)); };
    // continuation byte: 10xxxxxx carrying six payload bits
    const auto trail = [](int bits) { return static_cast<char>(0x80 | (bits & 0x3f)); };

    std::string utf8;
    if (c < 128)
    { // 0-7F US-ASCII 7 bits
        const auto single = static_cast<unsigned char>(c);
        if (single != 0)
        {
            utf8 += static_cast<char>(single);
        }
    }
    else if (c < 0x800)
    { // 80-07FF 2 bytes
        utf8 += lead(0xC0, c >> 6);
        utf8 += trail(c);
    }
    else if (c < 0x10000)
    { // 800-FFFF 3 bytes
        utf8 += lead(0xe0, c >> 12);
        utf8 += trail(c >> 6);
        utf8 += trail(c);
    }
    else
    { // 10000-10FFFF 4 bytes
        utf8 += lead(0xf0, c >> 18);
        utf8 += trail(c >> 12);
        utf8 += trail(c >> 6);
        utf8 += trail(c);
    }
    return utf8;
}
/**
 * Decodes the UTF-8 sequence that starts at the first byte of @p s.
 *
 * @param s non-empty string whose first byte is the lead byte of the sequence
 *          (throws std::out_of_range when empty)
 * @param b receives the number of bytes consumed; it is always written:
 *          2, 3 or 4 for a complete sequence, 1 when the first byte is not a
 *          valid multi-byte lead byte, and the length of @p s when the
 *          sequence is cut short by the end of the string
 * @return the decoded code point, or 0 when nothing could be decoded
 */
auto DRW_Converter::decodeNum(const std::string &s, unsigned int *b) -> int
{
    const auto c = static_cast<unsigned char>(s.at(0));

    unsigned int needed = 0;
    int code = 0;
    if ((c & 0xE0) == 0xC0)
    { // 2 bytes
        needed = 2;
        code = c & 0x1F;
    }
    else if ((c & 0xF0) == 0xE0)
    { // 3 bytes
        needed = 3;
        code = c & 0x0F;
    }
    else if ((c & 0xF8) == 0xF0)
    { // 4 bytes
        needed = 4;
        code = c & 0x07;
    }
    else
    {
        // Stray continuation byte or unsupported lead byte. Report one consumed
        // byte so callers never read an unset length and always make progress.
        *b = 1;
        return 0;
    }

    if (s.length() < needed)
    {
        // Sequence truncated by the end of the input: consume what is left.
        *b = static_cast<unsigned int>(s.length());
        return 0;
    }

    // every continuation byte contributes its six low bits
    for (unsigned int k = 1; k < needed; ++k)
    {
        code = (code << 6) | (s[k] & 0x3F);
    }
    *b = needed;
    return code;
}
/**
 * Converts UTF-8 text to the double-byte code page described by `doubleTable`.
 *
 * ASCII bytes are copied unchanged. Every multi-byte UTF-8 sequence is
 * decoded to a code point and searched in column 1 of `doubleTable`; a hit is
 * written as the two bytes stored in column 0 (high byte first), a miss is
 * written as a "\U+XXXX" escape.
 *
 * @param s UTF-8 encoded text
 * @return text encoded in the code page of this converter
 */
auto DRW_ConvDBCSTable::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the ASCII run that has not been copied yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c <= 0x7F)
        {
            continue; // direct conversion, copied later as part of the pending run
        }

        // need to decode
        result += s.substr(j, i - j);

        // Default to one byte so the scan always advances by a defined amount,
        // even when decodeNum reports no length for an invalid lead byte.
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1;

        bool notFound = true;
        for (int k = 0; k < cpLength; k++)
        {
            if (doubleTable[k][1] != code)
            {
                continue;
            }
            const int data = doubleTable[k][0];
            const char d[3] = {static_cast<char>(data >> 8), static_cast<char>(data & 0xFF), '\0'};
            result += d; // translate from table
            notFound = false;
            break;
        }
        if (notFound)
        {
            result += decodeText(code); // not representable: keep it as an escape
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * Converts text in the double-byte code page of this converter to UTF-8.
 *
 * - Bytes below 0x80 are copied, with complete "\U+XXXX" escapes expanded.
 * - Byte 0x80 is written as the euro sign (U+20AC).
 * - Any other byte is a lead byte: it is combined with the following byte and
 *   searched in the `doubleTable` range that `leadTable` gives for that lead
 *   byte. Unknown pairs are written as NOTFOUND936.
 *
 * A lead byte that is the very last byte of @p s has no trail byte; it is
 * written as NOTFOUND936 instead of reading past the end of the string.
 *
 * @param s text encoded in the code page of this converter
 * @return UTF-8 encoded text
 */
auto DRW_ConvDBCSTable::toUtf8(const std::string &s) -> std::string
{
    std::string res;
    for (auto it = s.begin(); it < s.end(); ++it)
    {
        const auto c = static_cast<unsigned char>(*it);
        if (c < 0x80)
        {
            // check for \U+ encoded text
            const bool escape = c == '\\' && s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+';
            if (escape)
            {
                res += encodeText(std::string(it, it + 7));
                it += 6;
            }
            else
            {
                res += static_cast<char>(c); // ascii char, or a lone backslash
            }
            continue;
        }

        if (c == 0x80)
        { // 1 byte table
            res += encodeNum(0x20AC); // euro sign
            continue;
        }

        // 2 bytes
        if (s.end() - it < 2)
        {
            // truncated input: lead byte without its trail byte
            res += encodeNum(NOTFOUND936);
            break;
        }
        ++it;
        const int code = (c << 8) | static_cast<unsigned char>(*it);

        bool notFound = true;
        const int end = leadTable[c - 0x80];
        for (int k = leadTable[c - 0x81]; k < end; k++)
        {
            if (doubleTable[k][0] == code)
            {
                res += encodeNum(doubleTable[k][1]); // translate from table
                notFound = false;
                break;
            }
        }
        if (notFound)
        {
            res += encodeNum(NOTFOUND936);
        }
    } // end for
    return res;
}
// Builds the code page 932 converter: the base class is handed DRW_Table932
// together with CPLENGTH932 as the table length.
DRW_Conv932Table::DRW_Conv932Table()
: DRW_Converter(DRW_Table932, CPLENGTH932)
{
}
/**
 * Converts UTF-8 text to code page 932.
 *
 * ASCII bytes are copied unchanged. Every multi-byte UTF-8 sequence is
 * decoded to a code point, then:
 * - code points in (0xFF60, 0xFFA0) are written as the single byte
 *   `code - CPOFFSET932`;
 * - code points inside the ranges covered by DRW_DoubleTable932 are searched
 *   there and written as two bytes (high byte first);
 * - everything else is written as a "\U+XXXX" escape.
 *
 * @param s UTF-8 encoded text
 * @return text encoded in code page 932
 */
auto DRW_Conv932Table::fromUtf8(const std::string &s) -> std::string
{
    std::string result;
    unsigned int j = 0; // start of the ASCII run that has not been copied yet
    for (unsigned int i = 0; i < s.length(); i++)
    {
        const auto c = static_cast<unsigned char>(s.at(i));
        if (c <= 0x7F)
        {
            continue; // direct conversion, copied later as part of the pending run
        }

        // need to decode
        result += s.substr(j, i - j);

        // Default to one byte so the scan always advances by a defined amount,
        // even when decodeNum reports no length for an invalid lead byte.
        unsigned int l = 1;
        const int code = decodeNum(s.substr(i, 4), &l);
        j = i + l;
        i = j - 1;

        bool notFound = true;
        // 1 byte table
        if (code > 0xff60 && code < 0xFFA0)
        {
            result += static_cast<char>(code - CPOFFSET932); // translate from table
            notFound = false;
        }

        // only search the double-byte table for code points it can contain
        const bool inDoubleTableRange =
            code < 0xF8 || (code > 0x390 && code < 0x542) || (code > 0x200F && code < 0x9FA1) || code > 0xF928;
        if (notFound && inDoubleTableRange)
        {
            for (int k = 0; k < cpLength; k++)
            {
                if (DRW_DoubleTable932[k][1] != code)
                {
                    continue;
                }
                const int data = DRW_DoubleTable932[k][0];
                const char d[3] = {static_cast<char>(data >> 8), static_cast<char>(data & 0xFF), '\0'};
                result += d; // translate from table
                notFound = false;
                break;
            }
        }

        if (notFound)
        {
            result += decodeText(code); // not representable: keep it as an escape
        }
    }
    result += s.substr(j);
    return result;
}
/**
 * Converts text in code page 932 to UTF-8.
 *
 * - Bytes below 0x80 are copied, with complete "\U+XXXX" escapes expanded.
 * - Bytes in (0xA0, 0xE0) are single-byte characters, written as code point
 *   `byte + CPOFFSET932`.
 * - Any other byte is combined with the following byte and searched in
 *   DRW_DoubleTable932, using DRW_LeadTable932 to narrow the range for lead
 *   bytes in (0x80, 0xA0) and (0xDF, 0xFD). Unknown pairs are written as
 *   NOTFOUND932.
 *
 * A lead byte that is the very last byte of @p s has no trail byte; it is
 * written as NOTFOUND932 instead of reading past the end of the string.
 *
 * @param s text encoded in code page 932
 * @return UTF-8 encoded text
 */
auto DRW_Conv932Table::toUtf8(const std::string &s) -> std::string
{
    std::string res;
    for (auto it = s.begin(); it < s.end(); ++it)
    {
        const auto c = static_cast<unsigned char>(*it);
        if (c < 0x80)
        {
            // check for \U+ encoded text
            const bool escape = c == '\\' && s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+';
            if (escape)
            {
                res += encodeText(std::string(it, it + 7));
                it += 6;
            }
            else
            {
                res += static_cast<char>(c); // ascii char, or a lone backslash
            }
            continue;
        }

        if (c > 0xA0 && c < 0xE0)
        { // 1 byte table
            res += encodeNum(c + CPOFFSET932); // translate from table
            continue;
        }

        // 2 bytes
        if (s.end() - it < 2)
        {
            // truncated input: lead byte without its trail byte
            res += encodeNum(NOTFOUND932);
            break;
        }
        ++it;
        const int code = (c << 8) | static_cast<unsigned char>(*it);

        // range of DRW_DoubleTable932 rows that belong to this lead byte
        int sta = 0;
        int end = 0;
        if (c > 0x80 && c < 0xA0)
        {
            sta = DRW_LeadTable932[c - 0x81];
            end = DRW_LeadTable932[c - 0x80];
        }
        else if (c > 0xDF && c < 0xFD)
        {
            sta = DRW_LeadTable932[c - 0xC1];
            end = DRW_LeadTable932[c - 0xC0];
        }

        bool notFound = true;
        for (int k = sta; k < end; k++)
        {
            if (DRW_DoubleTable932[k][0] == code)
            {
                res += encodeNum(DRW_DoubleTable932[k][1]); // translate from table
                notFound = false;
                break;
            }
        }
        if (notFound)
        {
            res += encodeNum(NOTFOUND932);
        }
    } // end for
    return res;
}
// Placeholder for the UTF-8 to UTF-16 conversion: the argument is ignored and
// an empty string is always returned.
auto DRW_ConvUTF16::fromUtf8(const std::string &s) -> std::string
{
DRW_UNUSED(s);
// RLZ: to be written (only needed for write dwg 2007+)
return {};
}
auto DRW_ConvUTF16::toUtf8(const std::string &s) -> std::string
{ // RLZ: pending to write
std::string res;
for (auto it = s.begin(); it < s.end(); ++it)
{
auto c1 = static_cast<unsigned char>(*it);
auto c2 = static_cast<unsigned char>(*(++it));
auto ch = static_cast<duint16>((c2 << 8) | c1);
res += encodeNum(ch);
} // end for
return res;
}

View file

@ -1,26 +1,23 @@
#ifndef DRW_TEXTCODEC_H #ifndef DRW_TEXTCODEC_H
#define DRW_TEXTCODEC_H #define DRW_TEXTCODEC_H
#include <string>
#include <QtGlobal>
#include "../drw_base.h" #include "../drw_base.h"
#include <QtCore/qcontainerfwd.h> #include <QtCore/qcontainerfwd.h>
#include <QtGlobal>
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0) #include <memory>
#include "../vmisc/vtextcodec.h" #include <string>
#else
#include "../vmisc/defglobal.h"
#include <QTextCodec>
#endif
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
class QStringList; class QStringList;
#endif #endif
class DRW_Converter;
class DRW_TextCodec class DRW_TextCodec
{ {
public: public:
DRW_TextCodec(); DRW_TextCodec();
~DRW_TextCodec() = default;
auto fromUtf8(const std::string &s) -> std::string; auto fromUtf8(const std::string &s) -> std::string;
auto toUtf8(const std::string &s) -> std::string; auto toUtf8(const std::string &s) -> std::string;
auto getVersion() const -> int { return version; } auto getVersion() const -> int { return version; }
@ -29,18 +26,84 @@ public:
void setCodePage(const std::string &c, bool dxfFormat); void setCodePage(const std::string &c, bool dxfFormat);
auto getCodePage() const -> std::string { return cp; } auto getCodePage() const -> std::string { return cp; }
static auto DXFCodePageMap() -> QMap<QString, QStringList>;
static auto CodecForName(const QString &name) -> VTextCodec *;
private: private:
static auto correctCodePage(const std::string& s) -> std::string; static auto correctCodePage(const std::string &s) -> std::string;
private: private:
// cppcheck-suppress unknownMacro // cppcheck-suppress unknownMacro
Q_DISABLE_COPY_MOVE(DRW_TextCodec) // NOLINT Q_DISABLE_COPY_MOVE(DRW_TextCodec) // NOLINT
DRW::Version version{DRW::UNKNOWNV}; DRW::Version version{DRW::UNKNOWNV};
std::string cp{}; std::string cp{};
VTextCodec *conv{nullptr}; std::unique_ptr<DRW_Converter> conv;
};
// Base class of the text converters owned by DRW_TextCodec.
// Used directly it acts as a pass-through: fromUtf8() returns its input
// unchanged, and toUtf8() is defined out of line in the matching .cpp file.
// The static helpers convert between code points, UTF-8 byte sequences and
// "\U+XXXX" escapes and are shared by the derived converters.
class DRW_Converter
{
public:
// t: conversion table (may be nullptr), l: number of entries in that table.
DRW_Converter(const int *t, int l)
: table{t},
cpLength{l}
{
}
virtual ~DRW_Converter() = default;
// Returns s unchanged.
virtual auto fromUtf8(const std::string &s) -> std::string { return s; }
virtual auto toUtf8(const std::string &s) -> std::string;
// "\U+XXXX" escape -> UTF-8 bytes of the code point.
static auto encodeText(const std::string &stmp) -> std::string;
// Code point -> "\U+XXXX" escape.
static auto decodeText(int c) -> std::string;
// Code point -> UTF-8 bytes.
static auto encodeNum(int c) -> std::string;
// UTF-8 sequence at the start of s -> code point; *b receives the byte count.
static auto decodeNum(const std::string &s, unsigned int *b) -> int;
// Conversion table passed to the constructor; not owned by this object.
const int *table{nullptr};
// Length passed to the constructor together with the table.
int cpLength;
};
// Converter for UTF-16 encoded text. It needs no lookup table, so the base
// class is constructed with a null table of length 0.
class DRW_ConvUTF16 : public DRW_Converter
{
public:
DRW_ConvUTF16()
: DRW_Converter(nullptr, 0)
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
};
// Converter for single-byte code pages: each byte from 0x80 upwards is
// mapped to a code point through the table handed to the constructor.
class DRW_ConvTable : public DRW_Converter
{
public:
// t: code point table, l: number of entries in t.
DRW_ConvTable(const int *t, int l)
: DRW_Converter(t, l)
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
};
// Converter for double-byte code pages. Conversion is driven by a table of
// {double-byte code, code point} pairs plus a lead table holding, per lead
// byte, where that byte's range of pairs starts.
class DRW_ConvDBCSTable : public DRW_Converter
{
public:
// t:  table forwarded to the base class
// lt: lead table (start index into dt per lead byte)
// dt: {double-byte code, code point} pairs
// l:  number of rows in dt
DRW_ConvDBCSTable(const int *t, const int *lt, const int dt[][2], int l)
: DRW_Converter(t, l),
leadTable{lt},
doubleTable{dt}
{
}
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
private:
// Neither table is owned by this object.
const int *leadTable{nullptr};
const int (*doubleTable)[2];
};
// Converter for code page 932. The constructor takes no arguments; the
// tables it works with are chosen in the .cpp file.
class DRW_Conv932Table : public DRW_Converter
{
public:
DRW_Conv932Table();
auto fromUtf8(const std::string &s) -> std::string override;
auto toUtf8(const std::string &s) -> std::string override;
}; };
#endif // DRW_TEXTCODEC_H #endif // DRW_TEXTCODEC_H

View file

@ -9,6 +9,11 @@ SOURCES += \
$$PWD/libdxfrw/intern/drw_textcodec.cpp \ $$PWD/libdxfrw/intern/drw_textcodec.cpp \
$$PWD/libdxfrw/intern/dxfreader.cpp \ $$PWD/libdxfrw/intern/dxfreader.cpp \
$$PWD/libdxfrw/intern/dxfwriter.cpp \ $$PWD/libdxfrw/intern/dxfwriter.cpp \
$$PWD/libdxfrw/intern/drw_cptable932.h \
$$PWD/libdxfrw/intern/drw_cptable936.h \
$$PWD/libdxfrw/intern/drw_cptable949.h \
$$PWD/libdxfrw/intern/drw_cptable950.h \
$$PWD/libdxfrw/intern/drw_cptables.h \
$$PWD/libdxfrw/drw_classes.cpp \ $$PWD/libdxfrw/drw_classes.cpp \
$$PWD/libdxfrw/drw_entities.cpp \ $$PWD/libdxfrw/drw_entities.cpp \
$$PWD/libdxfrw/drw_header.cpp \ $$PWD/libdxfrw/drw_header.cpp \

View file

@ -21,6 +21,11 @@ VLib {
prefix: "libdxfrw/" prefix: "libdxfrw/"
files: [ files: [
"drw_base.cpp", "drw_base.cpp",
"intern/drw_cptable932.h",
"intern/drw_cptable936.h",
"intern/drw_cptable949.h",
"intern/drw_cptable950.h",
"intern/drw_cptables.h",
"intern/drw_dbg.cpp", "intern/drw_dbg.cpp",
"intern/drw_textcodec.cpp", "intern/drw_textcodec.cpp",
"intern/dxfreader.cpp", "intern/dxfreader.cpp",