Fixed text conversion.

--HG--
branch : feature
This commit is contained in:
Roman Telezhynskyi 2017-06-29 20:03:35 +03:00
parent 1226db1735
commit 22b448f98d
9 changed files with 37 additions and 62420 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,20 +2,13 @@
#include <sstream>
#include <iomanip>
#include <algorithm>
#include <QString>
#include <QTextCodec>
#include "../drw_base.h"
#include "drw_cptables.h"
#include "drw_cptable932.h"
#include "drw_cptable936.h"
#include "drw_cptable949.h"
#include "drw_cptable950.h"
DRW_TextCodec::DRW_TextCodec() {
version = DRW::AC1021;
conv = new DRW_Converter(NULL, 0);
}
DRW_TextCodec::~DRW_TextCodec() {
delete conv;
conv = nullptr;
}
void DRW_TextCodec::setVersion(int v, bool dxfFormat){
@ -48,416 +41,53 @@ void DRW_TextCodec::setVersion(std::string *v, bool dxfFormat){
|| versionStr == "AC1015" || versionStr == "AC1018") {
setVersion(DRW::AC1015, dxfFormat);
}
setVersion(DRW::AC1021, dxfFormat);
else
{
setVersion(DRW::AC1021, dxfFormat);
}
}
void DRW_TextCodec::setCodePage(std::string *c, bool dxfFormat){
cp = correctCodePage(*c);
delete conv;
if (version == DRW::AC1009 || version == DRW::AC1015) {
if (cp == "ANSI_874")
conv = new DRW_ConvTable(DRW_Table874, CPLENGHTCOMMON);
else if (cp == "ANSI_932")
conv = new DRW_Conv932Table(DRW_Table932, DRW_LeadTable932,
DRW_DoubleTable932, CPLENGHT932);
else if (cp == "ANSI_936")
conv = new DRW_ConvDBCSTable(DRW_Table936, DRW_LeadTable936,
DRW_DoubleTable936, CPLENGHT936);
else if (cp == "ANSI_949")
conv = new DRW_ConvDBCSTable(DRW_Table949, DRW_LeadTable949,
DRW_DoubleTable949, CPLENGHT949);
else if (cp == "ANSI_950")
conv = new DRW_ConvDBCSTable(DRW_Table950, DRW_LeadTable950,
DRW_DoubleTable950, CPLENGHT950);
else if (cp == "ANSI_1250")
conv = new DRW_ConvTable(DRW_Table1250, CPLENGHTCOMMON);
else if (cp == "ANSI_1251")
conv = new DRW_ConvTable(DRW_Table1251, CPLENGHTCOMMON);
else if (cp == "ANSI_1253")
conv = new DRW_ConvTable(DRW_Table1253, CPLENGHTCOMMON);
else if (cp == "ANSI_1254")
conv = new DRW_ConvTable(DRW_Table1254, CPLENGHTCOMMON);
else if (cp == "ANSI_1255")
conv = new DRW_ConvTable(DRW_Table1255, CPLENGHTCOMMON);
else if (cp == "ANSI_1256")
conv = new DRW_ConvTable(DRW_Table1256, CPLENGHTCOMMON);
else if (cp == "ANSI_1257")
conv = new DRW_ConvTable(DRW_Table1257, CPLENGHTCOMMON);
else if (cp == "ANSI_1258")
conv = new DRW_ConvTable(DRW_Table1258, CPLENGHTCOMMON);
else if (cp == "UTF-8") { //DXF older than 2007 are write in win codepages
if (version < DRW::AC1021)
{
if (cp == "UTF-8")
{ //DXF older than 2007 are write in win codepages
cp = "ANSI_1252";
conv = new DRW_Converter(NULL, 0);
} else
conv = new DRW_ConvTable(DRW_Table1252, CPLENGHTCOMMON);
} else {
}
conv = QTextCodec::codecForName(cp.c_str());
}
else
{
if (dxfFormat)
conv = new DRW_Converter(NULL, 0);//utf16 to utf8
{
conv = QTextCodec::codecForName("UTF-8");
}
else
conv = new DRW_ConvUTF16();//utf16 to utf8
{
conv = QTextCodec::codecForName("UTF-16");
}
}
}
std::string DRW_TextCodec::toUtf8(std::string s) {
return conv->toUtf8(&s);
if (conv == nullptr)
{
return s;
}
const QString encodedString = conv->toUnicode(s.c_str());
return encodedString.toStdString();
}
std::string DRW_TextCodec::fromUtf8(std::string s) {
return conv->fromUtf8(&s);
}
std::string DRW_Converter::toUtf8(std::string *s) {
std::string result;
int j = 0;
unsigned int i= 0;
for (i=0; i < s->length(); i++) {
unsigned char c = s->at(i);
if (c < 0x80) { //ascii check for /U+????
if (c == '\\' && i+6 < s->length() && s->at(i+1) == 'U' && s->at(i+2) == '+') {
result += s->substr(j,i-j);
result += encodeText(s->substr(i,7));
i +=6;
j = i+1;
}
} else if (c < 0xE0 ) {//2 bits
i++;
} else if (c < 0xF0 ) {//3 bits
i +=2;
} else if (c < 0xF8 ) {//4 bits
i +=3;
}
}
result += s->substr(j);
return result;
}
std::string DRW_ConvTable::fromUtf8(std::string *s) {
std::string result;
bool notFound;
int code;
int j = 0;
for (unsigned int i=0; i < s->length(); i++) {
unsigned char c = s->at(i);
if (c > 0x7F) { //need to decode
result += s->substr(j,i-j);
std::string part1 = s->substr(i,4);
int l;
code = decodeNum(part1, &l);
j = i+l;
i = j - 1;
notFound = true;
for (int k=0; k<cpLenght; k++){
if(table[k] == code) {
result += CPOFFSET + k; //translate from table
notFound = false;
break;
}
}
if (notFound)
result += decodeText(code);
}
}
result += s->substr(j);
return result;
}
std::string DRW_ConvTable::toUtf8(std::string *s) {
std::string res;
std::string::iterator it;
for ( it=s->begin() ; it < s->end(); ++it ) {
unsigned char c = *it;
if (c < 0x80) {
//check for \U+ encoded text
if (c == '\\') {
if (it+6 < s->end() && *(it+1) == 'U' && *(it+2) == '+') {
res += encodeText(std::string(it, it+7));
it +=6;
} else {
res +=c; //no \U+ encoded text write
}
} else
res +=c; //c!='\' ascii char write
} else {//end c < 0x80
res += encodeNum(table[c-0x80]); //translate from table
}
} //end for
return res;
}
std::string DRW_Converter::encodeText(std::string stmp){
int code;
#if defined(__APPLE__)
int Succeeded = sscanf (&( stmp.substr(3,4)[0]), "%x", &code );
if ( !Succeeded || Succeeded == EOF )
code = 0;
#else
std::istringstream sd(stmp.substr(3,4));
sd >> std::hex >> code;
#endif
return encodeNum(code);
}
std::string DRW_Converter::decodeText(int c){
std::string res = "\\U+";
std::string num;
#if defined(__APPLE__)
std::string str(16, '\0');
snprintf (&(str[0]), 16, "%04X", c );
num = str;
#else
std::stringstream ss;
ss << std::uppercase << std::setfill('0') << std::setw(4) << std::hex << c;
ss >> num;
#endif
res += num;
return res;
}
std::string DRW_Converter::encodeNum(int c){
unsigned char ret[5];
if (c < 128) { // 0-7F US-ASCII 7 bits
ret[0] = c;
ret[1] = 0;
} else if (c < 0x800) { //80-07FF 2 bytes
ret[0] = 0xC0 | (c >> 6);
ret[1] = 0x80 | (c & 0x3f);
ret[2] = 0;
} else if (c< 0x10000) { //800-FFFF 3 bytes
ret[0] = 0xe0 | (c >> 12);
ret[1] = 0x80 | ((c >> 6) & 0x3f);
ret[2] = 0x80 | (c & 0x3f);
ret[3] = 0;
} else { //10000-10FFFF 4 bytes
ret[0] = 0xf0 | (c >> 18);
ret[1] = 0x80 | ((c >> 12) & 0x3f);
ret[2] = 0x80 | ((c >> 6) & 0x3f);
ret[3] = 0x80 | (c & 0x3f);
ret[4] = 0;
}
return std::string((char*)ret);
}
/** 's' is a string with at least 4 bytes lenght
** returned 'b' is byte lenght of encoded char: 2,3 or 4
**/
int DRW_Converter::decodeNum(std::string s, int *b){
int code= 0;
unsigned char c = s.at(0);
if ( (c& 0xE0) == 0xC0) { //2 bytes
code = ( c&0x1F)<<6;
code = (s.at(1) &0x3F) | code;
*b = 2;
} else if ( (c& 0xF0) == 0xE0) { //3 bytes
code = ( c&0x0F)<<12;
code = ((s.at(1) &0x3F)<<6) | code;
code = (s.at(2) &0x3F) | code;
*b = 3;
} else if ( (c& 0xF8) == 0xF0) { //4 bytes
code = ( c&0x07)<<18;
code = ((s.at(1) &0x3F)<<12) | code;
code = ((s.at(2) &0x3F)<<6) | code;
code = (s.at(3) &0x3F) | code;
*b = 4;
if (conv == nullptr)
{
return s;
}
return code;
}
std::string DRW_ConvDBCSTable::fromUtf8(std::string *s) {
std::string result;
bool notFound;
int code;
int j = 0;
for (unsigned int i=0; i < s->length(); i++) {
unsigned char c = s->at(i);
if (c > 0x7F) { //need to decode
result += s->substr(j,i-j);
std::string part1 = s->substr(i,4);
int l;
code = decodeNum(part1, &l);
j = i+l;
i = j - 1;
notFound = true;
for (int k=0; k<cpLenght; k++){
if(doubleTable[k][1] == code) {
int data = doubleTable[k][0];
char d[3];
d[0] = data >> 8;
d[1] = data & 0xFF;
d[2]= '\0';
result += d; //translate from table
notFound = false;
break;
}
}
if (notFound)
result += decodeText(code);
} //direct conversion
}
result += s->substr(j);
return result;
}
std::string DRW_ConvDBCSTable::toUtf8(std::string *s) {
std::string res;
std::string::iterator it;
for ( it=s->begin() ; it < s->end(); ++it ) {
bool notFound = true;
unsigned char c = *it;
if (c < 0x80) {
notFound = false;
//check for \U+ encoded text
if (c == '\\') {
if (it+6 < s->end() && *(it+1) == 'U' && *(it+2) == '+') {
res += encodeText(std::string(it, it+7));
it +=6;
} else {
res +=c; //no \U+ encoded text write
}
} else
res +=c; //c!='\' ascii char write
} else if(c == 0x80 ){//1 byte table
notFound = false;
res += encodeNum(0x20AC);//euro sign
} else {//2 bytes
++it;
int code = (c << 8) | (unsigned char )(*it);
int sta = leadTable[c-0x81];
int end = leadTable[c-0x80];
for (int k=sta; k<end; k++){
if(doubleTable[k][0] == code) {
res += encodeNum(doubleTable[k][1]); //translate from table
notFound = false;
break;
}
}
}
//not found
if (notFound) res += encodeNum(NOTFOUND936);
} //end for
return res;
}
std::string DRW_Conv932Table::fromUtf8(std::string *s) {
std::string result;
bool notFound;
int code;
int j = 0;
for (unsigned int i=0; i < s->length(); i++) {
unsigned char c = s->at(i);
if (c > 0x7F) { //need to decode
result += s->substr(j,i-j);
std::string part1 = s->substr(i,4);
int l;
code = decodeNum(part1, &l);
j = i+l;
i = j - 1;
notFound = true;
// 1 byte table
if (code > 0xff60 && code < 0xFFA0) {
result += code - CPOFFSET932; //translate from table
notFound = false;
}
if (notFound && ( code<0xF8 || (code>0x390 && code<0x542) ||
(code>0x200F && code<0x9FA1) || code>0xF928 )) {
for (int k=0; k<cpLenght; k++){
if(doubleTable[k][1] == code) {
int data = doubleTable[k][0];
char d[3];
d[0] = data >> 8;
d[1] = data & 0xFF;
d[2]= '\0';
result += d; //translate from table
notFound = false;
break;
}
}
}
if (notFound)
result += decodeText(code);
} //direct conversion
}
result += s->substr(j);
return result;
}
std::string DRW_Conv932Table::toUtf8(std::string *s) {
std::string res;
std::string::iterator it;
for ( it=s->begin() ; it < s->end(); ++it ) {
bool notFound = true;
unsigned char c = *it;
if (c < 0x80) {
notFound = false;
//check for \U+ encoded text
if (c == '\\') {
if (it+6 < s->end() && *(it+1) == 'U' && *(it+2) == '+') {
res += encodeText(std::string(it, it+7));
it +=6;
} else {
res +=c; //no \U+ encoded text write
}
} else
res +=c; //c!='\' ascii char write
} else if(c > 0xA0 && c < 0xE0 ){//1 byte table
notFound = false;
res += encodeNum(c + CPOFFSET932); //translate from table
} else {//2 bytes
++it;
int code = (c << 8) | (unsigned char )(*it);
int sta;
int end=0;
if (c > 0x80 && c < 0xA0) {
sta = DRW_LeadTable932[c-0x81];
end = DRW_LeadTable932[c-0x80];
} else if (c > 0xDF && c < 0xFD){
sta = DRW_LeadTable932[c-0xC1];
end = DRW_LeadTable932[c-0xC0];
}
if (end > 0) {
for (int k=sta; k<end; k++){
if(DRW_DoubleTable932[k][0] == code) {
res += encodeNum(DRW_DoubleTable932[k][1]); //translate from table
notFound = false;
break;
}
}
}
}
//not found
if (notFound) res += encodeNum(NOTFOUND932);
} //end for
return res;
}
std::string DRW_ConvUTF16::fromUtf8(std::string *s){
DRW_UNUSED(s);
//RLZ: to be writen (only needed for write dwg 2007+)
return std::string();
}
std::string DRW_ConvUTF16::toUtf8(std::string *s){//RLZ: pending to write
std::string res;
std::string::iterator it;
for ( it=s->begin() ; it < s->end(); ++it ) {
unsigned char c1 = *it;
unsigned char c2 = *(++it);
duint16 ch = (c2 <<8) | c1;
res +=encodeNum(ch);
} //end for
return res;
const QByteArray encodedString = conv->fromUnicode(QString::fromStdString(s));
return std::string(encodedString.constData());
}
std::string DRW_TextCodec::correctCodePage(const std::string& s) {

View file

@ -3,13 +3,12 @@
#include <string>
class DRW_Converter;
class QTextCodec;
class DRW_TextCodec
{
public:
DRW_TextCodec();
~DRW_TextCodec();
std::string fromUtf8(std::string s);
std::string toUtf8(std::string s);
int getVersion(){return version;}
@ -25,67 +24,7 @@ private:
private:
int version;
std::string cp;
DRW_Converter *conv;
};
class DRW_Converter
{
public:
DRW_Converter(const int *t, int l){table = t;
cpLenght = l;}
virtual ~DRW_Converter(){}
virtual std::string fromUtf8(std::string *s) {return *s;}
virtual std::string toUtf8(std::string *s);
std::string encodeText(std::string stmp);
std::string decodeText(int c);
std::string encodeNum(int c);
int decodeNum(std::string s, int *b);
const int *table;
int cpLenght;
};
class DRW_ConvUTF16 : public DRW_Converter {
public:
DRW_ConvUTF16():DRW_Converter(NULL, 0) {}
virtual std::string fromUtf8(std::string *s);
virtual std::string toUtf8(std::string *s);
};
class DRW_ConvTable : public DRW_Converter {
public:
DRW_ConvTable(const int *t, int l):DRW_Converter(t, l) {}
virtual std::string fromUtf8(std::string *s);
virtual std::string toUtf8(std::string *s);
};
class DRW_ConvDBCSTable : public DRW_Converter {
public:
DRW_ConvDBCSTable(const int *t, const int *lt, const int dt[][2], int l):DRW_Converter(t, l) {
leadTable = lt;
doubleTable = dt;
}
virtual std::string fromUtf8(std::string *s);
virtual std::string toUtf8(std::string *s);
private:
const int *leadTable;
const int (*doubleTable)[2];
};
class DRW_Conv932Table : public DRW_Converter {
public:
DRW_Conv932Table(const int *t, const int *lt, const int dt[][2], int l):DRW_Converter(t, l) {
leadTable = lt;
doubleTable = dt;
}
virtual std::string fromUtf8(std::string *s);
virtual std::string toUtf8(std::string *s);
private:
const int *leadTable;
const int (*doubleTable)[2];
QTextCodec *conv;
};
#endif // DRW_TEXTCODEC_H

View file

@ -16,8 +16,6 @@
#include <algorithm>
#include <sstream>
#include <cassert>
#include <QString>
#include <QTextCodec>
#include "intern/drw_textcodec.h"
#include "intern/dxfreader.h"
#include "intern/dxfwriter.h"
@ -1169,14 +1167,7 @@ bool dxfRW::writeText(DRW_Text *ent){
writer->writeDouble(20, ent->basePoint.y);
writer->writeDouble(30, ent->basePoint.z);
writer->writeDouble(40, ent->height);
QString dxfText = QString::fromStdString(ent->text);
QTextCodec *codec = QTextCodec::codecForName("ANSI_1251");
QByteArray encodedString = codec->fromUnicode(dxfText);
writer->writeString(1, encodedString.constData());
//writer->writeUtf8String(1, ent->text);
writer->writeUtf8String(1, ent->text);
writer->writeDouble(50, ent->angle);
writer->writeDouble(41, ent->widthscale);
writer->writeDouble(51, ent->oblique);

View file

@ -33,11 +33,6 @@ HEADERS += \
$$PWD/vdxfengine.h \
$$PWD/vdxfpaintdevice.h \
$$PWD/dxfdef.h \
$$PWD/libdxfrw/intern/drw_cptable932.h \
$$PWD/libdxfrw/intern/drw_cptable936.h \
$$PWD/libdxfrw/intern/drw_cptable949.h \
$$PWD/libdxfrw/intern/drw_cptable950.h \
$$PWD/libdxfrw/intern/drw_cptables.h \
$$PWD/libdxfrw/intern/drw_dbg.h \
$$PWD/libdxfrw/intern/drw_textcodec.h \
$$PWD/libdxfrw/intern/dwgbuffer.h \