- STEP: support UTF16, UTF32 and Mac/Roman escape sequences.
parent
da88ab4408
commit
03c01685d3
|
@ -384,6 +384,8 @@ SET(IFC_SRCS
|
|||
STEPFile.h
|
||||
STEPFileReader.h
|
||||
STEPFileReader.cpp
|
||||
STEPFileEncoding.cpp
|
||||
STEPFileEncoding.h
|
||||
)
|
||||
SOURCE_GROUP( IFC FILES ${IFC_SRCS})
|
||||
|
||||
|
|
|
@ -0,0 +1,433 @@
|
|||
/*
|
||||
Open Asset Import Library (assimp)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2006-2012, assimp team
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use of this software in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
|
||||
* Neither the name of the assimp team, nor the names of its
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior
|
||||
written permission of the assimp team.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/** @file STEPFileEncoding.cpp
|
||||
* @brief STEP character handling, string unescaping
|
||||
*/
|
||||
#include "AssimpPCH.h"
|
||||
#include "STEPFileEncoding.h"
|
||||
#include "fast_atof.h"
|
||||
|
||||
#include "../contrib/ConvertUTF/ConvertUTF.h"
|
||||
|
||||
using namespace Assimp;
|
||||
|
||||
// roman1 to utf16 table
|
||||
static const UTF16 mac_codetable[] = {
|
||||
// 0x20 unassig./nonprint. slots
|
||||
0x0020 ,
|
||||
0x0021 ,
|
||||
0x0022 ,
|
||||
0x0023 ,
|
||||
0x0024 ,
|
||||
0x0025 ,
|
||||
0x0026 ,
|
||||
0x0027 ,
|
||||
0x0028 ,
|
||||
0x0029 ,
|
||||
0x002A ,
|
||||
0x002B ,
|
||||
0x002C ,
|
||||
0x002D ,
|
||||
0x002E ,
|
||||
0x002F ,
|
||||
0x0030 ,
|
||||
0x0031 ,
|
||||
0x0032 ,
|
||||
0x0033 ,
|
||||
0x0034 ,
|
||||
0x0035 ,
|
||||
0x0036 ,
|
||||
0x0037 ,
|
||||
0x0038 ,
|
||||
0x0039 ,
|
||||
0x003A ,
|
||||
0x003B ,
|
||||
0x003C ,
|
||||
0x003D ,
|
||||
0x003E ,
|
||||
0x003F ,
|
||||
0x0040 ,
|
||||
0x0041 ,
|
||||
0x0042 ,
|
||||
0x0043 ,
|
||||
0x0044 ,
|
||||
0x0045 ,
|
||||
0x0046 ,
|
||||
0x0047 ,
|
||||
0x0048 ,
|
||||
0x0049 ,
|
||||
0x004A ,
|
||||
0x004B ,
|
||||
0x004C ,
|
||||
0x004D ,
|
||||
0x004E ,
|
||||
0x004F ,
|
||||
0x0050 ,
|
||||
0x0051 ,
|
||||
0x0052 ,
|
||||
0x0053 ,
|
||||
0x0054 ,
|
||||
0x0055 ,
|
||||
0x0056 ,
|
||||
0x0057 ,
|
||||
0x0058 ,
|
||||
0x0059 ,
|
||||
0x005A ,
|
||||
0x005B ,
|
||||
0x005C ,
|
||||
0x005D ,
|
||||
0x005E ,
|
||||
0x005F ,
|
||||
0x0060 ,
|
||||
0x0061 ,
|
||||
0x0062 ,
|
||||
0x0063 ,
|
||||
0x0064 ,
|
||||
0x0065 ,
|
||||
0x0066 ,
|
||||
0x0067 ,
|
||||
0x0068 ,
|
||||
0x0069 ,
|
||||
0x006A ,
|
||||
0x006B ,
|
||||
0x006C ,
|
||||
0x006D ,
|
||||
0x006E ,
|
||||
0x006F ,
|
||||
0x0070 ,
|
||||
0x0071 ,
|
||||
0x0072 ,
|
||||
0x0073 ,
|
||||
0x0074 ,
|
||||
0x0075 ,
|
||||
0x0076 ,
|
||||
0x0077 ,
|
||||
0x0078 ,
|
||||
0x0079 ,
|
||||
0x007A ,
|
||||
0x007B ,
|
||||
0x007C ,
|
||||
0x007D ,
|
||||
0x007E ,
|
||||
0x0000 , // unassig.
|
||||
0x00C4 ,
|
||||
0x00C5 ,
|
||||
0x00C7 ,
|
||||
0x00C9 ,
|
||||
0x00D1 ,
|
||||
0x00D6 ,
|
||||
0x00DC ,
|
||||
0x00E1 ,
|
||||
0x00E0 ,
|
||||
0x00E2 ,
|
||||
0x00E4 ,
|
||||
0x00E3 ,
|
||||
0x00E5 ,
|
||||
0x00E7 ,
|
||||
0x00E9 ,
|
||||
0x00E8 ,
|
||||
0x00EA ,
|
||||
0x00EB ,
|
||||
0x00ED ,
|
||||
0x00EC ,
|
||||
0x00EE ,
|
||||
0x00EF ,
|
||||
0x00F1 ,
|
||||
0x00F3 ,
|
||||
0x00F2 ,
|
||||
0x00F4 ,
|
||||
0x00F6 ,
|
||||
0x00F5 ,
|
||||
0x00FA ,
|
||||
0x00F9 ,
|
||||
0x00FB ,
|
||||
0x00FC ,
|
||||
0x2020 ,
|
||||
0x00B0 ,
|
||||
0x00A2 ,
|
||||
0x00A3 ,
|
||||
0x00A7 ,
|
||||
0x2022 ,
|
||||
0x00B6 ,
|
||||
0x00DF ,
|
||||
0x00AE ,
|
||||
0x00A9 ,
|
||||
0x2122 ,
|
||||
0x00B4 ,
|
||||
0x00A8 ,
|
||||
0x2260 ,
|
||||
0x00C6 ,
|
||||
0x00D8 ,
|
||||
0x221E ,
|
||||
0x00B1 ,
|
||||
0x2264 ,
|
||||
0x2265 ,
|
||||
0x00A5 ,
|
||||
0x00B5 ,
|
||||
0x2202 ,
|
||||
0x2211 ,
|
||||
0x220F ,
|
||||
0x03C0 ,
|
||||
0x222B ,
|
||||
0x00AA ,
|
||||
0x00BA ,
|
||||
0x03A9 ,
|
||||
0x00E6 ,
|
||||
0x00F8 ,
|
||||
0x00BF ,
|
||||
0x00A1 ,
|
||||
0x00AC ,
|
||||
0x221A ,
|
||||
0x0192 ,
|
||||
0x2248 ,
|
||||
0x2206 ,
|
||||
0x00AB ,
|
||||
0x00BB ,
|
||||
0x2026 ,
|
||||
0x00A0 ,
|
||||
0x00C0 ,
|
||||
0x00C3 ,
|
||||
0x00D5 ,
|
||||
0x0152 ,
|
||||
0x0153 ,
|
||||
0x2013 ,
|
||||
0x2014 ,
|
||||
0x201C ,
|
||||
0x201D ,
|
||||
0x2018 ,
|
||||
0x2019 ,
|
||||
0x00F7 ,
|
||||
0x25CA ,
|
||||
0x00FF ,
|
||||
0x0178 ,
|
||||
0x2044 ,
|
||||
0x20AC ,
|
||||
0x2039 ,
|
||||
0x203A ,
|
||||
0xFB01 ,
|
||||
0xFB02 ,
|
||||
0x2021 ,
|
||||
0x00B7 ,
|
||||
0x201A ,
|
||||
0x201E ,
|
||||
0x2030 ,
|
||||
0x00C2 ,
|
||||
0x00CA ,
|
||||
0x00C1 ,
|
||||
0x00CB ,
|
||||
0x00C8 ,
|
||||
0x00CD ,
|
||||
0x00CE ,
|
||||
0x00CF ,
|
||||
0x00CC ,
|
||||
0x00D3 ,
|
||||
0x00D4 ,
|
||||
0xF8FF ,
|
||||
0x00D2 ,
|
||||
0x00DA ,
|
||||
0x00DB ,
|
||||
0x00D9 ,
|
||||
0x0131 ,
|
||||
0x02C6 ,
|
||||
0x02DC ,
|
||||
0x00AF ,
|
||||
0x02D8 ,
|
||||
0x02D9 ,
|
||||
0x02DA ,
|
||||
0x00B8 ,
|
||||
0x02DD ,
|
||||
0x02DB ,
|
||||
0x02C7
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
bool STEP::StringToUTF8(std::string& s)
|
||||
{
|
||||
// very basic handling for escaped string sequences
|
||||
// http://doc.spatial.com/index.php?title=InterOp:Connect/STEP&redirect=no
|
||||
|
||||
for (size_t i = 0; i < s.size(); ) {
|
||||
if (s[i] == '\\') {
|
||||
// \S\X - cp1252 (X is the character remapped to [0,127])
|
||||
if (i+3 < s.size() && s[i+1] == 'S' && s[i+2] == '\\') {
|
||||
// http://stackoverflow.com/questions/5586214/how-to-convert-char-from-iso-8859-1-to-utf-8-in-c-multiplatformly
|
||||
ai_assert((uint8_t)s[i+3] < 0x80);
|
||||
const uint8_t ch = s[i+3] + 0x80;
|
||||
|
||||
s[i] = 0xc0 | (ch & 0xc0) >> 6;
|
||||
s[i+1] = 0x80 | (ch & 0x3f);
|
||||
|
||||
s.erase(i + 2,2);
|
||||
++i;
|
||||
}
|
||||
// \X\xx - mac/roman (xx is a hex sequence)
|
||||
else if (i+4 < s.size() && s[i+1] == 'X' && s[i+2] == '\\') {
|
||||
|
||||
const uint8_t macval = HexOctetToDecimal(s.c_str() + i + 3);
|
||||
if(macval < 0x20) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ai_assert(sizeof(mac_codetable) / sizeof(mac_codetable[0]) == 0x100-0x20);
|
||||
|
||||
const UTF32 unival = mac_codetable[macval - 0x20], *univalp = &unival;
|
||||
|
||||
UTF8 temp[5], *tempp = temp;
|
||||
ai_assert(sizeof(UTF8) == 1);
|
||||
|
||||
if(ConvertUTF32toUTF8(&univalp, univalp+1, &tempp, tempp+sizeof(temp), lenientConversion) != conversionOK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t outcount = static_cast<size_t>(tempp-temp);
|
||||
|
||||
s.erase(i,5);
|
||||
s.insert(i, reinterpret_cast<char*>(temp), outcount);
|
||||
i += outcount;
|
||||
}
|
||||
// \Xn\ .. \X0\ - various unicode encodings (n=2: utf16; n=4: utf32)
|
||||
else if (i+3 < s.size() && s[i+1] == 'X' && s[i+2] >= '0' && s[i+2] <= '9') {
|
||||
switch(s[i+2]) {
|
||||
// utf16
|
||||
case '2':
|
||||
// utf32
|
||||
case '4':
|
||||
if (s[i+3] == '\\') {
|
||||
const size_t basei = i+4;
|
||||
// scan for \X0\
|
||||
size_t j = basei, jend = s.size()-4;
|
||||
for (; j < jend; ++j) {
|
||||
if (s[j] == '\\' && s[j] == 'X' && s[j] == '0' && s[j] == '\\') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == jend) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (j == basei) {
|
||||
s.erase(i,8);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (s[i+2] == '2') {
|
||||
if (((j - basei) % 4) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t count = (j-basei)/4;
|
||||
boost::scoped_array<UTF16> src = boost::scoped_array<UTF16>(new UTF16[count]);
|
||||
|
||||
const char* cur = s.c_str() + basei;
|
||||
for (size_t k = 0; k < count; ++k, cur += 4) {
|
||||
src[k] = (static_cast<UTF16>(HexOctetToDecimal(cur)) << 8u) |
|
||||
static_cast<UTF16>(HexOctetToDecimal(cur+2));
|
||||
}
|
||||
|
||||
const size_t dcount = count * 3; // this is enough to hold all possible outputs
|
||||
boost::scoped_array<UTF8> dest = boost::scoped_array<UTF8>(new UTF8[dcount]);
|
||||
|
||||
const UTF16* srct = src.get();
|
||||
UTF8* destt = dest.get();
|
||||
if(ConvertUTF16toUTF8(&srct, srct+count, &destt, destt+dcount, lenientConversion) != conversionOK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t outcount = static_cast<size_t>(destt-dest.get());
|
||||
|
||||
s.erase(i,(j+4-i));
|
||||
|
||||
ai_assert(sizeof(UTF8) == 1);
|
||||
s.insert(i, reinterpret_cast<char*>(dest.get()), outcount);
|
||||
|
||||
i += outcount;
|
||||
continue;
|
||||
}
|
||||
else if (s[i+2] == '4') {
|
||||
if (((j - basei) % 8) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t count = (j-basei)/8;
|
||||
boost::scoped_array<UTF32> src = boost::scoped_array<UTF32>(new UTF32[count]);
|
||||
|
||||
const char* cur = s.c_str() + basei;
|
||||
for (size_t k = 0; k < count; ++k, cur += 8) {
|
||||
src[k] = (static_cast<UTF32>(HexOctetToDecimal(cur )) << 24u) |
|
||||
(static_cast<UTF32>(HexOctetToDecimal(cur+2)) << 16u) |
|
||||
(static_cast<UTF32>(HexOctetToDecimal(cur+4)) << 8u) |
|
||||
(static_cast<UTF32>(HexOctetToDecimal(cur+6)));
|
||||
}
|
||||
|
||||
const size_t dcount = count * 5; // this is enough to hold all possible outputs
|
||||
boost::scoped_array<UTF8> dest = boost::scoped_array<UTF8>(new UTF8[dcount]);
|
||||
|
||||
const UTF32* srct = src.get();
|
||||
UTF8* destt = dest.get();
|
||||
if(ConvertUTF32toUTF8(&srct, srct+count, &destt, destt+dcount, lenientConversion) != conversionOK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t outcount = static_cast<size_t>(destt-dest.get());
|
||||
|
||||
s.erase(i,(j+4-i));
|
||||
|
||||
ai_assert(sizeof(UTF8) == 1);
|
||||
s.insert(i, reinterpret_cast<char*>(dest.get()), outcount);
|
||||
|
||||
i += outcount;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
// TODO: other encoding patterns?
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
++i;
|
||||
}
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
Open Asset Import Library (assimp)
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2006-2012, assimp team
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use of this software in source and binary forms,
|
||||
with or without modification, are permitted provided that the
|
||||
following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
|
||||
* Neither the name of the assimp team, nor the names of its
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior
|
||||
written permission of the assimp team.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_AI_STEPFILEENCODING_H
|
||||
#define INCLUDED_AI_STEPFILEENCODING_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Assimp {
    namespace STEP {

        /** Turns an ASCII STEP identifier, which may carry escaped
         *  character sequences in foreign encodings, into plain UTF8.
         *
         *  @param s String to convert; it is rewritten in place.
         *  @return false if an error occurred. In that case s may or may
         *    not have been modified and can still contain escape
         *    sequences (even partly escaped ones).
         */
        bool StringToUTF8(std::string& s);

    } // ! STEP
} // ! Assimp
|
||||
|
||||
#endif
|
|
@ -44,9 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
#include "AssimpPCH.h"
|
||||
#include "STEPFileReader.h"
|
||||
#include "STEPFileEncoding.h"
|
||||
#include "TinyFormatter.h"
|
||||
#include "fast_atof.h"
|
||||
|
||||
|
||||
using namespace Assimp;
|
||||
namespace EXPRESS = STEP::EXPRESS;
|
||||
|
||||
|
@ -331,7 +333,6 @@ void STEP::ReadFile(DB& db,const EXPRESS::ConversionSchema& scheme,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
boost::shared_ptr<const EXPRESS::DataType> EXPRESS::DataType::Parse(const char*& inout,uint64_t line, const EXPRESS::ConversionSchema* schema /*= NULL*/)
|
||||
{
|
||||
|
@ -419,34 +420,12 @@ boost::shared_ptr<const EXPRESS::DataType> EXPRESS::DataType::Parse(const char*&
|
|||
|
||||
inout = cur + 1;
|
||||
|
||||
// very basic handling for escaped string sequences
|
||||
// http://doc.spatial.com/index.php?title=InterOp:Connect/STEP&redirect=no
|
||||
// UTF16: \X2\ ... \X0\
|
||||
// UTF32: \X4\ ... \X0\
|
||||
// Mac: \X8\xx (xx is a hex sequence)
|
||||
// cp1252: \S\X (X is the character remapped to [0,127])
|
||||
// ? more of them ?
|
||||
|
||||
// Note: assimp is supposed to output UTF8 strings
|
||||
|
||||
// assimp is supposed to output UTF8 strings, so we have to deal
|
||||
// with foreign encodings.
|
||||
std::string stemp = std::string(start, static_cast<size_t>(cur - start));
|
||||
for (size_t i = 0; i < stemp.size(); ++i) {
|
||||
if (stemp[i] == '\\') {
|
||||
if (i+3 < stemp.size() && stemp[i+1] == 'S' && stemp[i+2] == '\\') {
|
||||
// http://stackoverflow.com/questions/5586214/how-to-convert-char-from-iso-8859-1-to-utf-8-in-c-multiplatformly
|
||||
ai_assert((uint8_t)stemp[i+3] < 0x80);
|
||||
const uint8_t ch = stemp[i+3] + 0x80;
|
||||
|
||||
stemp[i] = 0xc0 | (ch & 0xc0) >> 6;
|
||||
stemp[i+1] = 0x80 | (ch & 0x3f);
|
||||
|
||||
stemp.erase(i + 2,2);
|
||||
++i;
|
||||
}
|
||||
else if (i+2 < stemp.size() && stemp[i+1] == 'X' && IsNumeric(stemp[i+2])) {
|
||||
// TODO: warn
|
||||
}
|
||||
}
|
||||
if(!StringToUTF8(stemp)) {
|
||||
// TODO: route this to a correct logger with line numbers etc., better error messages
|
||||
DefaultLogger::get()->error("an error occurred reading escape sequences in ASCII text");
|
||||
}
|
||||
|
||||
return boost::make_shared<EXPRESS::STRING>(stemp);
|
||||
|
|
Loading…
Reference in New Issue