diff --git a/code/ACLoader.cpp b/code/ACLoader.cpp index 62bcac519..98c8780ce 100644 --- a/code/ACLoader.cpp +++ b/code/ACLoader.cpp @@ -726,19 +726,18 @@ void AC3DImporter::InternReadFile( const std::string& pFile, if( file.get() == NULL) throw new ImportErrorException( "Failed to open AC3D file " + pFile + "."); - const unsigned int fileSize = (unsigned int)file->FileSize(); - // allocate storage and copy the contents of the file to a memory buffer - std::vector mBuffer2(fileSize+1); - file->Read(&mBuffer2[0], 1, fileSize); - mBuffer2[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); + buffer = &mBuffer2[0]; mNumMeshes = 0; lights = polys = worlds = groups = 0; - if (::strncmp(buffer,"AC3D",4)) + if (::strncmp(buffer,"AC3D",4)) { throw new ImportErrorException("AC3D: No valid AC3D file, magic sequence not found"); + } // print the file format version to the console unsigned int version = HexDigitToDecimal( buffer[4] ); diff --git a/code/ASELoader.cpp b/code/ASELoader.cpp index 07e4a14c1..2cd1f71ec 100644 --- a/code/ASELoader.cpp +++ b/code/ASELoader.cpp @@ -108,18 +108,13 @@ void ASEImporter::InternReadFile( const std::string& pFile, boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open ASE file " + pFile + "."); - - size_t fileSize = file->FileSize(); - if (!fileSize) - throw new ImportErrorException( "ASE: File is empty"); + } // Allocate storage and copy the contents of the file to a memory buffer - // (terminate it with zero) - std::vector mBuffer2(fileSize+1); - file->Read( &mBuffer2[0], 1, fileSize); - mBuffer2[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); this->mBuffer = &mBuffer2[0]; this->pcScene = pScene; @@ -131,8 +126,8 @@ void ASEImporter::InternReadFile( const std::string& pFile, // 
------------------------------------------------------------------ unsigned int defaultFormat; std::string::size_type s = pFile.length()-1; - switch (pFile.c_str()[s]) - { + switch (pFile.c_str()[s]) { + case 'C': case 'c': defaultFormat = AI_ASE_OLD_FILE_FORMAT; @@ -150,8 +145,8 @@ void ASEImporter::InternReadFile( const std::string& pFile, // Check whether we god at least one mesh. If we did - generate // materials and copy meshes. // ------------------------------------------------------------------ - if ( !mParser->m_vMeshes.empty()) - { + if ( !mParser->m_vMeshes.empty()) { + // If absolutely no material has been loaded from the file // we need to generate a default material GenerateDefaultMaterial(); @@ -161,12 +156,15 @@ void ASEImporter::InternReadFile( const std::string& pFile, std::vector avOutMeshes; avOutMeshes.reserve(mParser->m_vMeshes.size()*2); for (std::vector::iterator i = mParser->m_vMeshes.begin();i != mParser->m_vMeshes.end();++i) { - if ((*i).bSkip)continue; + if ((*i).bSkip) { + continue; + } BuildUniqueRepresentation(*i); // Need to generate proper vertex normals if necessary - if(GenerateNormals(*i)) + if(GenerateNormals(*i)) { tookNormals = true; + } // Convert all meshes to aiMesh objects ConvertMeshes(*i,avOutMeshes); @@ -181,7 +179,9 @@ void ASEImporter::InternReadFile( const std::string& pFile, pScene->mNumMeshes = (unsigned int)avOutMeshes.size(); aiMesh** pp = pScene->mMeshes = new aiMesh*[pScene->mNumMeshes]; for (std::vector::const_iterator i = avOutMeshes.begin();i != avOutMeshes.end();++i) { - if (!(*i)->mNumFaces)continue; + if (!(*i)->mNumFaces) { + continue; + } *pp++ = *i; } pScene->mNumMeshes = (unsigned int)(pp - pScene->mMeshes); diff --git a/code/BaseImporter.cpp b/code/BaseImporter.cpp index 56925531e..69bba33bd 100644 --- a/code/BaseImporter.cpp +++ b/code/BaseImporter.cpp @@ -195,8 +195,9 @@ void BaseImporter::SetupProperties(const Importer* pImp) { ai_assert(size <= 16 && _magic); - if (!pIOHandler) + if (!pIOHandler) 
{ return false; + } const char* magic = (const char*)_magic; boost::scoped_ptr pStream (pIOHandler->Open(pFile)); @@ -207,8 +208,9 @@ void BaseImporter::SetupProperties(const Importer* pImp) // read 'size' characters from the file char data[16]; - if(size != pStream->Read(data,1,size)) + if(size != pStream->Read(data,1,size)) { return false; + } for (unsigned int i = 0; i < num; ++i) { // also check against big endian versions of tokens with size 2,4 @@ -217,19 +219,22 @@ void BaseImporter::SetupProperties(const Importer* pImp) if (2 == size) { int16_t rev = *((int16_t*)magic); ByteSwap::Swap(&rev); - if (*((int16_t*)data) == ((int16_t*)magic)[i] || *((int16_t*)data) == rev) + if (*((int16_t*)data) == ((int16_t*)magic)[i] || *((int16_t*)data) == rev) { return true; + } } else if (4 == size) { int32_t rev = *((int32_t*)magic); ByteSwap::Swap(&rev); - if (*((int32_t*)data) == ((int32_t*)magic)[i] || *((int32_t*)data) == rev) + if (*((int32_t*)data) == ((int32_t*)magic)[i] || *((int32_t*)data) == rev) { return true; + } } else { // any length ... 
just compare - if(!::memcmp(magic,data,size)) + if(!memcmp(magic,data,size)) { return true; + } } magic += size; } @@ -237,6 +242,123 @@ void BaseImporter::SetupProperties(const Importer* pImp) return false; } +#include "../contrib/ConvertUTF/ConvertUTF.h" + +// ------------------------------------------------------------------------------------------------ +void ReportResult(ConversionResult res) +{ + if(res == sourceExhausted) { + DefaultLogger::get()->error("Source ends with incomplete character sequence, Unicode transformation to UTF-8 fails"); + } + else if(res == sourceIllegal) { + DefaultLogger::get()->error("Source contains illegal character sequence, Unicode transformation to UTF-8 fails"); + } +} + +// ------------------------------------------------------------------------------------------------ +// Convert to UTF8 data +void BaseImporter::ConvertToUTF8(std::vector& data) +{ + ConversionResult result; + if(data.size() < 8) { + throw new ImportErrorException("File is too small"); + } + + // UTF 8 with BOM + if((uint8_t)data[0] == 0xEF && (uint8_t)data[1] == 0xBB && (uint8_t)data[2] == 0xBF) { + DefaultLogger::get()->debug("Found UTF-8 BOM ..."); + + std::copy(data.begin()+3,data.end(),data.begin()); + data.resize(data.size()-3); + return; + } + + // UTF 32 BE with BOM + if(*((uint32_t*)&data.front()) == 0xFFFE0000) { + + // swap the endianess .. 
+ for(uint32_t* p = (uint32_t*)&data.front(), *end = (uint32_t*)&data.back(); p <= end; ++p) { + AI_SWAP4P(p); + } + } + + // UTF 32 LE with BOM + if(*((uint32_t*)&data.front()) == 0x0000FFFE) { + DefaultLogger::get()->debug("Found UTF-32 BOM ..."); + + const uint32_t* sstart = (uint32_t*)&data.front()+1, *send = (uint32_t*)&data.back()+1; + char* dstart,*dend; + std::vector output; + do { + output.resize(output.size()?output.size()*3/2:data.size()/2); + dstart = &output.front(),dend = &output.back()+1; + + result = ConvertUTF32toUTF8((const UTF32**)&sstart,(const UTF32*)send,(UTF8**)&dstart,(UTF8*)dend,lenientConversion); + } while(result == targetExhausted); + + ReportResult(result); + + // copy to output buffer. + const size_t outlen = (size_t)(dstart-&output.front()); + data.assign(output.begin(),output.begin()+outlen); + return; + } + + // UTF 16 BE with BOM + if(*((uint16_t*)&data.front()) == 0xFFFE) { + + // swap the endianess .. + for(uint16_t* p = (uint16_t*)&data.front(), *end = (uint16_t*)&data.back(); p <= end; ++p) { + ByteSwap::Swap2(p); + } + } + + // UTF 16 LE with BOM + if(*((uint16_t*)&data.front()) == 0xFEFF) { + DefaultLogger::get()->debug("Found UTF-16 BOM ..."); + + const uint16_t* sstart = (uint16_t*)&data.front()+1, *send = (uint16_t*)&data.back()+1; + char* dstart,*dend; + std::vector output; + do { + output.resize(output.size()?output.size()*3/2:data.size()*3/4); + dstart = &output.front(),dend = &output.back()+1; + + result = ConvertUTF16toUTF8((const UTF16**)&sstart,(const UTF16*)send,(UTF8**)&dstart,(UTF8*)dend,lenientConversion); + } while(result == targetExhausted); + + ReportResult(result); + + // copy to output buffer. 
+ const size_t outlen = (size_t)(dstart-&output.front()); + data.assign(output.begin(),output.begin()+outlen); + return; + } +} + +// ------------------------------------------------------------------------------------------------ +void BaseImporter::TextFileToBuffer(IOStream* stream, + std::vector& data) +{ + ai_assert(NULL != stream); + + const size_t fileSize = stream->FileSize(); + if(!fileSize) { + throw new ImportErrorException("File is empty"); + } + + data.reserve(fileSize+1); + data.resize(fileSize); + if(fileSize != stream->Read( &data[0], 1, fileSize)) { + throw new ImportErrorException("File read error"); + } + + ConvertToUTF8(data); + + // append a binary zero to simplify string parsing + data.push_back(0); +} + // ------------------------------------------------------------------------------------------------ namespace Assimp { diff --git a/code/BaseImporter.h b/code/BaseImporter.h index 706454927..0409b90b1 100644 --- a/code/BaseImporter.h +++ b/code/BaseImporter.h @@ -201,7 +201,6 @@ public: */ aiScene* ReadFile( const std::string& pFile, IOSystem* pIOHandler); - // ------------------------------------------------------------------- /** Returns the error description of the last error that occured. * @return A description of the last error that occured. An empty @@ -211,7 +210,6 @@ public: return mErrorText; } - // ------------------------------------------------------------------- /** Called prior to ReadFile(). * The function is a request to the importer to update its configuration @@ -283,6 +281,7 @@ protected: virtual void InternReadFile( const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler) = 0; +public: // static utilities // ------------------------------------------------------------------- /** A utility for CanRead(). @@ -345,20 +344,24 @@ protected: unsigned int offset = 0, unsigned int size = 4); -#if 0 /** TODO **/ // ------------------------------------------------------------------- /** An utility for all text file loaders. 
It converts a file to our - * ASCII/UTF8 character set. Special unicode characters are lost. - * - * @param buffer Input buffer. Needn't be terminated with zero. - * @param length Length of the input buffer, in bytes. Receives the - * number of output characters, excluding the terminal char. - * @return true if the source format did not match our internal - * format so it was converted. - */ - static bool ConvertToUTF8(const char* buffer, - unsigned int& length); -#endif + * UTF8 character set. Errors are reported, but ignored. + * + * @param data File buffer to be converted to UTF8 data. The buffer + * is resized as appropriate. */ + static void ConvertToUTF8(std::vector& data); + + // ------------------------------------------------------------------- + /** Utility for text file loaders which copies the contents of the + * file into a memory buffer and converts it to our UTF8 + * representation. + * @param stream Stream to read from. + * @param data Output buffer to be resized and filled with the + * converted text file data. The buffer is terminated with + * a binary 0. 
*/ + static void TextFileToBuffer(IOStream* stream, + std::vector& data); protected: diff --git a/code/CMakeLists.txt b/code/CMakeLists.txt index af8e56932..1ea385e99 100644 --- a/code/CMakeLists.txt +++ b/code/CMakeLists.txt @@ -298,6 +298,11 @@ SOURCE_GROUP( IrrXML FILES ../contrib/irrXML/irrXML.h ) +SOURCE_GROUP( ConvertUTF FILES + ../contrib/ConvertUTF/ConvertUTF.h + ../contrib/ConvertUTF/ConvertUTF.c +) + SOURCE_GROUP( zlib FILES ../contrib/zlib/adler32.c ../contrib/zlib/compress.c @@ -567,6 +572,7 @@ ADD_LIBRARY( assimp SHARED ../contrib/zlib/zlib.h ../contrib/zlib/zutil.c ../contrib/zlib/zutil.h + ../contrib/ConvertUTF/ConvertUTF.c ) ADD_DEFINITIONS(-DASSIMP_BUILD_DLL_EXPORT) diff --git a/code/CSMLoader.cpp b/code/CSMLoader.cpp index ae4bb71fa..3e30db3c0 100644 --- a/code/CSMLoader.cpp +++ b/code/CSMLoader.cpp @@ -103,15 +103,13 @@ void CSMImporter::InternReadFile( const std::string& pFile, boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open CSM file " + pFile + "."); - - size_t fileSize = file->FileSize(); + } // allocate storage and copy the contents of the file to a memory buffer - std::vector mBuffer2(fileSize+1); - file->Read(&mBuffer2[0], 1, fileSize);mBuffer2[fileSize] = '\0'; - + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); const char* buffer = &mBuffer2[0]; aiAnimation* anim = new aiAnimation(); diff --git a/code/DXFLoader.cpp b/code/DXFLoader.cpp index b2776d95a..0bd5773b9 100644 --- a/code/DXFLoader.cpp +++ b/code/DXFLoader.cpp @@ -161,15 +161,14 @@ void DXFImporter::InternReadFile( const std::string& pFile, boost::scoped_ptr file( pIOHandler->Open( pFile)); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open DXF file " + pFile + ""); + } // read the contents of the file in a buffer - size_t 
m = file->FileSize(); - std::vector buffer2(m+1); + std::vector buffer2; + TextFileToBuffer(file.get(),buffer2); buffer = &buffer2[0]; - file->Read( &buffer2[0], m,1); - buffer2[m] = '\0'; bRepeat = false; mDefaultLayer = NULL; @@ -216,7 +215,7 @@ void DXFImporter::InternReadFile( const std::string& pFile, throw new ImportErrorException("DXF: this file contains no 3d data"); pScene->mMeshes = new aiMesh*[ pScene->mNumMeshes ]; - m = 0; + unsigned int m = 0; for (std::vector::const_iterator it = mLayers.begin(),end = mLayers.end();it != end;++it) { if ((*it).vPositions.empty()) { continue; @@ -288,7 +287,7 @@ void DXFImporter::InternReadFile( const std::string& pFile, for (m = 0; m < pScene->mRootNode->mNumChildren;++m) { aiNode* p = pScene->mRootNode->mChildren[m] = new aiNode(); p->mName.length = ::strlen( mLayers[m].name ); - ::strcpy(p->mName.data, mLayers[m].name); + strcpy(p->mName.data, mLayers[m].name); p->mMeshes = new unsigned int[p->mNumMeshes = 1]; p->mMeshes[0] = m; diff --git a/code/LWSLoader.cpp b/code/LWSLoader.cpp index 45b953b1e..278cffe57 100644 --- a/code/LWSLoader.cpp +++ b/code/LWSLoader.cpp @@ -469,13 +469,13 @@ void LWSImporter::InternReadFile( const std::string& pFile, aiScene* pScene, boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open LWS file " + pFile + "."); + } // Allocate storage and copy the contents of the file to a memory buffer - const size_t fileSize = file->FileSize(); - std::vector< char > mBuffer(fileSize); - file->Read( &mBuffer[0], 1, fileSize); + std::vector< char > mBuffer; + TextFileToBuffer(file.get(),mBuffer); // Parse the file structure LWS::Element root; const char* dummy = &mBuffer[0]; diff --git a/code/NFFLoader.cpp b/code/NFFLoader.cpp index 983119808..2c332d07e 100644 --- a/code/NFFLoader.cpp +++ b/code/NFFLoader.cpp @@ -117,8 +117,7 @@ void 
NFFImporter::LoadNFF2MaterialTable(std::vector& output, boost::scoped_ptr file( pIOHandler->Open( path, "rb")); // Check whether we can read from the file - if( !file.get()) - { + if( !file.get()) { DefaultLogger::get()->error("NFF2: Unable to open material library " + path + "."); return; } @@ -129,16 +128,14 @@ void NFFImporter::LoadNFF2MaterialTable(std::vector& output, // allocate storage and copy the contents of the file to a memory buffer // (terminate it with zero) std::vector mBuffer2(m+1); - file->Read(&mBuffer2[0],m,1); + TextFileToBuffer(file.get(),mBuffer2); const char* buffer = &mBuffer2[0]; - mBuffer2[m] = '\0'; // First of all: remove all comments from the file CommentRemover::RemoveLineComments("//",&mBuffer2[0]); // The file should start with the magic sequence "mat" - if (!TokenMatch(buffer,"mat",3)) - { + if (!TokenMatch(buffer,"mat",3)) { DefaultLogger::get()->error("NFF2: Not a valid material library " + path + "."); return; } @@ -229,13 +226,11 @@ void NFFImporter::InternReadFile( const std::string& pFile, // allocate storage and copy the contents of the file to a memory buffer // (terminate it with zero) - std::vector mBuffer2(m+1); - file->Read(&mBuffer2[0],m,1); + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); const char* buffer = &mBuffer2[0]; - mBuffer2[m] = '\0'; - // mesh arrays - separate here to make the handling of - // the pointers below easier. + // mesh arrays - separate here to make the handling of the pointers below easier. std::vector meshes; std::vector meshesWithNormals; std::vector meshesWithUVCoords; diff --git a/code/OFFLoader.cpp b/code/OFFLoader.cpp index 4eb64a891..4d76cf734 100644 --- a/code/OFFLoader.cpp +++ b/code/OFFLoader.cpp @@ -90,25 +90,25 @@ void OFFImporter::GetExtensionList(std::string& append) // ------------------------------------------------------------------------------------------------ // Imports the given file into the given scene structure. 
void OFFImporter::InternReadFile( const std::string& pFile, - aiScene* pScene, IOSystem* pIOHandler) + aiScene* pScene, IOSystem* pIOHandler) { boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open OFF file " + pFile + "."); - - unsigned int fileSize = (unsigned int)file->FileSize(); - + } + // allocate storage and copy the contents of the file to a memory buffer - std::vector mBuffer2(fileSize+1); - file->Read(&mBuffer2[0], 1, fileSize); - mBuffer2[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); const char* buffer = &mBuffer2[0]; char line[4096]; GetNextLine(buffer,line); - if ('O' == line[0])GetNextLine(buffer,line); // skip the 'OFF' line + if ('O' == line[0]) { + GetNextLine(buffer,line); // skip the 'OFF' line + } const char* sz = line; SkipSpaces(&sz); const unsigned int numVertices = strtol10(sz,&sz);SkipSpaces(&sz); diff --git a/code/ObjFileImporter.cpp b/code/ObjFileImporter.cpp index e137a23b7..75d1af19b 100644 --- a/code/ObjFileImporter.cpp +++ b/code/ObjFileImporter.cpp @@ -101,10 +101,7 @@ void ObjFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene, throw new ImportErrorException( "OBJ-file is too small."); // Allocate buffer and read file into it - m_Buffer.resize( fileSize + 1 ); - m_Buffer[ fileSize ] = '\0'; - const size_t readsize = file->Read( &m_Buffer.front(), sizeof(char), fileSize ); - assert( readsize == fileSize ); + TextFileToBuffer(file.get(),m_Buffer); // std::string strDirectory( 1, io.getOsSeparator() ), strModelName; @@ -124,6 +121,9 @@ void ObjFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene, // And create the proper return structures out of it CreateDataFromImport(parser.GetModel(), pScene); + + // Clean up allocated storage for the next import + m_Buffer.clear(); } // 
------------------------------------------------------------------------------------------------ diff --git a/code/ObjFileParser.cpp b/code/ObjFileParser.cpp index 4648f3571..562f0edfc 100644 --- a/code/ObjFileParser.cpp +++ b/code/ObjFileParser.cpp @@ -452,10 +452,8 @@ void ObjFileParser::getMaterialLib() } // Import material library data from file - size_t size = pFile->FileSize(); - std::vector buffer( size + 1 ); - buffer[ size ] = '\0'; - pFile->Read( &buffer[ 0 ], sizeof( char ), size ); + std::vector buffer; + BaseImporter::TextFileToBuffer(pFile,buffer); io->Close( pFile ); // Importing the material library diff --git a/code/PlyLoader.cpp b/code/PlyLoader.cpp index 806d3c750..d99322fe7 100644 --- a/code/PlyLoader.cpp +++ b/code/PlyLoader.cpp @@ -87,27 +87,20 @@ void PLYImporter::GetExtensionList(std::string& append) // ------------------------------------------------------------------------------------------------ // Imports the given file into the given scene structure. -void PLYImporter::InternReadFile( - const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler) +void PLYImporter::InternReadFile( const std::string& pFile, + aiScene* pScene, IOSystem* pIOHandler) { boost::scoped_ptr file( pIOHandler->Open( pFile)); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open PLY file " + pFile + "."); - - // check whether the ply file is large enough to contain - // at least the file header - size_t fileSize = file->FileSize(); - if( fileSize < 10) - throw new ImportErrorException( "PLY File is too small."); + } // allocate storage and copy the contents of the file to a memory buffer - // (terminate it with zero) - std::vector mBuffer2(fileSize+1); - file->Read( &mBuffer2[0], 1, fileSize); - mBuffer = &mBuffer2[0]; - mBuffer[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); + mBuffer = (unsigned char*)&mBuffer2[0]; // the beginning of 
the file must be PLY - magic, magic if (mBuffer[0] != 'P' && mBuffer[0] != 'p' || diff --git a/code/RawLoader.cpp b/code/RawLoader.cpp index 1a2e89b6d..c50e51093 100644 --- a/code/RawLoader.cpp +++ b/code/RawLoader.cpp @@ -85,17 +85,14 @@ void RAWImporter::InternReadFile( const std::string& pFile, boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open RAW file " + pFile + "."); - - unsigned int fileSize = (unsigned int)file->FileSize(); + } // allocate storage and copy the contents of the file to a memory buffer // (terminate it with zero) - std::vector mBuffer2(fileSize+1); - - file->Read(&mBuffer2[0], 1, fileSize); - mBuffer2[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); const char* buffer = &mBuffer2[0]; // list of groups loaded from the file diff --git a/code/SMDLoader.cpp b/code/SMDLoader.cpp index 226b7f2d0..3dfde4b1e 100644 --- a/code/SMDLoader.cpp +++ b/code/SMDLoader.cpp @@ -99,8 +99,7 @@ void SMDImporter::InternReadFile( boost::scoped_ptr file( pIOHandler->Open( pFile, "rt")); // Check whether we can read from the file - if( file.get() == NULL) - { + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open SMD/VTA file " + pFile + "."); } @@ -110,8 +109,7 @@ void SMDImporter::InternReadFile( this->pScene = pScene; std::vector buff(iFileSize+1); - file->Read( &buff[0], 1, iFileSize); - buff[iFileSize] = '\0'; + TextFileToBuffer(file.get(),buff); mBuffer = &buff[0]; iSmallestFrame = (1 << 31); diff --git a/code/STLLoader.cpp b/code/STLLoader.cpp index 60baf3ce7..8f3697af1 100644 --- a/code/STLLoader.cpp +++ b/code/STLLoader.cpp @@ -87,25 +87,22 @@ void STLImporter::GetExtensionList(std::string& append) // ------------------------------------------------------------------------------------------------ // Imports the given file into the given scene structure. 
-void STLImporter::InternReadFile( - const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler) +void STLImporter::InternReadFile( const std::string& pFile, + aiScene* pScene, IOSystem* pIOHandler) { boost::scoped_ptr file( pIOHandler->Open( pFile, "rb")); // Check whether we can read from the file - if( file.get() == NULL) - { + if( file.get() == NULL) { throw new ImportErrorException( "Failed to open STL file " + pFile + "."); } - this->fileSize = (unsigned int)file->FileSize(); + fileSize = (unsigned int)file->FileSize(); // allocate storage and copy the contents of the file to a memory buffer // (terminate it with zero) - std::vector mBuffer2(fileSize+1); - - file->Read(&mBuffer2[0], 1, fileSize); - mBuffer2[fileSize] = '\0'; + std::vector mBuffer2; + TextFileToBuffer(file.get(),mBuffer2); this->pScene = pScene; this->mBuffer = &mBuffer2[0]; @@ -129,18 +126,20 @@ void STLImporter::InternReadFile( // check whether the file starts with 'solid' - // in this case we can simply assume it IS a text file. finished. 
- if (!::strncmp(mBuffer,"solid",5)) - this->LoadASCIIFile(); - else bMatClr = this->LoadBinaryFile(); + if (!::strncmp(mBuffer,"solid",5)) { + LoadASCIIFile(); + } + else bMatClr = LoadBinaryFile(); // now copy faces pMesh->mFaces = new aiFace[pMesh->mNumFaces]; - for (unsigned int i = 0, p = 0; i < pMesh->mNumFaces;++i) - { + for (unsigned int i = 0, p = 0; i < pMesh->mNumFaces;++i) { + aiFace& face = pMesh->mFaces[i]; face.mIndices = new unsigned int[face.mNumIndices = 3]; - for (unsigned int o = 0; o < 3;++o,++p) + for (unsigned int o = 0; o < 3;++o,++p) { face.mIndices[o] = p; + } } // create a single default material - everything white, as we have vertex colors @@ -150,7 +149,9 @@ void STLImporter::InternReadFile( pcMat->AddProperty(&s, AI_MATKEY_NAME); aiColor4D clrDiffuse(1.0f,1.0f,1.0f,1.0f); - if (bMatClr)clrDiffuse = this->clrColorDefault; + if (bMatClr) { + clrDiffuse = clrColorDefault; + } pcMat->AddProperty(&clrDiffuse,1,AI_MATKEY_COLOR_DIFFUSE); pcMat->AddProperty(&clrDiffuse,1,AI_MATKEY_COLOR_SPECULAR); clrDiffuse = aiColor4D(0.05f,0.05f,0.05f,1.0f); @@ -169,14 +170,16 @@ void STLImporter::LoadASCIIFile() const char* sz = mBuffer + 5; // skip the "solid" SkipSpaces(&sz); const char* szMe = sz; - while (!::IsSpaceOrNewLine(*sz))sz++; - unsigned int temp; + while (!::IsSpaceOrNewLine(*sz)) { + sz++; + } + size_t temp; // setup the name of the node - if ((temp = (unsigned int)(sz-szMe))) - { + if ((temp = (size_t)(sz-szMe))) { + pScene->mRootNode->mName.length = temp; - ::memcpy(pScene->mRootNode->mName.data,szMe,temp); + memcpy(pScene->mRootNode->mName.data,szMe,temp); pScene->mRootNode->mName.data[temp] = '\0'; } else pScene->mRootNode->mName.Set(""); @@ -185,7 +188,7 @@ void STLImporter::LoadASCIIFile() // assume we'll need 160 bytes for each face pMesh->mNumVertices = ( pMesh->mNumFaces = fileSize / 160 ) * 3; pMesh->mVertices = new aiVector3D[pMesh->mNumVertices]; - pMesh->mNormals = new aiVector3D[pMesh->mNumVertices]; + pMesh->mNormals = new 
aiVector3D[pMesh->mNumVertices]; unsigned int curFace = 0, curVertex = 3; while (true) @@ -198,11 +201,12 @@ void STLImporter::LoadASCIIFile() break; } // facet normal -0.13 -0.13 -0.98 - if (!::strncmp(sz,"facet",5) && ::IsSpaceOrNewLine(*(sz+5))) - { - if (3 != curVertex)DefaultLogger::get()->warn("STL: A new facet begins but the old is not yet complete"); - if (pMesh->mNumFaces == curFace) - { + if (!strncmp(sz,"facet",5) && IsSpaceOrNewLine(*(sz+5))) { + + if (3 != curVertex) { + DefaultLogger::get()->warn("STL: A new facet begins but the old is not yet complete"); + } + if (pMesh->mNumFaces == curFace) { // need to resize the arrays, our size estimate was wrong unsigned int iNeededSize = (unsigned int)(sz-mBuffer) / pMesh->mNumFaces; if (iNeededSize <= 160)iNeededSize >>= 1; // prevent endless looping @@ -210,11 +214,11 @@ void STLImporter::LoadASCIIFile() add += add >> 3; // add 12.5% as buffer iNeededSize = (pMesh->mNumFaces + add)*3; aiVector3D* pv = new aiVector3D[iNeededSize]; - ::memcpy(pv,pMesh->mVertices,pMesh->mNumVertices*sizeof(aiVector3D)); + memcpy(pv,pMesh->mVertices,pMesh->mNumVertices*sizeof(aiVector3D)); delete[] pMesh->mVertices; pMesh->mVertices = pv; pv = new aiVector3D[iNeededSize]; - ::memcpy(pv,pMesh->mNormals,pMesh->mNumVertices*sizeof(aiVector3D)); + memcpy(pv,pMesh->mNormals,pMesh->mNumVertices*sizeof(aiVector3D)); delete[] pMesh->mNormals; pMesh->mNormals = pv; @@ -226,8 +230,7 @@ void STLImporter::LoadASCIIFile() sz += 6; curVertex = 0; SkipSpaces(&sz); - if (::strncmp(sz,"normal",6)) - { + if (strncmp(sz,"normal",6)) { DefaultLogger::get()->warn("STL: a facet normal vector was expected but not found"); } else @@ -244,10 +247,9 @@ void STLImporter::LoadASCIIFile() } } // vertex 1.50000 1.50000 0.00000 - else if (!::strncmp(sz,"vertex",6) && ::IsSpaceOrNewLine(*(sz+6))) + else if (!strncmp(sz,"vertex",6) && ::IsSpaceOrNewLine(*(sz+6))) { - if (3 == curVertex) - { + if (3 == curVertex) { DefaultLogger::get()->error("STL: a facet with 
more than 3 vertices has been found"); } else @@ -262,17 +264,17 @@ void STLImporter::LoadASCIIFile() sz = fast_atof_move(sz, (float&)vn->z ); } } - else if (!::strncmp(sz,"endsolid",8)) - { + else if (!::strncmp(sz,"endsolid",8)) { // finished! break; } // else skip the whole identifier - else while (!::IsSpaceOrNewLine(*sz))++sz; + else while (!::IsSpaceOrNewLine(*sz)) { + ++sz; + } } - if (!curFace) - { + if (!curFace) { pMesh->mNumFaces = 0; throw new ImportErrorException("STL: ASCII file is empty or invalid; no data loaded"); } @@ -280,31 +282,32 @@ void STLImporter::LoadASCIIFile() pMesh->mNumVertices = curFace*3; // we are finished! } + // ------------------------------------------------------------------------------------------------ // Read a binary STL file bool STLImporter::LoadBinaryFile() { // skip the first 80 bytes - if (fileSize < 84) + if (fileSize < 84) { throw new ImportErrorException("STL: file is too small for the header"); - + } bool bIsMaterialise = false; // search for an occurence of "COLOR=" in the header const char* sz2 = (const char*)mBuffer; const char* const szEnd = sz2+80; - while (sz2 < szEnd) - { + while (sz2 < szEnd) { + if ('C' == *sz2++ && 'O' == *sz2++ && 'L' == *sz2++ && - 'O' == *sz2++ && 'R' == *sz2++ && '=' == *sz2++) - { + 'O' == *sz2++ && 'R' == *sz2++ && '=' == *sz2++) { + // read the default vertex color for facets bIsMaterialise = true; DefaultLogger::get()->info("STL: Taking code path for Materialise files"); - this->clrColorDefault.r = (*sz2++) / 255.0f; - this->clrColorDefault.g = (*sz2++) / 255.0f; - this->clrColorDefault.b = (*sz2++) / 255.0f; - this->clrColorDefault.a = (*sz2++) / 255.0f; + clrColorDefault.r = (*sz2++) / 255.0f; + clrColorDefault.g = (*sz2++) / 255.0f; + clrColorDefault.b = (*sz2++) / 255.0f; + clrColorDefault.a = (*sz2++) / 255.0f; break; } } @@ -317,10 +320,13 @@ bool STLImporter::LoadBinaryFile() pMesh->mNumFaces = *((uint32_t*)sz); sz += 4; - if (fileSize < 84 + pMesh->mNumFaces*50) - throw 
new ImportErrorException("STL: file is too small to keep all facets"); - if (!pMesh->mNumFaces) + if (fileSize < 84 + pMesh->mNumFaces*50) { + throw new ImportErrorException("STL: file is too small to hold all facets"); + } + + if (!pMesh->mNumFaces) { throw new ImportErrorException("STL: file is empty. There are no facets defined"); + } pMesh->mNumVertices = pMesh->mNumFaces*3; @@ -328,9 +334,9 @@ bool STLImporter::LoadBinaryFile() vp = pMesh->mVertices = new aiVector3D[pMesh->mNumVertices]; vn = pMesh->mNormals = new aiVector3D[pMesh->mNumVertices]; - for (unsigned int i = 0; i < pMesh->mNumFaces;++i) - { - // NOTE: Blender sometimes writes empty normals this is not + for (unsigned int i = 0; i < pMesh->mNumFaces;++i) { + + // NOTE: Blender sometimes writes empty normals ... this is not // our fault ... the RemoveInvalidData helper step should fix that *vn = *((aiVector3D*)sz); sz += sizeof(aiVector3D); diff --git a/code/UnrealLoader.cpp b/code/UnrealLoader.cpp index 3ca30996d..bfabd37fb 100644 --- a/code/UnrealLoader.cpp +++ b/code/UnrealLoader.cpp @@ -217,11 +217,8 @@ void UnrealImporter::InternReadFile( const std::string& pFile, boost::scoped_ptr pb (pIOHandler->Open(uc_path)); if (pb.get()) { - size_t s = pb->FileSize(); - std::vector _data(s+1); - pb->Read(&_data[0],s,1); - - _data[s] = 0; + std::vector _data; + TextFileToBuffer(pb.get(),_data); const char* data = &_data[0]; std::vector< std::pair< std::string,std::string > > tempTextures; diff --git a/code/XFileImporter.cpp b/code/XFileImporter.cpp index b7508b06e..c56e2d955 100644 --- a/code/XFileImporter.cpp +++ b/code/XFileImporter.cpp @@ -96,8 +96,10 @@ void XFileImporter::InternReadFile( const std::string& pFile, aiScene* pScene, I if( fileSize < 16) throw new ImportErrorException( "XFile is too small."); + // in the hope that binary files will never start with a BOM ... 
mBuffer.resize( fileSize); file->Read( &mBuffer.front(), 1, fileSize); + ConvertToUTF8(mBuffer); // parse the file into a temporary representation XFileParser parser( mBuffer); diff --git a/code/XFileParser.cpp b/code/XFileParser.cpp index e482389c0..8a23f1bb9 100644 --- a/code/XFileParser.cpp +++ b/code/XFileParser.cpp @@ -249,8 +249,9 @@ XFileParser::XFileParser( const std::vector& pBuffer) ParseFile(); // filter the imported hierarchy for some degenerated cases - if( mScene->mRootNode) + if( mScene->mRootNode) { FilterHierarchy( mScene->mRootNode); + } } // ------------------------------------------------------------------------------------------------ diff --git a/code/irrXMLWrapper.h b/code/irrXMLWrapper.h index 6b6b60489..4ec2fe3ff 100644 --- a/code/irrXMLWrapper.h +++ b/code/irrXMLWrapper.h @@ -1,3 +1,42 @@ +/* +Open Asset Import Library (ASSIMP) +---------------------------------------------------------------------- + +Copyright (c) 2006-2008, ASSIMP Development Team +All rights reserved. + +Redistribution and use of this software in source and binary forms, +with or without modification, are permitted provided that the +following conditions are met: + +* Redistributions of source code must retain the above +copyright notice, this list of conditions and the +following disclaimer. + +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other +materials provided with the distribution. + +* Neither the name of the ASSIMP team, nor the names of its +contributors may be used to endorse or promote products +derived from this software without specific prior +written permission of the ASSIMP Development Team. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- +*/ #ifndef INCLUDED_AI_IRRXML_WRAPPER #define INCLUDED_AI_IRRXML_WRAPPER @@ -5,43 +44,69 @@ // some long includes .... #include "./../contrib/irrXML/irrXML.h" #include "./../include/IOStream.h" - -namespace Assimp -{ +namespace Assimp { // --------------------------------------------------------------------------------- /** @brief Utility class to make IrrXML work together with our custom IO system - * - * See the IrrXML docs for more details. - */ -class CIrrXML_IOStreamReader : public irr::io::IFileReadCallBack + * See the IrrXML docs for more details.*/ +class CIrrXML_IOStreamReader + : public irr::io::IFileReadCallBack { public: + // ---------------------------------------------------------------------------------- //! Construction from an existing IOStream CIrrXML_IOStreamReader(IOStream* _stream) : stream (_stream) - {} + , t (0) + { + // Map the buffer into memory and convert it to UTF8. IrrXML provides its + // own conversion, which is merely a cast from uintNN_t to uint8_t. Thus, + // it is not suitable for our purposes and we have to do it BEFORE IrrXML + // gets the buffer. Sadly, this forces us to map the whole file into + // memory. + + data.resize(stream->FileSize()); + stream->Read(&data[0],data.size(),1); + + BaseImporter::ConvertToUTF8(data); + } + + // ---------------------------------------------------------------------------------- //!
Virtual destructor virtual ~CIrrXML_IOStreamReader() {}; + // ---------------------------------------------------------------------------------- //! Reads an amount of bytes from the file. /** @param buffer: Pointer to output buffer. * @param sizeToRead: Amount of bytes to read - * @return Returns how much bytes were read. - */ + * @return Returns how many bytes were read. */ virtual int read(void* buffer, int sizeToRead) { - return (int)stream->Read(buffer,1,sizeToRead); + if(sizeToRead<0) { + return 0; + } + if(t+sizeToRead>data.size()) { + sizeToRead = data.size()-t; + } + + memcpy(buffer,&data.front()+t,sizeToRead); + + t += sizeToRead; + return sizeToRead; } + // ---------------------------------------------------------------------------------- //! Returns size of file in bytes virtual int getSize() { - return (int)stream->FileSize(); + return (int)data.size(); } private: IOStream* stream; + std::vector data; + size_t t; + }; // ! class CIrrXML_IOStreamReader } // ! Assimp diff --git a/code/makefile b/code/makefile index a343d7211..ba9d13108 100644 --- a/code/makefile +++ b/code/makefile @@ -1,6 +1,3 @@ - -# UNTESTED!!!!
- # Makefile for Open Asset Import Library (GNU-make) # aramis_acg@users.sourceforge.net @@ -23,8 +20,9 @@ OBJECTS := $(patsubst %.cpp,%.o, $(wildcard *.cpp)) OBJECTS += $(patsubst %.cpp,%.o, $(wildcard extra/*.cpp)) OBJECTS += $(patsubst %.cpp,%.o, $(wildcard ./../contrib/irrXML/*.cpp)) -# C object files (mainly from zlib) +# C object files OBJECTSC := $(patsubst %.c,%.oc, $(wildcard ./../contrib/zlib/*.c)) +OBJECTSC += $(patsubst %.c,%.oc, $(wildcard ./../contrib/ConvertUTF/*.c)) # Include flags for gcc INCLUDEFLAGS = diff --git a/code/makefile.mingw b/code/makefile.mingw index c6c761b7d..0d159f77c 100644 --- a/code/makefile.mingw +++ b/code/makefile.mingw @@ -23,8 +23,9 @@ OBJECTS := $(patsubst %.cpp,%.o, $(wildcard *.cpp)) OBJECTS += $(patsubst %.cpp,%.o, $(wildcard extra/*.cpp)) OBJECTS += $(patsubst %.cpp,%.o, $(wildcard ./../contrib/irrXML/*.cpp)) -# C object files (mainly from zlib) +# C object files OBJECTSC := $(patsubst %.c,%.oc, $(wildcard ./../contrib/zlib/*.c)) +OBJECTSC += $(patsubst %.c,%.oc, $(wildcard ./../contrib/ConvertUTF/*.c)) # Include flags for gcc INCLUDEFLAGS = diff --git a/contrib/ConvertUTF/ConvertUTF.c b/contrib/ConvertUTF/ConvertUTF.c new file mode 100644 index 000000000..9b3deebd6 --- /dev/null +++ b/contrib/ConvertUTF/ConvertUTF.c @@ -0,0 +1,539 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. 
hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. 
+ +------------------------------------------------------------------------ */ + + +#include "ConvertUTF.h" +#ifdef CVTUTF_DEBUG +#include +#endif + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define false 0 +#define true 1 + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. 
+ */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. + */ + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. 
*/ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. 
+ */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (source+length > sourceEnd) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. 
+ */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. 
+ */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. + + --------------------------------------------------------------------- */ diff --git a/contrib/ConvertUTF/ConvertUTF.h b/contrib/ConvertUTF/ConvertUTF.h new file mode 100644 index 000000000..e26491536 --- /dev/null +++ b/contrib/ConvertUTF/ConvertUTF.h @@ -0,0 +1,149 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. 
hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. + + Several functions are included here, forming a complete set of + conversions between the three formats. UTF-7 is not included + here, but is handled in a separate source file. + + Each of these routines takes pointers to input buffers and output + buffers. The input buffers are const. + + Each routine converts the text between *sourceStart and sourceEnd, + putting the result into the buffer between *targetStart and + targetEnd. Note: the end pointers are *after* the last item: e.g. + *(sourceEnd - 1) is the last item. + + The return result indicates whether the conversion was successful, + and if not, whether the problem was in the source or target buffers. + (Only the first encountered problem is indicated.) + + After the conversion, *sourceStart and *targetStart are both + updated to point to the end of last text successfully converted in + the respective buffers. + + Input parameters: + sourceStart - pointer to a pointer to the source buffer. + The contents of this are modified on return so that + it points at the next thing to be converted. + targetStart - similarly, pointer to pointer to the target buffer. + sourceEnd, targetEnd - respectively pointers to the ends of the + two buffers, for overflow checking only. + + These conversion functions take a ConversionFlags argument. When this + flag is set to strict, both irregular sequences and isolated surrogates + will cause an error. When the flag is set to lenient, both irregular + sequences and isolated surrogates are converted.
+ + Whether the flag is strict or lenient, all illegal sequences will cause + an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, + or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + must check for illegal sequences. + + When the flag is set to lenient, characters over 0x10FFFF are converted + to the replacement character; otherwise (when the flag is set to strict) + they constitute an error. + + Output parameters: + The value "sourceIllegal" is returned from some routines if the input + sequence is malformed. When "sourceIllegal" is returned, the source + value will point to the illegal value that caused the problem. E.g., + in UTF-8 when a sequence is malformed, it points to the start of the + malformed sequence. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ +typedef unsigned char Boolean; /* 0 or 1 */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +typedef enum { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff.
room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +/* This is for C++ and does no harm in C */ +#ifdef __cplusplus +extern "C" { +#endif + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); + +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); + +#ifdef __cplusplus +} +#endif + +/* --------------------------------------------------------------------- */ diff --git a/contrib/ConvertUTF/readme.txt b/contrib/ConvertUTF/readme.txt new file mode 100644 index 000000000..b9f17fb81 --- /dev/null +++ b/contrib/ConvertUTF/readme.txt @@ -0,0 +1,43 @@ + +The accompanying C source code file "ConvertUTF.c" and the associated header +file "ConvertUTF.h" provide for conversion between various transformation +formats of Unicode characters. 
The following conversions are supported: + + UTF-32 to UTF-16 + UTF-32 to UTF-8 + UTF-16 to UTF-32 + UTF-16 to UTF-8 + UTF-8 to UTF-16 + UTF-8 to UTF-32 + +In addition, there is a test harness which runs various tests. + +The files "CVTUTF7.C" and "CVTUTF7.H" are for archival and historical purposes +only. They have not been updated to Unicode 3.0 or later and should be +considered obsolescent. "CVTUTF7.C" contains two functions that can convert +between UCS2 (i.e., the BMP characters only) and UTF-7. Surrogates are +not supported, the code has not been tested, and should be considered +unsuitable for general purpose use. + +Please submit any bug reports about these programs here: + + http://www.unicode.org/unicode/reporting.html + +Version 1.0: initial version. + +Version 1.1: corrected some minor problems; added stricter checks. + +Version 1.2: corrected switch statements associated with "extraBytesToRead" + in 4 & 5 byte cases, in functions for conversion from UTF8. + Note: formally, the 4 & 5 byte cases are illegal in the latest + UTF8, but the table and this code has always catered for those, + cases since at one time they were legal. + +Version 1.3: Updated UTF-8 legality check; + updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions + Updated UTF-8 legality tests in harness.c + + +Last update: October 19, 2004 + + diff --git a/include/aiTypes.h b/include/aiTypes.h index 307f45197..c90261970 100644 --- a/include/aiTypes.h +++ b/include/aiTypes.h @@ -255,10 +255,24 @@ struct aiColor4D #include "./Compiler/poppack1.h" // ---------------------------------------------------------------------------------- -/** Represents a string, zero byte terminated. +/** Represents an UTF-8 string, zero byte terminated. * - * We use this representation to be C-compatible. The length of such a string is - * limited to MAXLEN characters (excluding the terminal zero). + * The character set of an aiString is explicitly defined to be UTF-8. 
This Unicode + * transformation was chosen in the belief that most strings in 3d files are limited + * to the ASCII characters, thus the character set needed to be ASCII compatible. + * + * Most text file loaders provide proper Unicode input file handling; special Unicode + * characters are correctly transcoded to UTF8 and are kept throughout the library's + * import pipeline. + * + * For most applications, it will be absolutely sufficient to interpret the + * aiString as ASCII data and work with it as one would work with a plain char*. + * Windows users in need of proper support for e.g. Asian characters can use the + * #MultiByteToWideChar(), #WideCharToMultiByte() WinAPI functionality to convert the + * UTF-8 strings to their working character set (e.g. MBCS, WideChar). + * + * We use this representation instead of std::string to be C-compatible. The + * (binary) length of such a string is limited to MAXLEN characters (excluding the 0). */ struct aiString { @@ -271,7 +285,7 @@ struct aiString #ifdef _DEBUG // Debug build: overwrite the string on its full length with ESC (27) - ::memset(data+1,27,MAXLEN-1); + memset(data+1,27,MAXLEN-1); #endif } @@ -279,7 +293,7 @@ struct aiString aiString(const aiString& rOther) : length(rOther.length) { - ::memcpy( data, rOther.data, rOther.length); + memcpy( data, rOther.data, rOther.length); data[length] = '\0'; } @@ -344,7 +358,7 @@ struct aiString return; } - ::memcpy(&data[length],app,len+1); + memcpy(&data[length],app,len+1); length += len; } @@ -355,13 +369,15 @@ struct aiString #ifdef _DEBUG // Debug build: overwrite the string on its full length with ESC (27) - ::memset(data+1,27,MAXLEN-1); + memset(data+1,27,MAXLEN-1); #endif } #endif // !__cplusplus - /** Length of the string excluding the terminal 0 */ + /** Binary length of the string excluding the terminal 0. This is NOT the + * logical length of strings containing UTF-8 multibyte sequences!
It's + * the number of bytes from the beginning of the string to its end.*/ size_t length; /** String buffer. Size limit is MAXLEN */ diff --git a/mkutil/revision.h b/mkutil/revision.h index 6dcb547f5..2be4c4f5c 100644 --- a/mkutil/revision.h +++ b/mkutil/revision.h @@ -1 +1 @@ -#define SVNRevision 433 +#define SVNRevision 467 diff --git a/test/models/AC/SphereWithLight_UTF16LE.ac b/test/models/AC/SphereWithLight_UTF16LE.ac new file mode 100644 index 000000000..14d158aa6 Binary files /dev/null and b/test/models/AC/SphereWithLight_UTF16LE.ac differ diff --git a/test/models/AC/SphereWithLight_UTF8BOM.ac b/test/models/AC/SphereWithLight_UTF8BOM.ac new file mode 100644 index 000000000..656a6ca56 --- /dev/null +++ b/test/models/AC/SphereWithLight_UTF8BOM.ac @@ -0,0 +1,1134 @@ +AC3Db +MATERIAL "ac3dmat1" rgb 1 1 1 amb 0.2 0.2 0.2 emis 0 0 0 spec 0.2 0.2 0.2 shi 128 trans 0 +OBJECT world +kids 2 +OBJECT light +name "中国菜中国菜2" +loc 0.000424567 -0.0127304 0 +kids 0 +OBJECT poly +name "中国菜" +loc -0.0624103 -0.012381 0.0558408 +texture "./../LWO/LWO2/MappingModes/earthSpherical.jpg" +crease 45.000000 +numvert 134 +-0.00202139 0.0563461 0 +0.0108348 0.0544951 -0.00722633 +0.00540113 0.0544951 -0.0125164 +-0.00202139 0.0544951 -0.0144527 +-0.0094439 0.0544951 -0.0125164 +-0.0148776 0.0544951 -0.00722633 +-0.0168664 0.0544951 0 +-0.0148776 0.0544951 0.00722633 +-0.00944391 0.0544951 0.0125164 +-0.00202139 0.0544951 0.0144527 +0.00540113 0.0544951 0.0125164 +0.0108348 0.0544951 0.00722633 +0.0128236 0.0544951 0 +0.0228148 0.049068 -0.0139602 +0.0123178 0.049068 -0.0241798 +-0.00202138 0.049068 -0.0279204 +-0.0163606 0.049068 -0.0241798 +-0.0268576 0.049068 -0.0139602 +-0.0306998 0.049068 -3.72529e-009 +-0.0268576 0.049068 0.0139602 +-0.0163606 0.049068 0.0241798 +-0.00202139 0.049068 0.0279204 +0.0123178 0.049068 0.0241798 +0.0228148 0.049068 0.0139602 +0.026657 0.049068 0 +0.0331024 0.0404348 -0.0197427 +0.0182573 0.0404348 -0.0341954 +-0.00202138 0.0404348 -0.0394854 +-0.0223001 
0.0404348 -0.0341954 +-0.0371451 0.0404348 -0.0197427 +-0.0425788 0.0404348 -3.72529e-009 +-0.0371451 0.0404348 0.0197427 +-0.0223001 0.0404348 0.0341954 +-0.00202139 0.0404348 0.0394854 +0.0182573 0.0404348 0.0341954 +0.0331024 0.0404348 0.0197427 +0.038536 0.0404348 0 +0.0409962 0.0291838 -0.0241798 +0.0228149 0.0291838 -0.0418806 +-0.00202138 0.0291838 -0.0483595 +-0.0268576 0.0291838 -0.0418806 +-0.045039 0.0291838 -0.0241798 +-0.0516939 0.0291838 -3.72529e-009 +-0.045039 0.0291838 0.0241798 +-0.0268576 0.0291838 0.0418806 +-0.00202139 0.0291838 0.0483595 +0.0228148 0.0291838 0.0418806 +0.0409962 0.0291838 0.0241798 +0.0476511 0.0291838 0 +0.0459585 0.0160817 -0.026969 +0.0256798 0.0160817 -0.0467117 +-0.00202138 0.0160817 -0.0539381 +-0.0297226 0.0160817 -0.0467117 +-0.0500013 0.0160817 -0.026969 +-0.0574238 0.0160817 -3.72529e-009 +-0.0500013 0.0160817 0.026969 +-0.0297226 0.0160817 0.0467117 +-0.00202139 0.0160817 0.0539381 +0.0256798 0.0160817 0.0467117 +0.0459585 0.0160817 0.026969 +0.053381 0.0160817 0 +0.0476511 0.00202139 -0.0279204 +0.026657 0.00202139 -0.0483595 +-0.00202138 0.00202139 -0.0558408 +-0.0306998 0.00202139 -0.0483595 +-0.0516938 0.00202139 -0.0279204 +-0.0593782 0.00202139 -3.72529e-009 +-0.0516939 0.00202139 0.0279204 +-0.0306998 0.00202139 0.0483595 +-0.00202139 0.00202139 0.0558408 +0.026657 0.00202139 0.0483595 +0.0476511 0.00202139 0.0279204 +0.0553354 0.00202139 0 +0.0459585 -0.0120389 -0.026969 +0.0256798 -0.0120389 -0.0467117 +-0.00202138 -0.0120389 -0.0539381 +-0.0297226 -0.0120389 -0.0467117 +-0.0500013 -0.0120389 -0.026969 +-0.0574238 -0.0120389 -3.72529e-009 +-0.0500013 -0.0120389 0.026969 +-0.0297226 -0.0120389 0.0467117 +-0.00202139 -0.0120389 0.0539381 +0.0256798 -0.0120389 0.0467117 +0.0459585 -0.0120389 0.026969 +0.053381 -0.0120389 0 +0.0409962 -0.025141 -0.0241798 +0.0228149 -0.025141 -0.0418806 +-0.00202138 -0.025141 -0.0483595 +-0.0268576 -0.025141 -0.0418806 +-0.045039 -0.025141 -0.0241798 +-0.0516939 -0.025141 
-3.72529e-009 +-0.045039 -0.025141 0.0241798 +-0.0268576 -0.025141 0.0418806 +-0.00202139 -0.025141 0.0483595 +0.0228148 -0.025141 0.0418806 +0.0409962 -0.025141 0.0241798 +0.0476511 -0.025141 0 +0.0331024 -0.036392 -0.0197427 +0.0182573 -0.036392 -0.0341954 +-0.00202138 -0.036392 -0.0394854 +-0.0223001 -0.036392 -0.0341954 +-0.0371451 -0.036392 -0.0197427 +-0.0425788 -0.036392 -3.72529e-009 +-0.0371451 -0.036392 0.0197427 +-0.0223001 -0.036392 0.0341954 +-0.00202139 -0.036392 0.0394854 +0.0182573 -0.036392 0.0341954 +0.0331024 -0.036392 0.0197427 +0.038536 -0.036392 0 +0.0228148 -0.0450252 -0.0139602 +0.0123178 -0.0450252 -0.0241798 +-0.00202138 -0.0450252 -0.0279204 +-0.0163606 -0.0450252 -0.0241798 +-0.0268576 -0.0450252 -0.0139602 +-0.0306998 -0.0450252 -3.72529e-009 +-0.0268576 -0.0450252 0.0139602 +-0.0163606 -0.0450252 0.0241798 +-0.00202139 -0.0450252 0.0279204 +0.0123178 -0.0450252 0.0241798 +0.0228148 -0.0450252 0.0139602 +0.026657 -0.0450252 0 +0.0108348 -0.0504523 -0.00722633 +0.00540113 -0.0504523 -0.0125164 +-0.00202139 -0.0504523 -0.0144527 +-0.0094439 -0.0504523 -0.0125164 +-0.0148776 -0.0504523 -0.00722633 +-0.0168664 -0.0504523 0 +-0.0148776 -0.0504523 0.00722633 +-0.00944391 -0.0504523 0.0125164 +-0.00202139 -0.0504523 0.0144527 +0.00540113 -0.0504523 0.0125164 +0.0108348 -0.0504523 0.00722633 +0.0128236 -0.0504523 0 +-0.00202139 -0.0523034 0 +numsurf 144 +SURF 0x10 +mat 0 +refs 4 +119 0.916667 0.166667 +131 0.916667 0.0833333 +132 1 0.0833333 +120 1 0.166667 +SURF 0x10 +mat 0 +refs 4 +118 0.833333 0.166667 +130 0.833333 0.0833333 +131 0.916667 0.0833333 +119 0.916667 0.166667 +SURF 0x10 +mat 0 +refs 4 +117 0.75 0.166667 +129 0.75 0.0833333 +130 0.833333 0.0833333 +118 0.833333 0.166667 +SURF 0x10 +mat 0 +refs 4 +116 0.666667 0.166667 +128 0.666667 0.0833333 +129 0.75 0.0833333 +117 0.75 0.166667 +SURF 0x10 +mat 0 +refs 4 +115 0.583333 0.166667 +127 0.583333 0.0833333 +128 0.666667 0.0833333 +116 0.666667 0.166667 +SURF 0x10 +mat 0 +refs 4 +114 
0.5 0.166667 +126 0.5 0.0833333 +127 0.583333 0.0833333 +115 0.583333 0.166667 +SURF 0x10 +mat 0 +refs 4 +113 0.416667 0.166667 +125 0.416667 0.0833333 +126 0.5 0.0833333 +114 0.5 0.166667 +SURF 0x10 +mat 0 +refs 4 +112 0.333333 0.166667 +124 0.333333 0.0833333 +125 0.416667 0.0833333 +113 0.416667 0.166667 +SURF 0x10 +mat 0 +refs 4 +111 0.25 0.166667 +123 0.25 0.0833333 +124 0.333333 0.0833333 +112 0.333333 0.166667 +SURF 0x10 +mat 0 +refs 4 +110 0.166667 0.166667 +122 0.166667 0.0833333 +123 0.25 0.0833333 +111 0.25 0.166667 +SURF 0x10 +mat 0 +refs 4 +109 0.0833333 0.166667 +121 0.0833333 0.0833333 +122 0.166667 0.0833333 +110 0.166667 0.166667 +SURF 0x10 +mat 0 +refs 4 +120 -2.98023e-008 0.166667 +132 -2.98023e-008 0.0833333 +121 0.0833333 0.0833333 +109 0.0833333 0.166667 +SURF 0x10 +mat 0 +refs 4 +107 0.916667 0.25 +119 0.916667 0.166667 +120 1 0.166667 +108 1 0.25 +SURF 0x10 +mat 0 +refs 4 +106 0.833333 0.25 +118 0.833333 0.166667 +119 0.916667 0.166667 +107 0.916667 0.25 +SURF 0x10 +mat 0 +refs 4 +105 0.75 0.25 +117 0.75 0.166667 +118 0.833333 0.166667 +106 0.833333 0.25 +SURF 0x10 +mat 0 +refs 4 +104 0.666667 0.25 +116 0.666667 0.166667 +117 0.75 0.166667 +105 0.75 0.25 +SURF 0x10 +mat 0 +refs 4 +103 0.583333 0.25 +115 0.583333 0.166667 +116 0.666667 0.166667 +104 0.666667 0.25 +SURF 0x10 +mat 0 +refs 4 +102 0.5 0.25 +114 0.5 0.166667 +115 0.583333 0.166667 +103 0.583333 0.25 +SURF 0x10 +mat 0 +refs 4 +101 0.416667 0.25 +113 0.416667 0.166667 +114 0.5 0.166667 +102 0.5 0.25 +SURF 0x10 +mat 0 +refs 4 +100 0.333333 0.25 +112 0.333333 0.166667 +113 0.416667 0.166667 +101 0.416667 0.25 +SURF 0x10 +mat 0 +refs 4 +99 0.25 0.25 +111 0.25 0.166667 +112 0.333333 0.166667 +100 0.333333 0.25 +SURF 0x10 +mat 0 +refs 4 +98 0.166667 0.25 +110 0.166667 0.166667 +111 0.25 0.166667 +99 0.25 0.25 +SURF 0x10 +mat 0 +refs 4 +97 0.0833333 0.25 +109 0.0833333 0.166667 +110 0.166667 0.166667 +98 0.166667 0.25 +SURF 0x10 +mat 0 +refs 4 +108 -2.98023e-008 0.25 +120 -2.98023e-008 
0.166667 +109 0.0833333 0.166667 +97 0.0833333 0.25 +SURF 0x10 +mat 0 +refs 4 +95 0.916667 0.333333 +107 0.916667 0.25 +108 1 0.25 +96 1 0.333333 +SURF 0x10 +mat 0 +refs 4 +94 0.833333 0.333333 +106 0.833333 0.25 +107 0.916667 0.25 +95 0.916667 0.333333 +SURF 0x10 +mat 0 +refs 4 +93 0.75 0.333333 +105 0.75 0.25 +106 0.833333 0.25 +94 0.833333 0.333333 +SURF 0x10 +mat 0 +refs 4 +92 0.666667 0.333333 +104 0.666667 0.25 +105 0.75 0.25 +93 0.75 0.333333 +SURF 0x10 +mat 0 +refs 4 +91 0.583333 0.333333 +103 0.583333 0.25 +104 0.666667 0.25 +92 0.666667 0.333333 +SURF 0x10 +mat 0 +refs 4 +90 0.5 0.333333 +102 0.5 0.25 +103 0.583333 0.25 +91 0.583333 0.333333 +SURF 0x10 +mat 0 +refs 4 +89 0.416667 0.333333 +101 0.416667 0.25 +102 0.5 0.25 +90 0.5 0.333333 +SURF 0x10 +mat 0 +refs 4 +88 0.333333 0.333333 +100 0.333333 0.25 +101 0.416667 0.25 +89 0.416667 0.333333 +SURF 0x10 +mat 0 +refs 4 +87 0.25 0.333333 +99 0.25 0.25 +100 0.333333 0.25 +88 0.333333 0.333333 +SURF 0x10 +mat 0 +refs 4 +86 0.166667 0.333333 +98 0.166667 0.25 +99 0.25 0.25 +87 0.25 0.333333 +SURF 0x10 +mat 0 +refs 4 +85 0.0833333 0.333333 +97 0.0833333 0.25 +98 0.166667 0.25 +86 0.166667 0.333333 +SURF 0x10 +mat 0 +refs 4 +96 -2.98023e-008 0.333333 +108 -2.98023e-008 0.25 +97 0.0833333 0.25 +85 0.0833333 0.333333 +SURF 0x10 +mat 0 +refs 4 +83 0.916667 0.416667 +95 0.916667 0.333333 +96 1 0.333333 +84 1 0.416667 +SURF 0x10 +mat 0 +refs 4 +82 0.833333 0.416667 +94 0.833333 0.333333 +95 0.916667 0.333333 +83 0.916667 0.416667 +SURF 0x10 +mat 0 +refs 4 +81 0.75 0.416667 +93 0.75 0.333333 +94 0.833333 0.333333 +82 0.833333 0.416667 +SURF 0x10 +mat 0 +refs 4 +80 0.666667 0.416667 +92 0.666667 0.333333 +93 0.75 0.333333 +81 0.75 0.416667 +SURF 0x10 +mat 0 +refs 4 +79 0.583333 0.416667 +91 0.583333 0.333333 +92 0.666667 0.333333 +80 0.666667 0.416667 +SURF 0x10 +mat 0 +refs 4 +78 0.5 0.416667 +90 0.5 0.333333 +91 0.583333 0.333333 +79 0.583333 0.416667 +SURF 0x10 +mat 0 +refs 4 +77 0.416667 0.416667 +89 0.416667 
0.333333 +90 0.5 0.333333 +78 0.5 0.416667 +SURF 0x10 +mat 0 +refs 4 +76 0.333333 0.416667 +88 0.333333 0.333333 +89 0.416667 0.333333 +77 0.416667 0.416667 +SURF 0x10 +mat 0 +refs 4 +75 0.25 0.416667 +87 0.25 0.333333 +88 0.333333 0.333333 +76 0.333333 0.416667 +SURF 0x10 +mat 0 +refs 4 +74 0.166667 0.416667 +86 0.166667 0.333333 +87 0.25 0.333333 +75 0.25 0.416667 +SURF 0x10 +mat 0 +refs 4 +73 0.0833333 0.416667 +85 0.0833333 0.333333 +86 0.166667 0.333333 +74 0.166667 0.416667 +SURF 0x10 +mat 0 +refs 4 +84 -2.98023e-008 0.416667 +96 -2.98023e-008 0.333333 +85 0.0833333 0.333333 +73 0.0833333 0.416667 +SURF 0x10 +mat 0 +refs 4 +71 0.916667 0.5 +83 0.916667 0.416667 +84 1 0.416667 +72 1 0.5 +SURF 0x10 +mat 0 +refs 4 +70 0.833333 0.5 +82 0.833333 0.416667 +83 0.916667 0.416667 +71 0.916667 0.5 +SURF 0x10 +mat 0 +refs 4 +69 0.75 0.5 +81 0.75 0.416667 +82 0.833333 0.416667 +70 0.833333 0.5 +SURF 0x10 +mat 0 +refs 4 +68 0.666667 0.5 +80 0.666667 0.416667 +81 0.75 0.416667 +69 0.75 0.5 +SURF 0x10 +mat 0 +refs 4 +67 0.583333 0.5 +79 0.583333 0.416667 +80 0.666667 0.416667 +68 0.666667 0.5 +SURF 0x10 +mat 0 +refs 4 +66 0.5 0.5 +78 0.5 0.416667 +79 0.583333 0.416667 +67 0.583333 0.5 +SURF 0x10 +mat 0 +refs 4 +65 0.416667 0.5 +77 0.416667 0.416667 +78 0.5 0.416667 +66 0.5 0.5 +SURF 0x10 +mat 0 +refs 4 +64 0.333333 0.5 +76 0.333333 0.416667 +77 0.416667 0.416667 +65 0.416667 0.5 +SURF 0x10 +mat 0 +refs 4 +63 0.25 0.5 +75 0.25 0.416667 +76 0.333333 0.416667 +64 0.333333 0.5 +SURF 0x10 +mat 0 +refs 4 +62 0.166667 0.5 +74 0.166667 0.416667 +75 0.25 0.416667 +63 0.25 0.5 +SURF 0x10 +mat 0 +refs 4 +61 0.0833333 0.5 +73 0.0833333 0.416667 +74 0.166667 0.416667 +62 0.166667 0.5 +SURF 0x10 +mat 0 +refs 4 +72 -2.98023e-008 0.5 +84 -2.98023e-008 0.416667 +73 0.0833333 0.416667 +61 0.0833333 0.5 +SURF 0x10 +mat 0 +refs 4 +59 0.916667 0.583333 +71 0.916667 0.5 +72 1 0.5 +60 1 0.583333 +SURF 0x10 +mat 0 +refs 4 +58 0.833333 0.583333 +70 0.833333 0.5 +71 0.916667 0.5 +59 0.916667 
0.583333 +SURF 0x10 +mat 0 +refs 4 +57 0.75 0.583333 +69 0.75 0.5 +70 0.833333 0.5 +58 0.833333 0.583333 +SURF 0x10 +mat 0 +refs 4 +56 0.666667 0.583333 +68 0.666667 0.5 +69 0.75 0.5 +57 0.75 0.583333 +SURF 0x10 +mat 0 +refs 4 +55 0.583333 0.583333 +67 0.583333 0.5 +68 0.666667 0.5 +56 0.666667 0.583333 +SURF 0x10 +mat 0 +refs 4 +54 0.5 0.583333 +66 0.5 0.5 +67 0.583333 0.5 +55 0.583333 0.583333 +SURF 0x10 +mat 0 +refs 4 +53 0.416667 0.583333 +65 0.416667 0.5 +66 0.5 0.5 +54 0.5 0.583333 +SURF 0x10 +mat 0 +refs 4 +52 0.333333 0.583333 +64 0.333333 0.5 +65 0.416667 0.5 +53 0.416667 0.583333 +SURF 0x10 +mat 0 +refs 4 +51 0.25 0.583333 +63 0.25 0.5 +64 0.333333 0.5 +52 0.333333 0.583333 +SURF 0x10 +mat 0 +refs 4 +50 0.166667 0.583333 +62 0.166667 0.5 +63 0.25 0.5 +51 0.25 0.583333 +SURF 0x10 +mat 0 +refs 4 +49 0.0833333 0.583333 +61 0.0833333 0.5 +62 0.166667 0.5 +50 0.166667 0.583333 +SURF 0x10 +mat 0 +refs 4 +60 -2.98023e-008 0.583333 +72 -2.98023e-008 0.5 +61 0.0833333 0.5 +49 0.0833333 0.583333 +SURF 0x10 +mat 0 +refs 4 +47 0.916667 0.666667 +59 0.916667 0.583333 +60 1 0.583333 +48 1 0.666667 +SURF 0x10 +mat 0 +refs 4 +46 0.833333 0.666667 +58 0.833333 0.583333 +59 0.916667 0.583333 +47 0.916667 0.666667 +SURF 0x10 +mat 0 +refs 4 +45 0.75 0.666667 +57 0.75 0.583333 +58 0.833333 0.583333 +46 0.833333 0.666667 +SURF 0x10 +mat 0 +refs 4 +44 0.666667 0.666667 +56 0.666667 0.583333 +57 0.75 0.583333 +45 0.75 0.666667 +SURF 0x10 +mat 0 +refs 4 +43 0.583333 0.666667 +55 0.583333 0.583333 +56 0.666667 0.583333 +44 0.666667 0.666667 +SURF 0x10 +mat 0 +refs 4 +42 0.5 0.666667 +54 0.5 0.583333 +55 0.583333 0.583333 +43 0.583333 0.666667 +SURF 0x10 +mat 0 +refs 4 +41 0.416667 0.666667 +53 0.416667 0.583333 +54 0.5 0.583333 +42 0.5 0.666667 +SURF 0x10 +mat 0 +refs 4 +40 0.333333 0.666667 +52 0.333333 0.583333 +53 0.416667 0.583333 +41 0.416667 0.666667 +SURF 0x10 +mat 0 +refs 4 +39 0.25 0.666667 +51 0.25 0.583333 +52 0.333333 0.583333 +40 0.333333 0.666667 +SURF 0x10 +mat 0 
+refs 4 +38 0.166667 0.666667 +50 0.166667 0.583333 +51 0.25 0.583333 +39 0.25 0.666667 +SURF 0x10 +mat 0 +refs 4 +37 0.0833333 0.666667 +49 0.0833333 0.583333 +50 0.166667 0.583333 +38 0.166667 0.666667 +SURF 0x10 +mat 0 +refs 4 +48 -2.98023e-008 0.666667 +60 -2.98023e-008 0.583333 +49 0.0833333 0.583333 +37 0.0833333 0.666667 +SURF 0x10 +mat 0 +refs 4 +35 0.916667 0.75 +47 0.916667 0.666667 +48 1 0.666667 +36 1 0.75 +SURF 0x10 +mat 0 +refs 4 +34 0.833333 0.75 +46 0.833333 0.666667 +47 0.916667 0.666667 +35 0.916667 0.75 +SURF 0x10 +mat 0 +refs 4 +33 0.75 0.75 +45 0.75 0.666667 +46 0.833333 0.666667 +34 0.833333 0.75 +SURF 0x10 +mat 0 +refs 4 +32 0.666667 0.75 +44 0.666667 0.666667 +45 0.75 0.666667 +33 0.75 0.75 +SURF 0x10 +mat 0 +refs 4 +31 0.583333 0.75 +43 0.583333 0.666667 +44 0.666667 0.666667 +32 0.666667 0.75 +SURF 0x10 +mat 0 +refs 4 +30 0.5 0.75 +42 0.5 0.666667 +43 0.583333 0.666667 +31 0.583333 0.75 +SURF 0x10 +mat 0 +refs 4 +29 0.416667 0.75 +41 0.416667 0.666667 +42 0.5 0.666667 +30 0.5 0.75 +SURF 0x10 +mat 0 +refs 4 +28 0.333333 0.75 +40 0.333333 0.666667 +41 0.416667 0.666667 +29 0.416667 0.75 +SURF 0x10 +mat 0 +refs 4 +27 0.25 0.75 +39 0.25 0.666667 +40 0.333333 0.666667 +28 0.333333 0.75 +SURF 0x10 +mat 0 +refs 4 +26 0.166667 0.75 +38 0.166667 0.666667 +39 0.25 0.666667 +27 0.25 0.75 +SURF 0x10 +mat 0 +refs 4 +25 0.0833333 0.75 +37 0.0833333 0.666667 +38 0.166667 0.666667 +26 0.166667 0.75 +SURF 0x10 +mat 0 +refs 4 +36 -2.98023e-008 0.75 +48 -2.98023e-008 0.666667 +37 0.0833333 0.666667 +25 0.0833333 0.75 +SURF 0x10 +mat 0 +refs 4 +23 0.916667 0.833333 +35 0.916667 0.75 +36 1 0.75 +24 1 0.833333 +SURF 0x10 +mat 0 +refs 4 +22 0.833333 0.833333 +34 0.833333 0.75 +35 0.916667 0.75 +23 0.916667 0.833333 +SURF 0x10 +mat 0 +refs 4 +21 0.75 0.833333 +33 0.75 0.75 +34 0.833333 0.75 +22 0.833333 0.833333 +SURF 0x10 +mat 0 +refs 4 +20 0.666667 0.833333 +32 0.666667 0.75 +33 0.75 0.75 +21 0.75 0.833333 +SURF 0x10 +mat 0 +refs 4 +19 0.583333 0.833333 +31 
0.583333 0.75 +32 0.666667 0.75 +20 0.666667 0.833333 +SURF 0x10 +mat 0 +refs 4 +18 0.5 0.833333 +30 0.5 0.75 +31 0.583333 0.75 +19 0.583333 0.833333 +SURF 0x10 +mat 0 +refs 4 +17 0.416667 0.833333 +29 0.416667 0.75 +30 0.5 0.75 +18 0.5 0.833333 +SURF 0x10 +mat 0 +refs 4 +16 0.333333 0.833333 +28 0.333333 0.75 +29 0.416667 0.75 +17 0.416667 0.833333 +SURF 0x10 +mat 0 +refs 4 +15 0.25 0.833333 +27 0.25 0.75 +28 0.333333 0.75 +16 0.333333 0.833333 +SURF 0x10 +mat 0 +refs 4 +14 0.166667 0.833333 +26 0.166667 0.75 +27 0.25 0.75 +15 0.25 0.833333 +SURF 0x10 +mat 0 +refs 4 +13 0.0833333 0.833333 +25 0.0833333 0.75 +26 0.166667 0.75 +14 0.166667 0.833333 +SURF 0x10 +mat 0 +refs 4 +24 -2.98023e-008 0.833333 +36 -2.98023e-008 0.75 +25 0.0833333 0.75 +13 0.0833333 0.833333 +SURF 0x10 +mat 0 +refs 4 +11 0.916667 0.916667 +23 0.916667 0.833333 +24 1 0.833333 +12 1 0.916667 +SURF 0x10 +mat 0 +refs 4 +10 0.833333 0.916667 +22 0.833333 0.833333 +23 0.916667 0.833333 +11 0.916667 0.916667 +SURF 0x10 +mat 0 +refs 4 +9 0.75 0.916667 +21 0.75 0.833333 +22 0.833333 0.833333 +10 0.833333 0.916667 +SURF 0x10 +mat 0 +refs 4 +8 0.666667 0.916667 +20 0.666667 0.833333 +21 0.75 0.833333 +9 0.75 0.916667 +SURF 0x10 +mat 0 +refs 4 +7 0.583333 0.916667 +19 0.583333 0.833333 +20 0.666667 0.833333 +8 0.666667 0.916667 +SURF 0x10 +mat 0 +refs 4 +6 0.5 0.916667 +18 0.5 0.833333 +19 0.583333 0.833333 +7 0.583333 0.916667 +SURF 0x10 +mat 0 +refs 4 +5 0.416667 0.916667 +17 0.416667 0.833333 +18 0.5 0.833333 +6 0.5 0.916667 +SURF 0x10 +mat 0 +refs 4 +4 0.333333 0.916667 +16 0.333333 0.833333 +17 0.416667 0.833333 +5 0.416667 0.916667 +SURF 0x10 +mat 0 +refs 4 +3 0.25 0.916667 +15 0.25 0.833333 +16 0.333333 0.833333 +4 0.333333 0.916667 +SURF 0x10 +mat 0 +refs 4 +2 0.166667 0.916667 +14 0.166667 0.833333 +15 0.25 0.833333 +3 0.25 0.916667 +SURF 0x10 +mat 0 +refs 4 +1 0.0833333 0.916667 +13 0.0833333 0.833333 +14 0.166667 0.833333 +2 0.166667 0.916667 +SURF 0x10 +mat 0 +refs 4 +12 -2.98023e-008 0.916667 
+24 -2.98023e-008 0.833333 +13 0.0833333 0.833333 +1 0.0833333 0.916667 +SURF 0x10 +mat 0 +refs 3 +1 0.0833333 0.916667 +0 0.0416666 1 +12 -2.98023e-008 0.916667 +SURF 0x10 +mat 0 +refs 3 +2 0.166667 0.916667 +0 0.125 1 +1 0.0833333 0.916667 +SURF 0x10 +mat 0 +refs 3 +3 0.25 0.916667 +0 0.208333 1 +2 0.166667 0.916667 +SURF 0x10 +mat 0 +refs 3 +4 0.333333 0.916667 +0 0.291667 1 +3 0.25 0.916667 +SURF 0x10 +mat 0 +refs 3 +5 0.416667 0.916667 +0 0.375 1 +4 0.333333 0.916667 +SURF 0x10 +mat 0 +refs 3 +6 0.5 0.916667 +0 0.458333 1 +5 0.416667 0.916667 +SURF 0x10 +mat 0 +refs 3 +7 0.583333 0.916667 +0 0.541667 1 +6 0.5 0.916667 +SURF 0x10 +mat 0 +refs 3 +8 0.666667 0.916667 +0 0.625 1 +7 0.583333 0.916667 +SURF 0x10 +mat 0 +refs 3 +9 0.75 0.916667 +0 0.708333 1 +8 0.666667 0.916667 +SURF 0x10 +mat 0 +refs 3 +10 0.833333 0.916667 +0 0.791667 1 +9 0.75 0.916667 +SURF 0x10 +mat 0 +refs 3 +11 0.916667 0.916667 +0 0.875 1 +10 0.833333 0.916667 +SURF 0x10 +mat 0 +refs 3 +12 1 0.916667 +0 0.958333 1 +11 0.916667 0.916667 +SURF 0x10 +mat 0 +refs 3 +132 -2.98023e-008 0.0833333 +133 0.0416666 0 +121 0.0833333 0.0833333 +SURF 0x10 +mat 0 +refs 3 +121 0.0833333 0.0833333 +133 0.125 0 +122 0.166667 0.0833333 +SURF 0x10 +mat 0 +refs 3 +122 0.166667 0.0833333 +133 0.208333 0 +123 0.25 0.0833333 +SURF 0x10 +mat 0 +refs 3 +123 0.25 0.0833333 +133 0.291667 0 +124 0.333333 0.0833333 +SURF 0x10 +mat 0 +refs 3 +124 0.333333 0.0833333 +133 0.375 0 +125 0.416667 0.0833333 +SURF 0x10 +mat 0 +refs 3 +125 0.416667 0.0833333 +133 0.458333 0 +126 0.5 0.0833333 +SURF 0x10 +mat 0 +refs 3 +126 0.5 0.0833333 +133 0.541667 0 +127 0.583333 0.0833333 +SURF 0x10 +mat 0 +refs 3 +127 0.583333 0.0833333 +133 0.625 0 +128 0.666667 0.0833333 +SURF 0x10 +mat 0 +refs 3 +128 0.666667 0.0833333 +133 0.708333 0 +129 0.75 0.0833333 +SURF 0x10 +mat 0 +refs 3 +129 0.75 0.0833333 +133 0.791667 0 +130 0.833333 0.0833333 +SURF 0x10 +mat 0 +refs 3 +130 0.833333 0.0833333 +133 0.875 0 +131 0.916667 0.0833333 +SURF 0x10 
+mat 0 +refs 3 +131 0.916667 0.0833333 +133 0.958333 0 +132 1 0.0833333 +kids 0 diff --git a/test/models/ASE/ThreeCubesGreen_UTF16BE.ASE b/test/models/ASE/ThreeCubesGreen_UTF16BE.ASE new file mode 100644 index 000000000..b610a97d3 Binary files /dev/null and b/test/models/ASE/ThreeCubesGreen_UTF16BE.ASE differ diff --git a/test/models/ASE/ThreeCubesGreen_UTF16LE.ASE b/test/models/ASE/ThreeCubesGreen_UTF16LE.ASE new file mode 100644 index 000000000..bc7e4aa76 Binary files /dev/null and b/test/models/ASE/ThreeCubesGreen_UTF16LE.ASE differ diff --git a/test/models/Collada/cube_UTF16LE.dae b/test/models/Collada/cube_UTF16LE.dae new file mode 100644 index 000000000..57c10fc1e Binary files /dev/null and b/test/models/Collada/cube_UTF16LE.dae differ diff --git a/test/models/Collada/cube_UTF8BOM.dae b/test/models/Collada/cube_UTF8BOM.dae new file mode 100644 index 000000000..d2d208902 --- /dev/null +++ b/test/models/Collada/cube_UTF8BOM.dae @@ -0,0 +1,210 @@ + + + + + alorino + Maya 7.0 | ColladaMaya v2.01 Jun 9 2006 at 16:08:19 | FCollada v1.11 + Collada Maya Export Options: bakeTransforms=0;exportPolygonMeshes=1;bakeLighting=0;isSampling=0; +curveConstrainSampling=0;exportCameraAsLookat=0; +exportLights=1;exportCameras=1;exportJointsAndSkin=1; +exportAnimations=1;exportTriangles=0;exportInvisibleNodes=0; +exportNormals=1;exportTexCoords=1;exportVertexColors=1;exportTangents=0; +exportTexTangents=0;exportConstraints=0;exportPhysics=0;exportXRefs=1; +dereferenceXRefs=0;cameraXFov=0;cameraYFov=1 + +Copyright 2006 Sony Computer Entertainment Inc. +Licensed under the SCEA Shared Source License, Version 1.0 (the +"License"); you may not use this file except in compliance with the +License. 
You may obtain a copy of the License at: +http://research.scea.com/scea_shared_source_license.html +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + 2006-06-21T21:23:22Z + 2006-06-21T21:23:22Z + + Y_UP + + + + + + + 37.8493 + 1 + 10 + 1000 + + + + + + + + + 37.8501 + 1 + 0.01 + 1000 + + + + + + + + + + 1 1 1 + 1 + 0 + 0 + + + + 1.000000 + + + + + + 1 1 1 + 1 + 0 + 0 + + + + + + + + + + + + + + + + 0 0 0 1 + + + 0 0 0 1 + + + 0.137255 0.403922 0.870588 1 + + + 0.5 0.5 0.5 1 + + + 16 + + + 0 0 0 1 + + + 0.5 + + + 0 0 0 1 + + + 1 + + + 0 + + + + + + + + + + + -50 50 50 50 50 50 -50 -50 50 50 -50 50 -50 50 -50 50 50 -50 -50 -50 -50 50 -50 -50 + + + + + + + + + + 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 -1 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 0 -1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 -1 0 0 -1 0 0 -1 0 0 -1 + + + + + + + + + + + + + + + 4 4 4 4 4 4 +

0 0 2 1 3 2 1 3 0 4 1 5 5 6 4 7 6 8 7 9 3 10 2 11 0 12 4 13 6 14 2 15 3 16 7 17 5 18 1 19 5 20 7 21 6 22 4 23

+
+
+
+
+ + + + -427.749 333.855 655.017 + 0 1 0 -33 + 1 0 0 -22.1954 + 0 0 1 0 + + + + -500 1000 400 + 0 0 1 0 + 0 1 0 0 + 1 0 0 0 + + + + 0 0 1 0 + 0 1 0 0 + 1 0 0 0 + + + + + + + + + + -427.749 333.855 655.017 + 0 1 0 -33 + 1 0 0 -22.1954 + 0 0 1 0 + + + + 3 4 10 + 0 0 1 0 + 0 1 0 0 + 1 0 0 0 + + + + + + + +
diff --git a/test/models/OBJ/box_UTF16BE.obj b/test/models/OBJ/box_UTF16BE.obj new file mode 100644 index 000000000..67d315cfe Binary files /dev/null and b/test/models/OBJ/box_UTF16BE.obj differ diff --git a/tools/assimp_view/Display.cpp b/tools/assimp_view/Display.cpp index 6f90d57d9..1756beb78 100644 --- a/tools/assimp_view/Display.cpp +++ b/tools/assimp_view/Display.cpp @@ -170,10 +170,15 @@ int CDisplay::AddNodeToDisplayList( } else strcpy(chTemp,pcNode->mName.data); - TVITEMEX tvi; - TVINSERTSTRUCT sNew; - tvi.pszText = chTemp; - tvi.cchTextMax = (int)strlen(chTemp); + TVITEMEXW tvi; + TVINSERTSTRUCTW sNew; + + wchar_t tmp[512]; + int t = MultiByteToWideChar(CP_UTF8,0,chTemp,-1,tmp,512); + + tvi.pszText = tmp; + tvi.cchTextMax = (int)t; + tvi.mask = TVIF_TEXT | TVIF_SELECTEDIMAGE | TVIF_IMAGE | TVIF_HANDLE | TVIF_PARAM; tvi.iImage = this->m_aiImageList[AI_VIEW_IMGLIST_NODE]; tvi.iSelectedImage = this->m_aiImageList[AI_VIEW_IMGLIST_NODE]; @@ -185,7 +190,7 @@ int CDisplay::AddNodeToDisplayList( // add the item to the list HTREEITEM hTexture = (HTREEITEM)SendMessage(GetDlgItem(g_hDlg,IDC_TREE1), - TVM_INSERTITEM, + TVM_INSERTITEMW, 0, (LPARAM)(LPTVINSERTSTRUCT)&sNew); @@ -511,10 +516,14 @@ int CDisplay::AddMaterialToDisplayList(HTREEITEM hRoot, { sprintf(chTemp,"%s (%i)",szOut.data,iIndex+1); } - TVITEMEX tvi; - TVINSERTSTRUCT sNew; - tvi.pszText = chTemp; - tvi.cchTextMax = (int)strlen(chTemp); + TVITEMEXW tvi; + TVINSERTSTRUCTW sNew; + + wchar_t tmp[512]; + int t = MultiByteToWideChar(CP_UTF8,0,chTemp,-1,tmp,512); + + tvi.pszText = tmp; + tvi.cchTextMax = (int)t; tvi.mask = TVIF_TEXT | TVIF_SELECTEDIMAGE | TVIF_IMAGE | TVIF_HANDLE | TVIF_PARAM ; tvi.iImage = m_aiImageList[AI_VIEW_IMGLIST_MATERIAL]; tvi.iSelectedImage = m_aiImageList[AI_VIEW_IMGLIST_MATERIAL]; @@ -527,7 +536,7 @@ int CDisplay::AddMaterialToDisplayList(HTREEITEM hRoot, // add the item to the list HTREEITEM hTexture = (HTREEITEM)SendMessage(GetDlgItem(g_hDlg,IDC_TREE1), - TVM_INSERTITEM, + 
TVM_INSERTITEMW, 0, (LPARAM)(LPTVINSERTSTRUCT)&sNew); diff --git a/workspaces/vc8/assimp.vcproj b/workspaces/vc8/assimp.vcproj index 69db8f4f3..871dc84c1 100644 --- a/workspaces/vc8/assimp.vcproj +++ b/workspaces/vc8/assimp.vcproj @@ -3355,6 +3355,82 @@ > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +