Merge branch 'master' into master

2019-05-22 00:14:18 +02:00 · 2019-05-22 00:14:18 +02:00 · 1d05f1b222
parent b10dc5747b 9787ac724f
commit 1d05f1b222
9 changed files with 828 additions and 56 deletions
--- a/code/FBXBinaryTokenizer.cpp
+++ b/code/FBXBinaryTokenizer.cpp
@ -98,7 +98,7 @@ namespace FBX {
 //	return (flags & to_check) != 0;
 //}
 // ------------------------------------------------------------------------------------------------
-Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
+Token::Token(const char* sbegin, const char* send, TokenType type, size_t offset)
    :
    #ifdef DEBUG
    contents(sbegin, static_cast<size_t>(send-sbegin)),
@ -122,18 +122,18 @@ namespace {

 // ------------------------------------------------------------------------------------------------
 // signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
-AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset) AI_WONT_RETURN_SUFFIX;
-AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset)
+AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset) AI_WONT_RETURN_SUFFIX; // forward declaration carrying the no-return suffix macro
+AI_WONT_RETURN void TokenizeError(const std::string& message, size_t offset) // always throws DeadlyImportError; the text is built by Util::AddOffset from message and offset
 {
    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
 }


 // ------------------------------------------------------------------------------------------------
-uint32_t Offset(const char* begin, const char* cursor) {
+// Returns the distance in bytes from begin to cursor.
+// begin must not lie past cursor; this is checked with ai_assert.
+size_t Offset(const char* begin, const char* cursor) {
    ai_assert(begin <= cursor);

-    return static_cast<unsigned int>(cursor - begin);
+    // A pointer difference is a signed ptrdiff_t: convert explicitly instead of
+    // relying on the implicit signed-to-unsigned conversion of the return value.
+    return static_cast<size_t>(cursor - begin);
 }

 // ------------------------------------------------------------------------------------------------
@ -424,7 +424,7 @@ bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor,

 // ------------------------------------------------------------------------------------------------
 // TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent
-void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
+void TokenizeBinary(TokenList& output_tokens, const char* input, size_t length)
 {
    ai_assert(input);

--- a/code/FBXDocument.h
+++ b/code/FBXDocument.h
@ -643,7 +643,7 @@ private:
    std::string fileName;
    std::shared_ptr<const PropertyTable> props;

-    uint32_t contentLength;
+    uint64_t contentLength;
    uint8_t* content;
 };

--- a/code/FBXImporter.cpp
+++ b/code/FBXImporter.cpp
@ -172,7 +172,7 @@ void FBXImporter::InternReadFile( const std::string& pFile, aiScene* pScene, IOS
        bool is_binary = false;
        if (!strncmp(begin,"Kaydara FBX Binary",18)) {
            is_binary = true;
-            TokenizeBinary(tokens,begin,static_cast<unsigned int>(contents.size()));
+            TokenizeBinary(tokens,begin,contents.size());
        }
        else {
            Tokenize(tokens,begin);
--- a/code/FBXMaterial.cpp
+++ b/code/FBXMaterial.cpp
@ -326,16 +326,40 @@ Video::Video(uint64_t id, const Element& element, const Document& doc, const std
                    DOMError("embedded content is not surrounded by quotation marks", &element);
                }
                else {
-                    const char* encodedData = data + 1;
-                    size_t encodedDataLen = static_cast<size_t>(token.end() - token.begin());
-                    // search for last quotation mark
-                    while (encodedDataLen > 1 && encodedData[encodedDataLen] != '"')
-                        encodedDataLen--;
-                    if (encodedDataLen % 4 != 0) {
-                        DOMError("embedded content is invalid, needs to be in base64", &element);
+                    size_t targetLength = 0;
+                    auto numTokens = Content->Tokens().size();
+                    // First time compute size (it could be large like 64Gb and it is good to allocate it once)
+                    for (uint32_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
+                    {
+                        const Token& dataToken = GetRequiredToken(*Content, tokenIdx);
+                        size_t tokenLength = dataToken.end() - dataToken.begin() - 2; // ignore double quotes
+                        const char* base64data = dataToken.begin() + 1;
+                        const size_t outLength = Util::ComputeDecodedSizeBase64(base64data, tokenLength);
+                        if (outLength == 0)
+                        {
+                            DOMError("Corrupted embedded content found", &element);
                        }
-                    else {
-                        contentLength = Util::DecodeBase64(encodedData, encodedDataLen, content);
+                        targetLength += outLength;
+                    }
+                    if (targetLength == 0)
+                    {
+                        DOMError("Corrupted embedded content found", &element);
+                    }
+                    content = new uint8_t[targetLength];
+                    contentLength = static_cast<uint64_t>(targetLength);
+                    size_t dst_offset = 0;
+                    for (uint32_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
+                    {
+                        const Token& dataToken = GetRequiredToken(*Content, tokenIdx);
+                        size_t tokenLength = dataToken.end() - dataToken.begin() - 2; // ignore double quotes
+                        const char* base64data = dataToken.begin() + 1;
+                        dst_offset += Util::DecodeBase64(base64data, tokenLength, content + dst_offset, targetLength - dst_offset);
+                    }
+                    if (targetLength != dst_offset)
+                    {
+                        delete[] content;
+                        contentLength = 0;
+                        DOMError("Corrupted embedded content found", &element);
                    }
                }
            }
--- a/code/FBXTokenizer.h
+++ b/code/FBXTokenizer.h
@ -93,7 +93,7 @@ public:
    Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column);

    /** construct a binary token */
-    Token(const char* sbegin, const char* send, TokenType type, unsigned int offset);
+    Token(const char* sbegin, const char* send, TokenType type, size_t offset);

    ~Token();

@ -118,14 +118,14 @@ public:
        return type;
    }

-    unsigned int Offset() const {
+    size_t Offset() const { // only valid for binary tokens; asserted below
        ai_assert(IsBinary());
        return offset;
    }

    unsigned int Line() const {
        ai_assert(!IsBinary());
-        return line;
+        return static_cast<unsigned int>(line); // line is stored as size_t (it shares a union with offset); narrow back to the public return type
    }

    unsigned int Column() const {
@ -147,8 +147,8 @@ private:
    const TokenType type;

    union {
-        const unsigned int line;
-        unsigned int offset;
+        size_t line;
+        size_t offset;
    };
    const unsigned int column;
 };
@ -178,7 +178,7 @@ void Tokenize(TokenList& output_tokens, const char* input);
 * @param input_buffer Binary input buffer to be processed.
 * @param length Length of input buffer, in bytes. There is no 0-terminal.
 * @throw DeadlyImportError if something goes wrong */
-void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length);
+void TokenizeBinary(TokenList& output_tokens, const char* input, size_t length);


 } // ! FBX
--- a/code/FBXUtil.cpp
+++ b/code/FBXUtil.cpp
@ -86,7 +86,7 @@ const char* TokenTypeString(TokenType t)


 // ------------------------------------------------------------------------------------------------
-std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset)
+std::string AddOffset(const std::string& prefix, const std::string& text, size_t offset) // result: "<prefix> (offset 0x<offset>) <text>", offset streamed after std::hex
 {
    return static_cast<std::string>( (Formatter::format() << prefix << " (offset 0x" << std::hex << offset << ") " << text) );
 }
@ -114,47 +114,66 @@ std::string AddTokenText(const std::string& prefix, const std::string& text, con
        text) );
 }

+// Generated by this formula: T["ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i; every other entry (including '=') is 255, the value the decoder treats as an invalid character.
 static const uint8_t base64DecodeTable[128] = {
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63,
-    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0, 64,  0,  0,
-    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
-    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0,
-    0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
-    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
+    255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
+    255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255
 };

 uint8_t DecodeBase64(char ch)
 {
-    return base64DecodeTable[size_t(ch)];
+    const auto idx = static_cast<uint8_t>(ch); // convert to unsigned first so a negative char cannot become a huge index
+    if (idx > 127) // the lookup table only has 128 entries
+        return 255; // same marker the table uses for characters outside the alphabet
+    return base64DecodeTable[idx];
 }

-size_t DecodeBase64(const char* in, size_t inLength, uint8_t*& out)
+size_t ComputeDecodedSizeBase64(const char* in, size_t inLength) // size computation only: apart from the last two characters no input is read or validated
 {
-    if (inLength < 4) {
-        out = 0;
+    if (inLength < 2) // the padding check below reads the last two characters
+    {
        return 0;
    }
-
-    const size_t outLength = (inLength * 3) / 4;
-    out = new uint8_t[outLength];
-    memset(out, 0, outLength);
-
-    size_t i = 0;
-    size_t j = 0;
-    for (i = 0; i < inLength - 4; i += 4)
+    const size_t equals = size_t(in[inLength - 1] == '=') + size_t(in[inLength - 2] == '='); // number of '=' among the last two characters (0..2)
+    const size_t full_length = (inLength * 3) >> 2; // 3 output bytes per 4 input characters, rounded down
+    if (full_length < equals)
    {
-        uint8_t b0 = Util::DecodeBase64(in[i]);
-        uint8_t b1 = Util::DecodeBase64(in[i + 1]);
-        uint8_t b2 = Util::DecodeBase64(in[i + 2]);
-        uint8_t b3 = Util::DecodeBase64(in[i + 3]);
-
-        out[j++] = (uint8_t)((b0 << 2) | (b1 >> 4));
-        out[j++] = (uint8_t)((b1 << 4) | (b2 >> 2));
-        out[j++] = (uint8_t)((b2 << 6) | b3);
+        return 0;
    }
-    return outLength;
+    return full_length - equals; // NOTE(review): equals the byte count the 4-argument DecodeBase64 emits when inLength is a multiple of 4; for other lengths it can be smaller (6 characters ending in "==" give 2 here, the decoder emits 3) - confirm callers can cope
+}
+
+/** Decode a Base64-encoded string into a caller-provided buffer.
+ *
+ *  Up to two trailing '=' padding characters are skipped. Decoding is aborted and 0 is
+ *  returned when a character maps to 255 in the lookup table (not part of the Base64
+ *  alphabet) or when the next decoded byte would not fit into the output buffer; bytes
+ *  written before the abort are left in place.
+ *
+ *  @param in Characters to decode.
+ *  @param inLength Number of characters to decode.
+ *  @param out Buffer receiving the decoded bytes.
+ *  @param maxOutLength Capacity of out, in bytes.
+ *  @return number of bytes written to out, 0 on failure or when nothing was decoded */
+size_t DecodeBase64(const char* in, size_t inLength, uint8_t* out, size_t maxOutLength)
+{
+    if (maxOutLength == 0 || inLength < 2) {
+        return 0;
+    }
+    const size_t realLength = inLength - size_t(in[inLength - 1] == '=') - size_t(in[inLength - 2] == '=');
+    size_t dst_offset = 0;
+    // val collects the 6-bit groups read so far; valb is the right shift that aligns the
+    // next complete byte inside val and stays negative while fewer than 8 bits are pending.
+    int val = 0, valb = -8;
+    for (size_t src_offset = 0; src_offset < realLength; ++src_offset)
+    {
+        const uint8_t table_value = Util::DecodeBase64(in[src_offset]);
+        if (table_value == 255)
+        {
+            return 0;
+        }
+        val = (val << 6) + table_value;
+        valb += 6;
+        if (valb >= 0)
+        {
+            // Never write past the caller's buffer: the precomputed size and the number of
+            // bytes actually produced can disagree for input whose length is not a multiple of 4.
+            if (dst_offset >= maxOutLength)
+            {
+                return 0;
+            }
+            out[dst_offset++] = static_cast<uint8_t>((val >> valb) & 0xFF);
+            valb -= 8;
+            val &= 0xFFF; // drop bits that were already emitted so val cannot grow without bound
+        }
+    }
+    return dst_offset;
 }

 static const char to_base64_string[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
--- a/code/FBXUtil.h
+++ b/code/FBXUtil.h
@ -78,7 +78,7 @@ const char* TokenTypeString(TokenType t);
 *  @param line Line index, 1-based
 *  @param column Column index, 1-based
 *  @return A string of the following format: {prefix} (offset 0x{offset}) {text}*/
-std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset);
+std::string AddOffset(const std::string& prefix, const std::string& text, size_t offset);


 /** Format log/error messages using a given line location in the source file.
@ -105,13 +105,21 @@ std::string AddTokenText(const std::string& prefix, const std::string& text, con
 *  @return decoded byte value*/
 uint8_t DecodeBase64(char ch);

+/** Compute decoded size of a Base64-encoded string
+*
+*  @param in Characters to decode.
+*  @param inLength Number of characters to decode.
+*  @return size of the decoded data (number of bytes)*/
+size_t ComputeDecodedSizeBase64(const char* in, size_t inLength);
+
 /** Decode a Base64-encoded string
 *
 *  @param in Characters to decode.
 *  @param inLength Number of characters to decode.
-*  @param out Reference to pointer where we will store the decoded data.
+*  @param out Pointer where we will store the decoded data.
+*  @param maxOutLength Size of output buffer.
 *  @return size of the decoded data (number of bytes)*/
-size_t DecodeBase64(const char* in, size_t inLength, uint8_t*& out);
+size_t DecodeBase64(const char* in, size_t inLength, uint8_t* out, size_t maxOutLength);

 char EncodeBase64(char byte);

--- a/test/models/FBX/embedded_ascii/box_embedded_texture_fragmented.fbx
+++ b/test/models/FBX/embedded_ascii/box_embedded_texture_fragmented.fbx
--- a/test/unit/utFBXImporterExporter.cpp
+++ b/test/unit/utFBXImporterExporter.cpp
@ -128,4 +128,28 @@ TEST_F(utFBXImporterExporter, importEmbeddedAsciiTest) {
    aiString path;
    aiTextureMapMode modes[2];
    EXPECT_EQ(aiReturn_SUCCESS, mat->GetTexture(aiTextureType_DIFFUSE, 0, &path, nullptr, nullptr, nullptr, nullptr, modes));
+
+    ASSERT_EQ(1, scene->mNumTextures);
+    ASSERT_TRUE(scene->mTextures[0]->pcData);
+    ASSERT_EQ(439176u, scene->mTextures[0]->mWidth) << "FBX ASCII base64 compression splits data by 512Kb, it should be two parts for this texture";
+}
+
+// Imports the "fragmented" embedded-ASCII sample and checks that the embedded texture
+// is exposed with the expected name and size.
+TEST_F(utFBXImporterExporter, importEmbeddedFragmentedAsciiTest) {
+    // see https://github.com/assimp/assimp/issues/1957
+    Assimp::Importer importer;
+    const aiScene *scene = importer.ReadFile(ASSIMP_TEST_MODELS_DIR "/FBX/embedded_ascii/box_embedded_texture_fragmented.fbx", aiProcess_ValidateDataStructure);
+    // Fatal assertion: every statement below dereferences scene.
+    ASSERT_NE(nullptr, scene);
+
+    // Fatal assertion: mMaterials[0] is read on the next line.
+    ASSERT_EQ(1, scene->mNumMaterials);
+    aiMaterial *mat = scene->mMaterials[0];
+    ASSERT_NE(nullptr, mat);
+
+    aiString path;
+    aiTextureMapMode modes[2];
+    ASSERT_EQ(aiReturn_SUCCESS, mat->GetTexture(aiTextureType_DIFFUSE, 0, &path, nullptr, nullptr, nullptr, nullptr, modes));
+    ASSERT_STREQ(path.C_Str(), "paper.png");
+
+    ASSERT_EQ(1, scene->mNumTextures);
+    ASSERT_TRUE(scene->mTextures[0]->pcData);
+    ASSERT_EQ(968029u, scene->mTextures[0]->mWidth) << "FBX ASCII base64 compression splits data by 512Kb, it should be two parts for this texture";
 }