Fragmented FBX ASCII emdedded resource

pull/2466/head
Mike Samsonov 2019-05-14 14:43:16 +01:00
parent 524d8c281e
commit c84ac7a135
6 changed files with 812 additions and 41 deletions

View File

@ -643,7 +643,7 @@ private:
std::string fileName;
std::shared_ptr<const PropertyTable> props;
uint32_t contentLength;
uint64_t contentLength;
uint8_t* content;
};

View File

@ -326,16 +326,40 @@ Video::Video(uint64_t id, const Element& element, const Document& doc, const std
DOMError("embedded content is not surrounded by quotation marks", &element);
}
else {
const char* encodedData = data + 1;
size_t encodedDataLen = static_cast<size_t>(token.end() - token.begin());
// search for last quotation mark
while (encodedDataLen > 1 && encodedData[encodedDataLen] != '"')
encodedDataLen--;
if (encodedDataLen % 4 != 0) {
DOMError("embedded content is invalid, needs to be in base64", &element);
size_t targetLength = 0;
auto numTokens = Content->Tokens().size();
// First time compute size (it could be large like 64Gb and it is good to allocate it once)
for (uint32_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
{
const Token& dataToken = GetRequiredToken(*Content, tokenIdx);
size_t tokenLength = dataToken.end() - dataToken.begin() - 2; // ignore double quotes
const char* base64data = dataToken.begin() + 1;
const size_t outLength = Util::ComputeDecodedSizeBase64(base64data, tokenLength);
if (outLength == 0)
{
DOMError("Corrupted embedded content found", &element);
}
targetLength += outLength;
}
else {
contentLength = Util::DecodeBase64(encodedData, encodedDataLen, content);
if (targetLength == 0)
{
DOMError("Corrupted embedded content found", &element);
}
content = new uint8_t[targetLength];
contentLength = static_cast<uint64_t>(targetLength);
size_t dst_offset = 0;
for (uint32_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
{
const Token& dataToken = GetRequiredToken(*Content, tokenIdx);
size_t tokenLength = dataToken.end() - dataToken.begin() - 2; // ignore double quotes
const char* base64data = dataToken.begin() + 1;
dst_offset += Util::DecodeBase64(base64data, tokenLength, content + dst_offset, targetLength - dst_offset);
}
if (targetLength != dst_offset)
{
delete[] content;
contentLength = 0;
DOMError("Corrupted embedded content found", &element);
}
}
}

View File

@ -114,47 +114,65 @@ std::string AddTokenText(const std::string& prefix, const std::string& text, con
text) );
}
// Generated by this formula: T["ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i]] = i;
static const uint8_t base64DecodeTable[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 64, 0, 0,
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255
};
uint8_t DecodeBase64(char ch)
{
return base64DecodeTable[size_t(ch)];
const auto idx = static_cast<uint8_t>(ch);
if (idx > 127)
return 255;
return base64DecodeTable[idx];
}
size_t DecodeBase64(const char* in, size_t inLength, uint8_t*& out)
size_t ComputeDecodedSizeBase64(const char* in, size_t inLength)
{
if (inLength < 4) {
out = 0;
if (inLength < 2)
{
return 0;
}
const size_t outLength = (inLength * 3) / 4;
out = new uint8_t[outLength];
memset(out, 0, outLength);
size_t i = 0;
size_t j = 0;
for (i = 0; i < inLength - 4; i += 4)
const size_t equals = size_t(in[inLength - 1] == '=') + size_t(in[inLength - 2] == '=');
const size_t full_length = (inLength * 3) >> 2; // div by 4
if (full_length < equals)
{
uint8_t b0 = Util::DecodeBase64(in[i]);
uint8_t b1 = Util::DecodeBase64(in[i + 1]);
uint8_t b2 = Util::DecodeBase64(in[i + 2]);
uint8_t b3 = Util::DecodeBase64(in[i + 3]);
out[j++] = (uint8_t)((b0 << 2) | (b1 >> 4));
out[j++] = (uint8_t)((b1 << 4) | (b2 >> 2));
out[j++] = (uint8_t)((b2 << 6) | b3);
return 0;
}
return outLength;
return full_length - equals;
}
size_t DecodeBase64(const char* in, size_t inLength, uint8_t* out, size_t maxOutLength)
{
if (maxOutLength == 0 || inLength < 2) {
return 0;
}
const size_t realLength = inLength - size_t(in[inLength - 1] == '=') - size_t(in[inLength - 2] == '=');
size_t dst_offset = 0;
int val = 0, valb = -8;
for (size_t src_offset = 0; src_offset < realLength; ++src_offset)
{
const uint8_t table_value = Util::DecodeBase64(in[src_offset]);
if (table_value == 255)
{
return 0;
}
val = (val << 6) + table_value;
valb += 6;
if (valb >= 0)
{
out[dst_offset++] = static_cast<uint8_t>((val >> valb) & 0xFF);
valb -= 8;
}
}
return dst_offset;
}
static const char to_base64_string[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

View File

@ -105,13 +105,21 @@ std::string AddTokenText(const std::string& prefix, const std::string& text, con
* @return decoded byte value*/
uint8_t DecodeBase64(char ch);
/** Compute decoded size of a Base64-encoded string
*
* @param in Characters to decode.
* @param inLength Number of characters to decode.
* @return size of the decoded data (number of bytes)*/
size_t ComputeDecodedSizeBase64(const char* in, size_t inLength);
/** Decode a Base64-encoded string
*
* @param in Characters to decode.
* @param inLength Number of characters to decode.
* @param out Reference to pointer where we will store the decoded data.
* @param out Pointer where we will store the decoded data.
* @param maxOutLength Size of output buffer.
* @return size of the decoded data (number of bytes)*/
size_t DecodeBase64(const char* in, size_t inLength, uint8_t*& out);
size_t DecodeBase64(const char* in, size_t inLength, uint8_t* out, size_t maxOutLength);
char EncodeBase64(char byte);

File diff suppressed because one or more lines are too long

View File

@ -128,4 +128,28 @@ TEST_F(utFBXImporterExporter, importEmbeddedAsciiTest) {
aiString path;
aiTextureMapMode modes[2];
EXPECT_EQ(aiReturn_SUCCESS, mat->GetTexture(aiTextureType_DIFFUSE, 0, &path, nullptr, nullptr, nullptr, nullptr, modes));
ASSERT_EQ(1, scene->mNumTextures);
ASSERT_TRUE(scene->mTextures[0]->pcData);
ASSERT_EQ(439176u, scene->mTextures[0]->mWidth) << "FBX ASCII base64 compression splits data by 512Kb, it should be two parts for this texture";
}
TEST_F(utFBXImporterExporter, importEmbeddedFragmentedAsciiTest) {
// see https://github.com/assimp/assimp/issues/1957
Assimp::Importer importer;
const aiScene *scene = importer.ReadFile(ASSIMP_TEST_MODELS_DIR "/FBX/embedded_ascii/box_embedded_texture_fragmented.fbx", aiProcess_ValidateDataStructure);
EXPECT_NE(nullptr, scene);
EXPECT_EQ(1, scene->mNumMaterials);
aiMaterial *mat = scene->mMaterials[0];
ASSERT_NE(nullptr, mat);
aiString path;
aiTextureMapMode modes[2];
ASSERT_EQ(aiReturn_SUCCESS, mat->GetTexture(aiTextureType_DIFFUSE, 0, &path, nullptr, nullptr, nullptr, nullptr, modes));
ASSERT_STREQ(path.C_Str(), "paper.png");
ASSERT_EQ(1, scene->mNumTextures);
ASSERT_TRUE(scene->mTextures[0]->pcData);
ASSERT_EQ(968029u, scene->mTextures[0]->mWidth) << "FBX ASCII base64 compression splits data by 512Kb, it should be two parts for this texture";
}