2012-08-09 00:08:12 +00:00
/*
Open Asset Import Library ( assimp )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2022-01-10 20:13:43 +00:00
Copyright ( c ) 2006 - 2022 , assimp team
2018-01-28 18:42:05 +00:00
2017-05-09 17:57:36 +00:00
2012-08-09 00:08:12 +00:00
All rights reserved .
2015-04-20 22:36:47 +00:00
Redistribution and use of this software in source and binary forms ,
with or without modification , are permitted provided that the
2012-08-09 00:08:12 +00:00
following conditions are met :
* Redistributions of source code must retain the above
copyright notice , this list of conditions and the
following disclaimer .
* Redistributions in binary form must reproduce the above
copyright notice , this list of conditions and the
following disclaimer in the documentation and / or other
materials provided with the distribution .
* Neither the name of the assimp team , nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior
written permission of the assimp team .
2015-04-20 22:36:47 +00:00
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
" AS IS " AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT
2012-08-09 00:08:12 +00:00
LIMITED TO , THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2015-04-20 22:36:47 +00:00
A PARTICULAR PURPOSE ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT
2012-08-09 00:08:12 +00:00
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL ,
2015-04-20 22:36:47 +00:00
SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT
2012-08-09 00:08:12 +00:00
LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
2015-04-20 22:36:47 +00:00
DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
2012-08-09 00:08:12 +00:00
OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/** @file FBXBinaryTokenizer.cpp
* @brief Implementation of a fake lexer for binary FBX files -
* we emit tokens so the parser needs almost no special handling
* for binary files .
*/
# ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
# include "FBXTokenizer.h"
# include "FBXUtil.h"
2016-06-06 20:04:29 +00:00
# include <assimp/defs.h>
2015-04-15 23:00:17 +00:00
# include <stdint.h>
2018-01-06 00:18:33 +00:00
# include <assimp/Exceptional.h>
# include <assimp/ByteSwapper.h>
2020-05-14 16:43:31 +00:00
# include <assimp/DefaultLogger.hpp>
2020-07-30 13:56:01 +00:00
# include <assimp/StringUtils.h>
2012-08-09 00:08:12 +00:00
namespace Assimp {
namespace FBX {
2017-07-19 22:00:53 +00:00
//enum Flag
//{
// e_unknown_0 = 1 << 0,
// e_unknown_1 = 1 << 1,
// e_unknown_2 = 1 << 2,
// e_unknown_3 = 1 << 3,
// e_unknown_4 = 1 << 4,
// e_unknown_5 = 1 << 5,
// e_unknown_6 = 1 << 6,
// e_unknown_7 = 1 << 7,
// e_unknown_8 = 1 << 8,
// e_unknown_9 = 1 << 9,
// e_unknown_10 = 1 << 10,
// e_unknown_11 = 1 << 11,
// e_unknown_12 = 1 << 12,
// e_unknown_13 = 1 << 13,
// e_unknown_14 = 1 << 14,
// e_unknown_15 = 1 << 15,
// e_unknown_16 = 1 << 16,
// e_unknown_17 = 1 << 17,
// e_unknown_18 = 1 << 18,
// e_unknown_19 = 1 << 19,
// e_unknown_20 = 1 << 20,
// e_unknown_21 = 1 << 21,
// e_unknown_22 = 1 << 22,
// e_unknown_23 = 1 << 23,
2019-02-05 14:36:49 +00:00
// e_flag_field_size_64_bit = 1 << 24, // Not sure what this flag actually means
2017-07-19 22:00:53 +00:00
// e_unknown_25 = 1 << 25,
// e_unknown_26 = 1 << 26,
// e_unknown_27 = 1 << 27,
// e_unknown_28 = 1 << 28,
// e_unknown_29 = 1 << 29,
// e_unknown_30 = 1 << 30,
// e_unknown_31 = 1 << 31
//};
//
//bool check_flag(uint32_t flags, Flag to_check)
//{
// return (flags & to_check) != 0;
//}
2012-08-09 00:08:12 +00:00
// ------------------------------------------------------------------------------------------------
2019-05-14 14:24:23 +00:00
// Constructs a token produced by the binary tokenizer.
//
// sbegin / send delimit the token's bytes, type is its kind, and offset is
// stored in the `line` member. `column` is always set to BINARY_MARKER.
// In DEBUG builds the byte range is additionally copied into `contents`.
Token::Token(const char *sbegin, const char *send, TokenType type, size_t offset) :
#ifdef DEBUG
        contents(sbegin, static_cast<size_t>(send - sbegin)),
#endif
        sbegin(sbegin),
        send(send),
        type(type),
        line(offset),
        column(BINARY_MARKER) {
    // Neither end of the range may be a null pointer.
    ai_assert(sbegin != nullptr);
    ai_assert(send != nullptr);

    // An empty range is fine: TokenizeBinary() sometimes inserts zero-length
    // dummy tokens. Only a reversed range is rejected.
    ai_assert(!(send < sbegin));
}
namespace {
// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
2019-05-14 14:24:23 +00:00
// Raises a DeadlyImportError built from the " FBX-Tokenize " tag, the text
// returned by Util::GetOffsetText(offset) and the caller-supplied message.
// Control never returns to the caller.
AI_WONT_RETURN void TokenizeError(const std::string &message, size_t offset) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string &message, size_t offset) {
    const auto location = Util::GetOffsetText(offset);
    throw DeadlyImportError(" FBX-Tokenize ", location, message);
}
// ------------------------------------------------------------------------------------------------
2019-05-14 14:24:23 +00:00
size_t Offset ( const char * begin , const char * cursor ) {
2015-05-19 03:57:13 +00:00
ai_assert ( begin < = cursor ) ;
2018-01-06 17:03:27 +00:00
2019-05-14 14:24:23 +00:00
return cursor - begin ;
2012-08-09 00:08:12 +00:00
}
// ------------------------------------------------------------------------------------------------
2018-01-06 17:03:27 +00:00
void TokenizeError ( const std : : string & message , const char * begin , const char * cursor ) {
2015-05-19 03:57:13 +00:00
TokenizeError ( message , Offset ( begin , cursor ) ) ;
2012-08-09 00:08:12 +00:00
}
// ------------------------------------------------------------------------------------------------
2018-01-06 17:03:27 +00:00
uint32_t ReadWord ( const char * input , const char * & cursor , const char * end ) {
2016-12-02 10:32:34 +00:00
const size_t k_to_read = sizeof ( uint32_t ) ;
if ( Offset ( cursor , end ) < k_to_read ) {
2015-05-19 03:57:13 +00:00
TokenizeError ( " cannot ReadWord, out of bounds " , input , cursor ) ;
}
2012-08-09 00:08:12 +00:00
2017-10-07 17:40:35 +00:00
uint32_t word ;
2018-01-06 17:03:27 +00:00
: : memcpy ( & word , cursor , 4 ) ;
2015-05-19 03:57:13 +00:00
AI_SWAP4 ( word ) ;
2012-08-09 00:08:12 +00:00
2016-12-02 10:32:34 +00:00
cursor + = k_to_read ;
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
return word ;
2012-08-09 00:08:12 +00:00
}
2016-12-02 10:32:34 +00:00
// ------------------------------------------------------------------------------------------------
2017-07-09 20:17:35 +00:00
uint64_t ReadDoubleWord ( const char * input , const char * & cursor , const char * end ) {
2016-10-14 16:47:10 +00:00
const size_t k_to_read = sizeof ( uint64_t ) ;
if ( Offset ( cursor , end ) < k_to_read ) {
TokenizeError ( " cannot ReadDoubleWord, out of bounds " , input , cursor ) ;
}
2018-01-06 17:03:27 +00:00
uint64_t dword /*= *reinterpret_cast<const uint64_t*>(cursor)*/ ;
: : memcpy ( & dword , cursor , sizeof ( uint64_t ) ) ;
2016-10-14 16:47:10 +00:00
AI_SWAP8 ( dword ) ;
cursor + = k_to_read ;
return dword ;
}
2012-08-09 00:08:12 +00:00
// ------------------------------------------------------------------------------------------------
2018-01-06 17:03:27 +00:00
uint8_t ReadByte ( const char * input , const char * & cursor , const char * end ) {
2016-12-02 10:32:34 +00:00
if ( Offset ( cursor , end ) < sizeof ( uint8_t ) ) {
2015-05-19 03:57:13 +00:00
TokenizeError ( " cannot ReadByte, out of bounds " , input , cursor ) ;
}
2012-08-09 00:08:12 +00:00
2018-01-06 17:03:27 +00:00
uint8_t word ; /* = *reinterpret_cast< const uint8_t* >( cursor )*/
: : memcpy ( & word , cursor , sizeof ( uint8_t ) ) ;
2015-05-19 03:57:13 +00:00
+ + cursor ;
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
return word ;
2012-08-09 00:08:12 +00:00
}
// ------------------------------------------------------------------------------------------------
2018-01-06 17:03:27 +00:00
unsigned int ReadString ( const char * & sbegin_out , const char * & send_out , const char * input ,
const char * & cursor , const char * end , bool long_length = false , bool allow_null = false ) {
2015-05-19 03:57:13 +00:00
const uint32_t len_len = long_length ? 4 : 1 ;
if ( Offset ( cursor , end ) < len_len ) {
TokenizeError ( " cannot ReadString, out of bounds reading length " , input , cursor ) ;
}
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
const uint32_t length = long_length ? ReadWord ( input , cursor , end ) : ReadByte ( input , cursor , end ) ;
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
if ( Offset ( cursor , end ) < length ) {
TokenizeError ( " cannot ReadString, length is out of bounds " , input , cursor ) ;
}
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
sbegin_out = cursor ;
cursor + = length ;
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
send_out = cursor ;
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
if ( ! allow_null ) {
for ( unsigned int i = 0 ; i < length ; + + i ) {
if ( sbegin_out [ i ] = = ' \0 ' ) {
TokenizeError ( " failed ReadString, unexpected NUL character in string " , input , cursor ) ;
}
}
}
2012-08-09 00:08:12 +00:00
2015-05-19 03:57:13 +00:00
return length ;
2012-08-09 00:08:12 +00:00
}
// ------------------------------------------------------------------------------------------------
2018-01-06 17:03:27 +00:00
void ReadData ( const char * & sbegin_out , const char * & send_out , const char * input , const char * & cursor , const char * end ) {
2015-05-19 03:57:13 +00:00
if ( Offset ( cursor , end ) < 1 ) {
TokenizeError ( " cannot ReadData, out of bounds reading length " , input , cursor ) ;
}
const char type = * cursor ;
sbegin_out = cursor + + ;
switch ( type )
{
// 16 bit int
case ' Y ' :
cursor + = 2 ;
break ;
// 1 bit bool flag (yes/no)
case ' C ' :
cursor + = 1 ;
break ;
// 32 bit int
case ' I ' :
2016-04-03 00:38:00 +00:00
// <- fall through
2015-05-19 03:57:13 +00:00
// float
case ' F ' :
cursor + = 4 ;
break ;
// double
case ' D ' :
cursor + = 8 ;
break ;
// 64 bit int
case ' L ' :
cursor + = 8 ;
break ;
// note: do not write cursor += ReadWord(...cursor) as this would be UB
// raw binary data
case ' R ' :
{
const uint32_t length = ReadWord ( input , cursor , end ) ;
cursor + = length ;
break ;
}
case ' b ' :
// TODO: what is the 'b' type code? Right now we just skip over it /
// take the full range we could get
cursor = end ;
break ;
// array of *
case ' f ' :
case ' d ' :
case ' l ' :
2019-02-05 14:36:49 +00:00
case ' i ' :
case ' c ' : {
2015-05-19 03:57:13 +00:00
const uint32_t length = ReadWord ( input , cursor , end ) ;
const uint32_t encoding = ReadWord ( input , cursor , end ) ;
const uint32_t comp_len = ReadWord ( input , cursor , end ) ;
// compute length based on type and check against the stored value
if ( encoding = = 0 ) {
uint32_t stride = 0 ;
switch ( type )
{
case ' f ' :
case ' i ' :
stride = 4 ;
break ;
case ' d ' :
case ' l ' :
stride = 8 ;
break ;
2019-02-05 14:36:49 +00:00
case ' c ' :
stride = 1 ;
break ;
2015-05-19 03:57:13 +00:00
default :
ai_assert ( false ) ;
} ;
2013-08-09 23:52:15 +00:00
ai_assert ( stride > 0 ) ;
2015-05-19 03:57:13 +00:00
if ( length * stride ! = comp_len ) {
TokenizeError ( " cannot ReadData, calculated data stride differs from what the file claims " , input , cursor ) ;
}
}
// zip/deflate algorithm (encoding==1)? take given length. anything else? die
else if ( encoding ! = 1 ) {
TokenizeError ( " cannot ReadData, unknown encoding " , input , cursor ) ;
}
cursor + = comp_len ;
break ;
}
// string
case ' S ' : {
const char * sb , * se ;
// 0 characters can legally happen in such strings
ReadString ( sb , se , input , cursor , end , true , true ) ;
break ;
}
default :
TokenizeError ( " cannot ReadData, unexpected type code: " + std : : string ( & type , 1 ) , input , cursor ) ;
}
if ( cursor > end ) {
TokenizeError ( " cannot ReadData, the remaining size is too small for the data type: " + std : : string ( & type , 1 ) , input , cursor ) ;
}
// the type code is contained in the returned range
send_out = cursor ;
2012-08-09 00:08:12 +00:00
}
// ------------------------------------------------------------------------------------------------
2022-04-20 10:33:39 +00:00
// Tokenizes one scope (node record) starting at cursor, recursing into any
// nested scopes, and appends the resulting tokens to output_tokens.
//
// output_tokens   - receives KEY, DATA, COMMA and bracket tokens
// token_allocator - not referenced by name here; presumably consumed by the
//                   new_Token macro - confirm against its definition
// input           - start of the whole buffer; block offsets are relative
//                   to it
// cursor          - current read position; left at the end of the scope
// end             - one past the last byte this scope may occupy
// is64bits        - selects 64-bit instead of 32-bit header fields
//
// Returns false when the end-offset field is zero (the original author notes
// the file footer always starts with a 0), true once a scope has been fully
// consumed. Calls TokenizeError on any structural inconsistency.
bool ReadScope(TokenList &output_tokens, StackAllocator &token_allocator, const char *input, const char *&cursor, const char *end, bool const is64bits) {
    // the first word contains the offset at which this block ends
    const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if (!end_offset) {
        return false;
    }

    if (end_offset > Offset(input, end)) {
        TokenizeError(" block offset is out of range ", input, cursor);
    } else if (end_offset < Offset(input, cursor)) {
        TokenizeError(" block offset is negative out of range ", input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char *sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor)));

    // now come the individual properties
    const char *begin_cursor = cursor;

    // Compare sizes rather than pointers: adding an unvalidated 64-bit length
    // to a pointer may leave the buffer (undefined behaviour) or wrap around.
    if (prop_length > Offset(begin_cursor, end)) {
        TokenizeError(" property length out of bounds reading length ", input, cursor);
    }
    const char *const prop_end = begin_cursor + static_cast<size_t>(prop_length);

    // The counter has the same width as prop_count so it cannot wrap.
    for (uint64_t i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, prop_end);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor)));

        // properties are comma-separated, with no comma after the last one
        if (i != prop_count - 1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor)));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError(" property length not reached, something is wrong ", input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit.
    const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t) * 3 + 1) : (sizeof(uint32_t) * 3 + 1);

    if (Offset(input, cursor) < end_offset) {
        if (end_offset - Offset(input, cursor) < sentinel_block_length) {
            TokenizeError(" insufficient padding bytes at block end ", input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor)));

        // Nested scopes end where the sentinel begins. The check above
        // guarantees this subtraction cannot underflow.
        const uint64_t nested_end_offset = end_offset - sentinel_block_length;

        // XXX this is vulnerable to stack overflowing ..
        while (Offset(input, cursor) < nested_end_offset) {
            ReadScope(output_tokens, token_allocator, input, cursor, input + nested_end_offset, is64bits);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor)));

        // the sentinel record must consist of zero bytes only
        for (size_t i = 0; i < sentinel_block_length; ++i) {
            if (cursor[i] != '\0') {
                TokenizeError(" failed to read nested block sentinel, expected all bytes to be 0 ", input, cursor);
            }
        }
        cursor += sentinel_block_length;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError(" scope length not reached, something is wrong ", input, cursor);
    }

    return true;
}
2018-01-06 17:03:27 +00:00
} // anonymous namespace
2012-08-09 00:08:12 +00:00
// ------------------------------------------------------------------------------------------------
2017-07-19 22:04:10 +00:00
// TODO: Test FBX Binary files newer than the 7500 version to check if the 64 bits address behaviour is consistent
2022-04-20 10:33:39 +00:00
// Tokenizes a binary FBX buffer into output_tokens.
//
// output_tokens   - receives the generated tokens
// input           - start of the file contents; must not be null
// length          - number of bytes available at input
// token_allocator - handed to ReadScope for every top-level scope
//
// Throws DeadlyImportError (via TokenizeError) if the buffer is shorter than
// 0x1b bytes, does not start with the magic string, or contains a malformed
// scope. When a file using 32-bit offsets is larger than a 32-bit offset can
// address, the error is rethrown with a message pointing at that limit.
void TokenizeBinary(TokenList &output_tokens, const char *input, size_t length, StackAllocator &token_allocator) {
    ai_assert(input);
    ASSIMP_LOG_DEBUG(" Tokenizing binary FBX file ");

    if (length < 0x1b) {
        TokenizeError(" file is too short ", 0);
    }

    // The compared length is derived from the literal so the two cannot
    // drift apart; the literal must contain exactly the magic bytes with no
    // surrounding padding.
    static const char magic[] = "Kaydara FBX Binary";
    const size_t magic_length = sizeof(magic) - 1;
    if (strncmp(input, magic, magic_length)) {
        TokenizeError(" magic bytes not found ", 0);
    }

    const char *const end = input + length;
    const char *cursor = input + magic_length;

    // Five header bytes follow the magic string; they are read and ignored.
    for (int skipped = 0; skipped < 5; ++skipped) {
        /*Result ignored*/ ReadByte(input, cursor, end);
    }

    const uint32_t version = ReadWord(input, cursor, end);
    ASSIMP_LOG_DEBUG(" FBX version: ", version);

    // TODO: confirm that files newer than version 7500 consistently use
    // 64-bit header fields.
    const bool is64bits = version >= 7500;

    try {
        while (cursor < end) {
            if (!ReadScope(output_tokens, token_allocator, input, cursor, end, is64bits)) {
                break;
            }
        }
    } catch (const DeadlyImportError &e) {
        // A file using 32-bit offsets cannot address more than 4 GiB; give a
        // more helpful diagnosis in that case.
        if (!is64bits && (length > std::numeric_limits<uint32_t>::max())) {
            throw DeadlyImportError(" The FBX file is invalid. This may be because the content is too big for this older version ( ", ai_to_string(version), " ) of the FBX format. ( ", e.what(), " ) ");
        }
        throw;
    }
}
} // !FBX
} // !Assimp
2014-09-15 19:35:03 +00:00
# endif