- fbx: add binary reading draft. Started from scratch after my first attempt had design flaws. The binary reader now sits at the lexer stage and generates a (fake) token sequence similar to the text lexer's output - this means most of the parsing code can remain unchanged.
parent
3899fc5257
commit
24ce9495fd
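
For orientation, a minimal sketch of the nested record layout the new tokenizer walks, as implied by ReadScope() and ReadData() in the diff below; the struct-style notation and field names are illustrative only, not part of the patch:

    // one nested record ("scope") in a Kaydara FBX Binary file,
    // as consumed by ReadScope(); words are little-endian uint32
    //
    //   uint32 end_offset;    // absolute offset at which this block ends
    //   uint32 prop_count;    // number of properties in the scope
    //   uint32 prop_length;   // length of the property list, in bytes
    //   uint8  name_len;      // length of the scope/key name
    //   char   name[name_len];
    //   <prop_count typed property records, see ReadData()>
    //   <nested scopes, if any>
    //   uint8  sentinel[13];  // all-zero NUL record closing a nested block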
@@ -417,6 +417,7 @@ SET(FBX_SRCS
 	FBXAnimation.cpp
 	FBXNodeAttribute.cpp
 	FBXDeformer.cpp
+	FBXBinaryTokenizer.cpp
 )
 SOURCE_GROUP( FBX FILES ${FBX_SRCS})

@@ -0,0 +1,362 @@
+/*
+Open Asset Import Library (assimp)
+----------------------------------------------------------------------
+
+Copyright (c) 2006-2012, assimp team
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the
+following conditions are met:
+
+* Redistributions of source code must retain the above
+  copyright notice, this list of conditions and the
+  following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the
+  following disclaimer in the documentation and/or other
+  materials provided with the distribution.
+
+* Neither the name of the assimp team, nor the names of its
+  contributors may be used to endorse or promote products
+  derived from this software without specific prior
+  written permission of the assimp team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----------------------------------------------------------------------
+*/
+
+/** @file  FBXBinaryTokenizer.cpp
+ *  @brief Implementation of a fake lexer for binary fbx files -
+ *    we emit tokens so the parser needs almost no special handling
+ *    for binary files.
+ */
+#include "AssimpPCH.h"
+
+#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
+
+#include "FBXTokenizer.h"
+#include "FBXUtil.h"
+
+namespace Assimp {
+namespace FBX {
+
+// ------------------------------------------------------------------------------------------------
+Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
+    : sbegin(sbegin)
+    , send(send)
+    , type(type)
+    , line(offset)
+    , column(BINARY_MARKER)
+#ifdef DEBUG
+    , contents(sbegin, static_cast<size_t>(send-sbegin))
+#endif
+{
+    ai_assert(sbegin);
+    ai_assert(send);
+
+    // binary tokens may have zero length because they are sometimes dummies
+    // inserted by TokenizeBinary()
+    ai_assert(send >= sbegin);
+}
+
+
+namespace {
+
+// ------------------------------------------------------------------------------------------------
+// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
+void TokenizeError(const std::string& message, unsigned int offset)
+{
+    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
+}
+
+
+// ------------------------------------------------------------------------------------------------
+uint32_t Offset(const char* begin, const char* cursor)
+{
+    ai_assert(begin <= cursor);
+    return static_cast<unsigned int>(cursor - begin);
+}
+
+
+// ------------------------------------------------------------------------------------------------
+void TokenizeError(const std::string& message, const char* begin, const char* cursor)
+{
+    TokenizeError(message, Offset(begin, cursor));
+}
+
+
+// ------------------------------------------------------------------------------------------------
+uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
+{
+    if(Offset(cursor, end) < 4) {
+        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
+    }
+
+    uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
+    AI_SWAP4(word);
+
+    cursor += 4;
+
+    return word;
+}
+
+
+// ------------------------------------------------------------------------------------------------
+uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
+{
+    if(Offset(cursor, end) < 1) {
+        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
+    }
+
+    uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
+    ++cursor;
+
+    return word;
+}
+
+
+// ------------------------------------------------------------------------------------------------
+unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
+    bool long_length = false,
+    bool allow_null = false)
+{
+    const uint32_t len_len = long_length ? 4 : 1;
+    if(Offset(cursor, end) < len_len) {
+        TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
+    }
+
+    const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);
+
+    if (Offset(cursor, end) < length) {
+        TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
+    }
+
+    sbegin_out = cursor;
+    cursor += length;
+
+    send_out = cursor;
+
+    if(!allow_null) {
+        for (unsigned int i = 0; i < length; ++i) {
+            if(sbegin_out[i] == '\0') {
+                TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
+            }
+        }
+    }
+
+    return length;
+}
+
+
+// ------------------------------------------------------------------------------------------------
+void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
+{
+    if(Offset(cursor, end) < 1) {
+        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
+    }
+
+    const char type = *cursor;
+    sbegin_out = cursor++;
+
+    switch(type)
+    {
+        // 32 bit int
+    case 'I':
+        // <- fall thru
+
+        // float
+    case 'F':
+        cursor += 4;
+        break;
+
+        // double
+    case 'D':
+        cursor += 8;
+        break;
+
+        // 64 bit int
+    case 'L':
+        cursor += 8;
+        break;
+
+        // note: do not write cursor += ReadWord(...cursor) as this would be UB
+
+        // raw binary data
+    case 'R': {
+        const uint32_t length = ReadWord(input, cursor, end);
+        cursor += length;
+        break;
+    }
+
+        // array of *
+    case 'f':
+    case 'd':
+    case 'l':
+    case 'i': {
+
+        const uint32_t length = ReadWord(input, cursor, end);
+        const uint32_t encoding = ReadWord(input, cursor, end);
+        if(encoding == 0) {
+            uint32_t stride;
+            switch(type)
+            {
+            case 'f':
+            case 'i':
+                stride = 4;
+                break;
+
+            case 'd':
+            case 'l':
+                stride = 8;
+                break;
+
+            default:
+                ai_assert(false);
+            };
+            cursor += length * stride;
+        }
+        // zip/deflate algorithm?
+        else if (encoding == 1) {
+            const uint32_t decomp_len = ReadWord(input, cursor, end);
+            cursor += decomp_len;
+        }
+        break;
+    }
+
+        // string
+    case 'S': {
+        const char* sb, *se;
+        // 0 characters can legally happen in such strings
+        ReadString(sb, se, input, cursor, end, true, true);
+        break;
+    }
+    default:
+        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
+    }
+
+    if(cursor > end) {
+        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
+    }
+
+    // the type code is contained in the returned range
+    send_out = cursor;
+}
+
+
+// ------------------------------------------------------------------------------------------------
+void ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
+{
+    // the first word contains the offset at which this block ends
+    const uint32_t end_offset = ReadWord(input, cursor, end);
+
+    if(end_offset > Offset(input, end)) {
+        TokenizeError("block offset is out of range",input, cursor);
+    }
+    else if(end_offset < Offset(input, cursor)) {
+        TokenizeError("block offset is negative out of range",input, cursor);
+    }
+
+    // the second data word contains the number of properties in the scope
+    const uint32_t prop_count = ReadWord(input, cursor, end);
+
+    // the third data word contains the length of the property list
+    const uint32_t prop_length = ReadWord(input, cursor, end);
+
+    // now comes the name of the scope/key
+    const char* sbeg, *send;
+    ReadString(sbeg, send, input, cursor, end);
+
+    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));
+
+    // now come the individual properties
+    const char* begin_cursor = cursor;
+    for (unsigned int i = 0; i < prop_count; ++i) {
+        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);
+
+        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));
+
+        if(i != prop_count-1) {
+            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
+        }
+    }
+
+    if (Offset(begin_cursor, cursor) != prop_length) {
+        TokenizeError("property length not reached, something is wrong",input, cursor);
+    }
+
+    // at the end of each nested block, there is a NUL record to indicate
+    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
+    // this NUL record is 13 bytes long.
+#define BLOCK_SENTINEL_LENGTH 13
+
+    if (Offset(input, cursor) < end_offset) {
+
+        if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
+            TokenizeError("insufficient padding bytes at block end",input, cursor);
+        }
+
+        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));
+
+        // XXX this is vulnerable to stack overflowing ..
+        while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
+            ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
+        }
+        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));
+
+        for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
+            if(cursor[i] != '\0') {
+                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
+            }
+        }
+        cursor += BLOCK_SENTINEL_LENGTH;
+    }
+
+    if (Offset(input, cursor) != end_offset) {
+        TokenizeError("scope length not reached, something is wrong",input, cursor);
+    }
+}
+
+
+}
+
+// ------------------------------------------------------------------------------------------------
+void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
+{
+    ai_assert(input);
+
+    if(length < 0x1b) {
+        TokenizeError("file is too short",0);
+    }
+
+    if (strncmp(input,"Kaydara FBX Binary",18)) {
+        TokenizeError("magic bytes not found",0);
+    }
+
+    uint32_t offset = 0x1b;
+
+    const char* cursor = input + 0x1b;
+
+    while (cursor < input + length) {
+        ReadScope(output_tokens, input, cursor, input + length);
+    }
+}
+
+} // !FBX
+} // !Assimp
+
+#endif

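To make the "fake token sequence" idea concrete, here is a hypothetical mapping (record name and property values invented for illustration), based on the tokens ReadScope() pushes above:

    // A binary record named "Geometry" carrying two properties and a
    // nested scope is surfaced to the parser as the token stream the
    // text lexer would emit for something like:
    //
    //   Geometry: 140234, "Cube::Geometry" {
    //       ...
    //   }
    //
    // i.e. KEY, DATA, COMMA, DATA, OPEN_BRACKET, ..., CLOSE_BRACKET
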
@@ -520,7 +520,7 @@ const Object* LazyObject::Get(bool dieOnError)
 	}
 	else if (!strncmp(obtype,"Model",length)) {
 		// do not load IKEffectors yet
-		if (strcmp(classtag.c_str(),"IKEffector")) {
+		if (strcmp(classtag.c_str(),"IKEffector") && strcmp(classtag.c_str(),"FKEffector")) {
 			object.reset(new Model(id,element,doc,name));
 		}
 	}

@@ -152,7 +152,13 @@ void FBXImporter::InternReadFile( const std::string& pFile,
 	// syntax elements of FBX (brackets, commas, key:value mappings)
 	TokenList tokens;
 	try {
-		Tokenize(tokens,begin);
+
+		if (!strncmp(begin,"Kaydara FBX Binary",18)) {
+			TokenizeBinary(tokens,begin,contents.size());
+		}
+		else {
+			Tokenize(tokens,begin);
+		}
 
 	// use this information to construct a very rudimentary
 	// parse-tree representing the FBX scope structure

@@ -65,6 +65,9 @@ enum TokenType
 	// further processing happens at a later stage.
 	TokenType_DATA,
 
+	//
+	TokenType_BINARY_DATA,
+
 	// ,
 	TokenType_COMMA,

@@ -80,9 +83,18 @@ enum TokenType
 class Token
 {
 
+private:
+	static const unsigned int BINARY_MARKER = static_cast<unsigned int>(-1);
+
 public:
 
 	/** construct a textual token */
 	Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column);
 
+	/** construct a binary token */
+	Token(const char* sbegin, const char* send, TokenType type, unsigned int offset);
+
 	~Token();
 
 public:

@@ -93,6 +105,10 @@ public:
 
 public:
 
+	bool IsBinary() const {
+		return column == BINARY_MARKER;
+	}
+
 	const char* begin() const {
 		return sbegin;
 	}

@@ -105,11 +121,18 @@ public:
 		return type;
 	}
 
+	unsigned int Offset() const {
+		ai_assert(IsBinary());
+		return offset;
+	}
+
 	unsigned int Line() const {
+		ai_assert(!IsBinary());
 		return line;
 	}
 
 	unsigned int Column() const {
+		ai_assert(!IsBinary());
 		return column;
 	}

@@ -126,7 +149,11 @@ private:
 	const char* const send;
 	const TokenType type;
 
-	const unsigned int line, column;
+	union {
+		const unsigned int line;
+		unsigned int offset;
+	};
+	const unsigned int column;
 };
 
 // XXX should use C++11's unique_ptr - but assimp needs to keep working with C++03

@@ -146,8 +173,18 @@ typedef std::vector< TokenPtr > TokenList;
 void Tokenize(TokenList& output_tokens, const char* input);
 
 
+/** Tokenizer function for binary FBX files.
+ *
+ *  Emits a token list suitable for direct parsing.
+ *
+ *  @param output_tokens Receives a list of all tokens in the input data.
+ *  @param input Binary input buffer to be processed.
+ *  @param length Length of input buffer, in bytes. There is no 0-terminal.
+ *  @throw DeadlyImportError if something goes wrong */
+void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length);
+
+
 } // ! FBX
 } // ! Assimp
 
 #endif // ! INCLUDED_AI_FBX_PARSER_H

@@ -79,6 +79,12 @@ const char* TokenTypeString(TokenType t)
 }
 
 
+// ------------------------------------------------------------------------------------------------
+std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset)
+{
+	return static_cast<std::string>( (Formatter::format(),prefix," (offset 0x",std::hex,offset,") ",text) );
+}
+
 // ------------------------------------------------------------------------------------------------
 std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column)
 {

@@ -88,6 +94,13 @@ std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column)
 // ------------------------------------------------------------------------------------------------
 std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok)
 {
+	if(tok->IsBinary()) {
+		return static_cast<std::string>( (Formatter::format(),prefix,
+			" (",TokenTypeString(tok->Type()),
+			", offset 0x", std::hex, tok->Offset(),") ",
+			text) );
+	}
+
 	return static_cast<std::string>( (Formatter::format(),prefix,
 		" (",TokenTypeString(tok->Type()),
 		", line ",tok->Line(),

@@ -68,6 +68,17 @@ struct delete_fun
 const char* TokenTypeString(TokenType t);
 
 
+/** Format log/error messages using a given offset in the source binary file
+ *
+ *  @param prefix Message prefix to be prepended to the location info.
+ *  @param text Message text
+ *  @param offset Offset within the source file, in bytes
+ *  @return A string of the following format: {prefix} (offset 0x{offset}) {text} */
+std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset);
+
+
 /** Format log/error messages using a given line location in the source file.
  *
  *  @param prefix Message prefix to be prepended to the location info.
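
As a concrete illustration of the message format AddOffset() produces (the offset value here is invented), an error raised by the binary tokenizer would read:

    FBX-Tokenize (offset 0x5a) cannot ReadWord, out of bounds

given the "FBX-Tokenize" prefix passed by TokenizeError() above.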
(diff for one additional file suppressed because it is too large)