- fbx: add binary reading draft. Started from scratch after my first attempt had design flaws. The binary reader now really sits at the lexer stage and generates a (fake) token sequence similar to the text lexer's output - this means most of the parsing code is unaffected.
parent 3899fc5257
commit 24ce9495fd
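As a rough illustration of the design (a sketch, not part of the diff below): for a binary record whose text equivalent is

    ObjectType: "Geometry" {
        Count: 1
    }

TokenizeBinary() emits roughly KEY("ObjectType"), DATA, OPEN_BRACKET, KEY("Count"), DATA, CLOSE_BRACKET - the same token shapes the text lexer produces, which is why the existing parser can consume either stream. DATA tokens from the binary reader point at the raw property bytes (type code included) rather than at text.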
@@ -417,6 +417,7 @@ SET(FBX_SRCS
	FBXAnimation.cpp
	FBXNodeAttribute.cpp
	FBXDeformer.cpp
	FBXBinaryTokenizer.cpp
)
SOURCE_GROUP( FBX FILES ${FBX_SRCS})
@@ -0,0 +1,362 @@
/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2012, assimp team
All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/
/** @file  FBXBinaryTokenizer.cpp
 *  @brief Implementation of a fake lexer for binary fbx files -
 *    we emit tokens so the parser needs almost no special handling
 *    for binary files.
 */
#include "AssimpPCH.h"

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"

namespace Assimp {
namespace FBX {

// ------------------------------------------------------------------------------------------------
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
	: sbegin(sbegin)
	, send(send)
	, type(type)
	, line(offset)
	, column(BINARY_MARKER)
#ifdef DEBUG
	, contents(sbegin, static_cast<size_t>(send-sbegin))
#endif
{
	ai_assert(sbegin);
	ai_assert(send);

	// binary tokens may have zero length because they are sometimes dummies
	// inserted by TokenizeBinary()
	ai_assert(send >= sbegin);
}

namespace {

// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
void TokenizeError(const std::string& message, unsigned int offset)
{
	throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}


// ------------------------------------------------------------------------------------------------
uint32_t Offset(const char* begin, const char* cursor)
{
	ai_assert(begin <= cursor);
	return static_cast<uint32_t>(cursor - begin);
}


// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string& message, const char* begin, const char* cursor)
{
	TokenizeError(message, Offset(begin, cursor));
}

// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 4) {
		TokenizeError("cannot ReadWord, out of bounds",input, cursor);
	}

	uint32_t word = *reinterpret_cast<const uint32_t*>(cursor);
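	// note: the on-disk format is little-endian; AI_SWAP4 byte-swaps on
	// big-endian hosts and is a no-op on little-endian builds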
	AI_SWAP4(word);

	cursor += 4;

	return word;
}

// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 1) {
		TokenizeError("cannot ReadByte, out of bounds",input, cursor);
	}

	uint8_t byte = *reinterpret_cast<const uint8_t*>(cursor);
	++cursor;

	return byte;
}

// ------------------------------------------------------------------------------------------------
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
	bool long_length = false,
	bool allow_null = false)
{
	const uint32_t len_len = long_length ? 4 : 1;
	if(Offset(cursor, end) < len_len) {
		TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
	}

	const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

	if (Offset(cursor, end) < length) {
		TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
	}

	sbegin_out = cursor;
	cursor += length;

	send_out = cursor;

	if(!allow_null) {
		for (unsigned int i = 0; i < length; ++i) {
			if(sbegin_out[i] == '\0') {
				TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
			}
		}
	}

	return length;
}

// ------------------------------------------------------------------------------------------------
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
	if(Offset(cursor, end) < 1) {
		TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
	}

	const char type = *cursor;
	sbegin_out = cursor++;

	switch(type)
	{
		// 32 bit int
	case 'I':
		// <- fall through

		// float
	case 'F':
		cursor += 4;
		break;

		// double
	case 'D':
		cursor += 8;
		break;

		// 64 bit int
	case 'L':
		cursor += 8;
		break;

		// note: do not write cursor += ReadWord(...cursor) as this would be UB

		// raw binary data
	case 'R': {
		const uint32_t length = ReadWord(input, cursor, end);
		cursor += length;
		break;
	}

		// array of *
	case 'f':
	case 'd':
	case 'l':
	case 'i': {

		const uint32_t length = ReadWord(input, cursor, end);
		const uint32_t encoding = ReadWord(input, cursor, end);
		if(encoding == 0) {
			uint32_t stride = 0;
			switch(type)
			{
			case 'f':
			case 'i':
				stride = 4;
				break;

			case 'd':
			case 'l':
				stride = 8;
				break;

			default:
				ai_assert(false);
			}
			cursor += length * stride;
		}
		// zip/deflate compressed
		else if (encoding == 1) {
			// the next word holds the size of the stored (compressed) data, in bytes
			const uint32_t comp_len = ReadWord(input, cursor, end);
			cursor += comp_len;
		}
		break;
	}

		// string
	case 'S': {
		const char* sb, *se;
		// 0 characters can legally happen in such strings
		ReadString(sb, se, input, cursor, end, true, true);
		break;
	}

	default:
		TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
	}

	if(cursor > end) {
		TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
	}

	// the type code is contained in the returned range
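	// (e.g. for an 'I' property the returned range spans five bytes:
	// the type code itself plus the four byte integer payload)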
	send_out = cursor;
}

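// ------------------------------------------------------------------------------------------------
// For reference - a sketch of the on-disk layout of a record, as consumed by ReadScope() below
// (derived from the reading code; all words little-endian):
//
//   uint32  end_offset    absolute file offset at which this block ends
//   uint32  prop_count    number of properties in the scope
//   uint32  prop_length   length of the property list, in bytes
//   uint8   name_len      length of the scope/key name
//   char    name[name_len]
//   <prop_count properties, see ReadData()>
//   <optional nested records, followed by a 13 byte NUL sentinel>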
// ------------------------------------------------------------------------------------------------
void ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end)
{
	// the first word contains the offset at which this block ends
	const uint32_t end_offset = ReadWord(input, cursor, end);

	if(end_offset > Offset(input, end)) {
		TokenizeError("block offset is out of range",input, cursor);
	}
	else if(end_offset < Offset(input, cursor)) {
		TokenizeError("block offset is negative out of range",input, cursor);
	}

	// the second data word contains the number of properties in the scope
	const uint32_t prop_count = ReadWord(input, cursor, end);

	// the third data word contains the length of the property list
	const uint32_t prop_length = ReadWord(input, cursor, end);

	// now comes the name of the scope/key
	const char* sbeg, *send;
	ReadString(sbeg, send, input, cursor, end);

	output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

	// now come the individual properties
	const char* begin_cursor = cursor;
	for (unsigned int i = 0; i < prop_count; ++i) {
		ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

		output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

		if(i != prop_count-1) {
			output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
		}
	}

	if (Offset(begin_cursor, cursor) != prop_length) {
		TokenizeError("property length not reached, something is wrong",input, cursor);
	}

	// at the end of each nested block, there is a NUL record to indicate
	// that the sub-scope exists (i.e. to distinguish between P: and P : {})
	// this NUL record is 13 bytes long.
#define BLOCK_SENTINEL_LENGTH 13

	if (Offset(input, cursor) < end_offset) {

		if (end_offset - Offset(input, cursor) < BLOCK_SENTINEL_LENGTH) {
			TokenizeError("insufficient padding bytes at block end",input, cursor);
		}

		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

		// XXX this is vulnerable to stack overflowing ..
		while(Offset(input, cursor) < end_offset - BLOCK_SENTINEL_LENGTH) {
			ReadScope(output_tokens, input, cursor, input + end_offset - BLOCK_SENTINEL_LENGTH);
		}
		output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

		for (unsigned int i = 0; i < BLOCK_SENTINEL_LENGTH; ++i) {
			if(cursor[i] != '\0') {
				TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
			}
		}
		cursor += BLOCK_SENTINEL_LENGTH;
	}

	if (Offset(input, cursor) != end_offset) {
		TokenizeError("scope length not reached, something is wrong",input, cursor);
	}
}

}

// ------------------------------------------------------------------------------------------------
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
{
	ai_assert(input);

	if(length < 0x1b) {
		TokenizeError("file is too short",0);
	}

	if (strncmp(input,"Kaydara FBX Binary",18)) {
		TokenizeError("magic bytes not found",0);
	}
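
	// the full header is 0x1b (27) bytes; a sketch of its layout, as commonly
	// documented for the format (this tokenizer only validates the magic
	// string and skips the rest):
	//
	//   char    magic[21]   "Kaydara FBX Binary  \0"
	//   uint8   pad[2]      0x1a, 0x00
	//   uint32  version     e.g. 7300 for FBX 7.3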

	uint32_t offset = 0x1b;

	const char* cursor = input + 0x1b;

	while (cursor < input + length) {
		ReadScope(output_tokens, input, cursor, input + length);
	}
}

} // !FBX
} // !Assimp

#endif
@@ -520,7 +520,7 @@ const Object* LazyObject::Get(bool dieOnError)
 	}
 	else if (!strncmp(obtype,"Model",length)) {
 		// do not load IKEffectors yet
-		if (strcmp(classtag.c_str(),"IKEffector")) {
+		if (strcmp(classtag.c_str(),"IKEffector") && strcmp(classtag.c_str(),"FKEffector")) {
 			object.reset(new Model(id,element,doc,name));
 		}
 	}
@@ -152,7 +152,13 @@ void FBXImporter::InternReadFile( const std::string& pFile,
	// syntax elements of FBX (brackets, commas, key:value mappings)
	TokenList tokens;
	try {

		if (!strncmp(begin,"Kaydara FBX Binary",18)) {
			TokenizeBinary(tokens,begin,contents.size());
		}
		else {
			Tokenize(tokens,begin);
		}

		// use this information to construct a very rudimentary
		// parse-tree representing the FBX scope structure
@@ -65,6 +65,9 @@ enum TokenType
	// further processing happens at a later stage.
	TokenType_DATA,

	//
	TokenType_BINARY_DATA,

	// ,
	TokenType_COMMA,
@@ -80,9 +83,18 @@ enum TokenType
class Token
{

private:

	static const unsigned int BINARY_MARKER = static_cast<unsigned int>(-1);

public:

	/** construct a textual token */
	Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column);

	/** construct a binary token */
	Token(const char* sbegin, const char* send, TokenType type, unsigned int offset);

	~Token();

public:
@@ -93,6 +105,10 @@ public:

public:

	bool IsBinary() const {
		return column == BINARY_MARKER;
	}

	const char* begin() const {
		return sbegin;
	}
@@ -105,11 +121,18 @@ public:
		return type;
	}

	unsigned int Offset() const {
		ai_assert(IsBinary());
		return offset;
	}

	unsigned int Line() const {
		ai_assert(!IsBinary());
		return line;
	}

	unsigned int Column() const {
		ai_assert(!IsBinary());
		return column;
	}
@@ -126,7 +149,11 @@ private:
 	const char* const send;
 	const TokenType type;

-	const unsigned int line, column;
+	union {
+		const unsigned int line;
+		unsigned int offset;
+	};
+	const unsigned int column;
 };

 // XXX should use C++11's unique_ptr - but assimp needs to keep working with C++03
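The union is safe to read back because `column` doubles as the discriminator: the binary constructor stores `BINARY_MARKER` in `column`, so `IsBinary()` can tell whether `line` (text tokens) or `offset` (binary tokens) is the live member.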
@@ -146,8 +173,18 @@ typedef std::vector< TokenPtr > TokenList;
void Tokenize(TokenList& output_tokens, const char* input);


/** Tokenizer function for binary FBX files.
 *
 *  Emits a token list suitable for direct parsing.
 *
 *  @param output_tokens Receives a list of all tokens in the input data.
 *  @param input Binary input buffer to be processed.
 *  @param length Length of input buffer, in bytes. There is no 0-terminal.
 *  @throw DeadlyImportError if something goes wrong */
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length);


} // ! FBX
} // ! Assimp

#endif // ! INCLUDED_AI_FBX_PARSER_H
@@ -79,6 +79,12 @@ const char* TokenTypeString(TokenType t)
}


// ------------------------------------------------------------------------------------------------
std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset)
{
	return static_cast<std::string>( (Formatter::format(),prefix," (offset 0x",std::hex,offset,") ",text) );
}

// ------------------------------------------------------------------------------------------------
std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column)
{
@@ -88,6 +94,13 @@ std::string AddLineAndColumn(const std::string& prefix, const std::string& text,
// ------------------------------------------------------------------------------------------------
std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok)
{
	if(tok->IsBinary()) {
		return static_cast<std::string>( (Formatter::format(),prefix,
			" (",TokenTypeString(tok->Type()),
			", offset 0x", std::hex, tok->Offset(),") ",
			text) );
	}

	return static_cast<std::string>( (Formatter::format(),prefix,
		" (",TokenTypeString(tok->Type()),
		", line ",tok->Line(),
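For a binary token, the first branch above yields a message of the shape `{prefix} ({token type}, offset 0x{offset}) {text}`, matching AddOffset(); text tokens keep the existing line/column format.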
@@ -68,6 +68,17 @@ struct delete_fun
const char* TokenTypeString(TokenType t);


/** Format log/error messages using a given byte offset in the source binary file
 *
 *  @param prefix Message prefix to be prepended to the location info.
 *  @param text Message text
 *  @param offset Byte offset, 0-based
 *  @return A string of the following format: {prefix} (offset 0x{offset}) {text}*/
std::string AddOffset(const std::string& prefix, const std::string& text, unsigned int offset);


/** Format log/error messages using a given line location in the source file.
 *
 *  @param prefix Message prefix to be prepended to the location info.