- fbx: fix various issues with the tokenizer: stricter error checking and better debuggability. Do not use shared_ptrs for tokens; there are simply too many of them, and ownership is always clear.

pull/14/head
acgessler 2012-06-25 17:31:42 +02:00
parent ff995307ac
commit 6c5efe471f
5 changed files with 722 additions and 632 deletions
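The gist of the change, before the per-file diffs: token ownership moves from boost::shared_ptr to plain pointers, with explicit cleanup on every error path. Below is a minimal standalone sketch of that pattern; Token and Tokenize() are stand-ins for illustration, not the real classes from FBXTokenizer.h.

    // Standalone sketch of the ownership pattern this commit introduces.
    #include <algorithm>
    #include <vector>

    struct Token {};                          // stand-in for FBX::Token

    typedef Token* TokenPtr;                  // was: boost::shared_ptr<Token>
    typedef std::vector<TokenPtr> TokenList;

    template <typename T>
    struct delete_fun {
        void operator()(T* del) { delete del; }
    };

    // stub standing in for the real Tokenize() declared in FBXTokenizer.h
    static void Tokenize(TokenList& out, const char* /*input*/)
    {
        out.push_back(new Token());
    }

    int main()
    {
        TokenList tokens;
        try {
            Tokenize(tokens, "Key: \"value\"");
            // ... hand the tokens to the parser ...
        }
        catch (...) {
            // with raw pointers, an exception would otherwise leak every token
            std::for_each(tokens.begin(), tokens.end(), delete_fun<Token>());
            throw;
        }
        std::for_each(tokens.begin(), tokens.end(), delete_fun<Token>());
        return 0;
    }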

View File: FBXImporter.cpp

@@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "FBXTokenizer.h"
 #include "FBXParser.h"
+#include "FBXUtil.h"
 #include "StreamReader.h"
 #include "MemoryIOWrapper.h"
@@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile,
 	// broadphase tokenizing pass in which we identify the core
 	// syntax elements of FBX (brackets, commas, key:value mappings)
 	TokenList tokens;
-	Tokenize(tokens,begin);
+	try {
+		Tokenize(tokens,begin);
 
-	// use this information to construct a very rudimentary
-	// parse-tree representing the FBX scope structure
-	Parser parser(tokens);
+		// use this information to construct a very rudimentary
+		// parse-tree representing the FBX scope structure
+		Parser parser(tokens);
+	}
+	catch(...) {
+		std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>());
+		throw;
+	}
 }
 
 #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER

View File: FBXTokenizer.cpp

@@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int
 , type(type)
 , line(line)
 , column(column)
+#ifdef DEBUG
+, contents(sbegin, static_cast<size_t>(send-sbegin))
+#endif
 {
 	ai_assert(sbegin);
 	ai_assert(send);
@@ -74,23 +77,45 @@ Token::~Token()
 namespace {
 
+// ------------------------------------------------------------------------------------------------
+// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
+void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
+{
+	throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column));
+}
+
 // process a potential data token up to 'cur', adding it to 'output_tokens'.
 // ------------------------------------------------------------------------------------------------
 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
 					  unsigned int line,
 					  unsigned int column,
-					  TokenType type = TokenType_DATA)
+					  TokenType type = TokenType_DATA,
+					  bool must_have_token = false)
 {
-	if (start != end) {
-		// tokens should have no whitespace in them and [start,end] should
+	if (start && end) {
+		// sanity check:
+		// tokens should have no whitespace outside quoted text and [start,end] should
 		// properly delimit the valid range.
-		for (const char* c = start; c != end; ++c) {
-			if (IsSpaceOrNewLine(*c)) {
-				throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column));
+		bool in_double_quotes = false;
+		for (const char* c = start; c != end + 1; ++c) {
+			if (*c == '\"') {
+				in_double_quotes = !in_double_quotes;
+			}
+
+			if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
+				TokenizeError("unexpected whitespace in token", line, column);
 			}
 		}
 
-		output_tokens.push_back(boost::make_shared<Token>(start,end,type,line,column));
+		if (in_double_quotes) {
+			TokenizeError("non-terminated double quotes", line, column);
+		}
+
+		output_tokens.push_back(new_Token(start,end + 1,type,line,column));
+	}
+	else if (must_have_token) {
+		TokenizeError("unexpected character, expected data token", line, column);
 	}
 
 	start = end = NULL;
@@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input)
 	bool comment = false;
 	bool in_double_quotes = false;
+	bool pending_data_token = false;
 
 	const char* token_begin = NULL, *token_end = NULL;
 	for (const char* cur = input;*cur;++cur,++column) {
@@ -119,8 +145,6 @@ void Tokenize(TokenList& output_tokens, const char* input)
 			column = 0;
 			++line;
-
-			continue;
 		}
 
 		if(comment) {
@@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		if (c == '\"') {
 			in_double_quotes = false;
 			token_end = cur;
-			if (!token_begin) {
-				token_begin = cur;
-			}
+
+			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
+			pending_data_token = false;
 		}
 		continue;
 	}
@@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		switch(c)
 		{
 		case '\"':
+			if (token_begin) {
+				TokenizeError("unexpected double-quote", line, column);
+			}
+			token_begin = cur;
 			in_double_quotes = true;
 			continue;
@@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		case '{':
 			ProcessDataToken(output_tokens,token_begin,token_end, line, column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
+			continue;
 
 		case '}':
 			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
+			continue;
 
 		case ',':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_COMMA,line,column));
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
+			}
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
+			continue;
 
 		case ':':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY);
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
+			}
+			else {
+				TokenizeError("unexpected colon", line, column);
+			}
+			continue;
 		}
 
-		if (!IsSpaceOrNewLine(c)) {
+		if (IsSpaceOrNewLine(c)) {
+			if (token_begin) {
+				// peek ahead and check if the next token is a colon in which
+				// case this counts as KEY token.
+				TokenType type = TokenType_DATA;
+				for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {
+					if (*peek == ':') {
+						type = TokenType_KEY;
+						cur = peek;
+						break;
+					}
+				}
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
+			}
+
+			pending_data_token = false;
+		}
+		else {
 			token_end = cur;
 			if (!token_begin) {
 				token_begin = cur;
 			}
+
+			pending_data_token = true;
 		}
 	}
 }
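To make the new tokenizer rules concrete, here is how one input line should come out of Tokenize() according to the hunks above; the token stream is worked out by hand from the diff, not generated output.

    // Input:
    //     Key: "some value", 42 {
    //     }
    //
    // Expected token stream (hand-traced, illustrative):
    //     KEY            Key              trailing ':' classifies the pending token
    //     DATA           "some value"     whitespace is legal inside double quotes;
    //                                     the quotes stay part of the token text
    //     COMMA          ,                any pending data token is flushed first
    //     DATA           42
    //     OPEN_BRACKET   {
    //     CLOSE_BRACKET  }
    //
    // A stray ':' with no pending data token now raises a DeadlyImportError
    // ("unexpected colon"), and ProcessDataToken() rejects unbalanced quotes
    // and whitespace outside quoted text instead of emitting bad tokens.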

View File: FBXTokenizer.h

@@ -99,6 +99,11 @@ public:
 		return type;
 	}
 
+#ifdef DEBUG
+	// copy of the token to show up in debugger
+	const std::string contents;
+#endif
+
 private:
 	const char* const sbegin;
@@ -108,9 +113,12 @@ private:
 	const unsigned int line, column;
 };
 
-typedef boost::shared_ptr<Token> TokenPtr;
-typedef std::vector< boost::shared_ptr<Token> > TokenList;
+// note: shared_ptr eats up too much storage, unique_ptr is C++11,
+// so have to use manual memory management for now.
+typedef Token* TokenPtr;
+typedef std::vector< TokenPtr > TokenList;
+
+#define new_Token new Token
 
 /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens.
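The "shared_ptr eats up too much storage" note above is easy to check; a small sketch, assuming the boost headers are available as in assimp's build at the time (exact numbers are implementation-dependent).

    // Quick check of the per-token overhead argument: shared_ptr is typically
    // twice the size of a raw pointer, and every token additionally carries
    // reference-count bookkeeping on the heap.
    #include <boost/shared_ptr.hpp>
    #include <iostream>

    struct Token { const char *sbegin, *send; unsigned int line, column; };

    int main()
    {
        std::cout << "raw pointer: " << sizeof(Token*) << " bytes\n";
        std::cout << "shared_ptr:  " << sizeof(boost::shared_ptr<Token>) << " bytes\n";
        return 0;
    }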

View File: FBXUtil.h

@@ -51,6 +51,17 @@ namespace Assimp {
 namespace FBX {
 namespace Util {
 
+/** helper for std::for_each to delete all heap-allocated items in a container */
+template <typename T>
+struct delete_fun
+{
+	void operator()(T* del) {
+		delete del;
+	}
+};
+
 /** Format log/error messages using a given line location in the source file.
  *
  *  @param prefix Message prefix to be prepended to the location info.
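Since delete_fun is templated on the element type, the same helper works for any container of owning raw pointers, not just TokenList. A minimal standalone usage sketch with stand-in element types:

    #include <algorithm>
    #include <list>
    #include <string>
    #include <vector>

    template <typename T>
    struct delete_fun {
        void operator()(T* del) { delete del; }
    };

    int main()
    {
        std::vector<std::string*> names;
        names.push_back(new std::string("fbx"));

        std::list<int*> numbers;
        numbers.push_back(new int(42));

        // deletes the pointees; the containers still hold dangling pointers
        // afterwards, so clear them before any reuse
        std::for_each(names.begin(), names.end(), delete_fun<std::string>());
        std::for_each(numbers.begin(), numbers.end(), delete_fun<int>());
        names.clear();
        numbers.clear();
        return 0;
    }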

File diff suppressed because it is too large.