- fbx: parser working now. Drop shared_ptrs in favour of raw pointers to reduce memory overhead (a pity - I would rather use unique_ptr and move semantics, but this code has to stay C++03-compatible).

2012-06-25 23:03:06 +02:00 · 2012-06-25 23:03:06 +02:00 · c9d9fcdfd1
parent ff995307ac
commit c9d9fcdfd1
8 changed files with 293 additions and 53 deletions
--- a/code/FBXImporter.cpp
+++ b/code/FBXImporter.cpp
@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "FBXTokenizer.h"
 #include "FBXParser.h"
+#include "FBXUtil.h"

 #include "StreamReader.h"
 #include "MemoryIOWrapper.h"
@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile,
 	// broadphase tokenizing pass in which we identify the core
 	// syntax elements of FBX (brackets, commas, key:value mappings)
 	TokenList tokens;
-	Tokenize(tokens,begin);
+	try {
+		Tokenize(tokens,begin);

-	// use this information to construct a very rudimentary 
-	// parse-tree representing the FBX scope structure
-	Parser parser(tokens);
+		// use this information to construct a very rudimentary 
+		// parse-tree representing the FBX scope structure
+		Parser parser(tokens);
+	}
+	catch(...) {
+		std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>());
+		throw;
+	}
 }

 #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER
--- a/code/FBXParser.cpp
+++ b/code/FBXParser.cpp
@ -47,38 +47,107 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "FBXTokenizer.h"
 #include "FBXParser.h"
+#include "FBXUtil.h"

 using namespace Assimp;
 using namespace Assimp::FBX;

-namespace Assimp {
-	template<> const std::string LogFunctions<Assimp::FBX::Parser>::log_prefix = "FBX-Parse: ";
+namespace {
+
+// ------------------------------------------------------------------------------------------------
+// signal parsing error, this is always unrecoverable. Throws DeadlyImportError. 'token' may be NULL when no token is available to report a location for.
+void ParseError(const std::string& message, TokenPtr token)
+{
+	throw DeadlyImportError(token ? Util::AddTokenText("FBX-Parse",message,token) : ("FBX-Parse " + message)); // with a token the text is built by Util::AddTokenText, without one only the prefix is prepended
+}
+
 }

 // ------------------------------------------------------------------------------------------------
 Element::Element(Parser& parser)
 {
+	TokenPtr n = NULL; // token consumed last; the element's key token has already been consumed by the enclosing Scope
+	do {
+		n = parser.AdvanceToNextToken();
+		if(!n) {
+			ParseError("unexpected end of file, expected closing bracket",parser.LastToken());
+		}
+		// data tokens are collected as the element's values (pointers only, the tokens are not copied)
+		if (n->Type() == TokenType_DATA) {
+			tokens.push_back(n);
+
+			n = parser.AdvanceToNextToken(); // a data token must be followed by a bracket, comma or key
+			if(!n) {
+				ParseError("unexpected end of file, expected bracket, comma or key",parser.LastToken());
+			}
+
+			const TokenType ty = n->Type();
+			if (ty != TokenType_OPEN_BRACKET && ty != TokenType_CLOSE_BRACKET && ty != TokenType_COMMA && ty != TokenType_KEY) {
+				ParseError("unexpected token; expected bracket, comma or key",n);
+			}
+		}
+
+		if (n->Type() == TokenType_OPEN_BRACKET) {
+			compound.reset(new Scope(parser));
+			// compound scopes must appear at the end of an element; the nested Scope returns with its own TOK_CLOSE_BRACKET as the current token
+			n = parser.CurrentToken();
+			ai_assert(n);
+			if (n->Type() != TokenType_CLOSE_BRACKET) {
+				ParseError("expected closing bracket",n);
+			}
+			parser.AdvanceToNextToken(); // step past the nested '}' so the enclosing scope does not mistake it for its own closing bracket (may reach end of input)
+			return;
+		}
+	}
+	while(n->Type() != TokenType_KEY && n->Type() != TokenType_CLOSE_BRACKET); // stop at the next key or at the enclosing scope's closing bracket, leaving that token current for the caller
 }

 // ------------------------------------------------------------------------------------------------
 Element::~Element()
 {
+	std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>()); // deletes every Token this element references. NOTE(review): these pointers are taken from the parser's TokenList, which FBXImporter.cpp also deletes in its catch(...) handler - confirm that no token can be freed twice
 }

 // ------------------------------------------------------------------------------------------------
-Scope::Scope(Parser& parser)
+Scope::Scope(Parser& parser,bool topLevel)
 {
-	TokenPtr t = parser.GetNextToken();
-	if (t->Type() != TokenType_OPEN_BRACKET) {
-		parser.ThrowException("Expected open bracket");
+	if(!topLevel) { // a nested scope is entered with its opening bracket as the parser's current token; the top-level scope has no brackets
+		TokenPtr t = parser.CurrentToken();
+		if (t->Type() != TokenType_OPEN_BRACKET) {
+			ParseError("expected open bracket",t);
+		}	
 	}
 
-	// XXX parse members
+	TokenPtr n = parser.AdvanceToNextToken();
+	if(n == NULL) {
+		ParseError("unexpected end of file",NULL);
+	}
+	// do-while: the first token inside a scope must already be a key, so an empty scope is rejected below
+	do {
+		if (n->Type() != TokenType_KEY) {
+			ParseError("unexpected token, expected TOK_KEY",n);
+		}
+		// the key's text becomes the map key; the new Element consumes the tokens that follow the key
+		elements.insert(ElementMap::value_type(n->StringContents(),new_Element(parser)));
+
+		// Element() should stop at the next Key (or Close) token
+		n = parser.CurrentToken();
+		if(n == NULL) {
+			if (topLevel) {
+				return; // running out of tokens ends the top-level scope normally
+			}
+			ParseError("unexpected end of file",parser.LastToken());
+		}
+	}
+	while(n->Type() != TokenType_CLOSE_BRACKET);
 }

 // ------------------------------------------------------------------------------------------------
 Scope::~Scope()
 {
+	BOOST_FOREACH(ElementMap::value_type& v, elements) { // the map holds raw Element pointers created with new_Element in the constructor
+		delete v.second;
+	}
 }


@ -86,8 +155,10 @@ Scope::~Scope()
 Parser::Parser (const TokenList& tokens)
 : tokens(tokens)
 , cursor(tokens.begin())
+, current() // value-initialized, i.e. NULL until the first AdvanceToNextToken()
+, last() // value-initialized, i.e. NULL
 {
-	root = boost::scoped_ptr<Scope>(new Scope(*this));
+	root.reset(new Scope(*this,true)); // parse the whole token list as one bracket-less top-level scope; errors are raised from within the Scope constructor
 }


@ -98,24 +169,30 @@ Parser::~Parser()


 // ------------------------------------------------------------------------------------------------
-TokenPtr Parser::GetNextToken()
+TokenPtr Parser::AdvanceToNextToken()
 {
+	last = current; // keep the previous token so LastToken() can still report it once 'current' has moved on
 	if (cursor == tokens.end()) {
-		return TokenPtr(NULL);
+		current = NULL; // input exhausted
 	}
-
-	return *cursor++;
+	else {
+		current = *cursor++; // take the token under the cursor, then move the cursor forward
+	}
+	return current; // NULL signals the end of the token list
 }


 // ------------------------------------------------------------------------------------------------
-TokenPtr Parser::PeekNextToken()
+TokenPtr Parser::CurrentToken() const
 {
-	if (cursor == tokens.end()) {
-		return TokenPtr(NULL);
-	}
+	return current; // result of the most recent AdvanceToNextToken(); NULL before the first call and after the end of input
+}
 
-	return *cursor;
+
+// ------------------------------------------------------------------------------------------------
+TokenPtr Parser::LastToken() const
+{
+	return last; // the token that was current before the most recent AdvanceToNextToken(); may be NULL
 }


--- a/code/FBXParser.h
+++ b/code/FBXParser.h
@ -62,10 +62,12 @@ namespace FBX {
 	class Parser;
 	class Element;

-	// should actually use 0x's unique_ptr for some of those
-	typedef std::vector< boost::shared_ptr<Scope> > ScopeList;
-	typedef std::fbx_unordered_multimap< std::string, boost::shared_ptr<Element> > ElementMap;
+	// XXX should use C++11's unique_ptr - but assimp needs to keep working with C++03
+	typedef std::vector< Scope* > ScopeList;
+	typedef std::fbx_unordered_multimap< std::string, Element* > ElementMap;

+#	define new_Scope new Scope
+#	define new_Element new Element


 /** FBX data entity that consists of a key:value tuple.
@ -100,7 +102,7 @@ private:

 	std::string key;
 	TokenList tokens;
-	boost::shared_ptr<Scope> compound;
+	boost::scoped_ptr<Scope> compound;
 };


@ -121,7 +123,7 @@ class Scope

 public:

-	Scope(Parser& parser);
+	Scope(Parser& parser, bool topLevel = false);
 	~Scope();

 public:
@ -138,7 +140,7 @@ private:

 /** FBX parsing class, takes a list of input tokens and generates a hierarchy
 *  of nested #Scope instances, representing the fbx DOM.*/
-class Parser : public LogFunctions<Parser>
+class Parser 
 {
 public:
 	
@ -156,13 +158,16 @@ private:
 	friend class Scope;
 	friend class Element;

-	TokenPtr GetNextToken();
-	TokenPtr PeekNextToken();
+	TokenPtr AdvanceToNextToken();
+
+	TokenPtr LastToken() const;
+	TokenPtr CurrentToken() const;

 private:

 	const TokenList& tokens;
 	
+	TokenPtr last, current;
 	TokenList::const_iterator cursor;
 	boost::scoped_ptr<Scope> root;
 };
--- a/code/FBXTokenizer.cpp
+++ b/code/FBXTokenizer.cpp
@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int
 	, type(type)
 	, line(line)
 	, column(column)
+#ifdef DEBUG
+	, contents(sbegin, static_cast<size_t>(send-sbegin))
+#endif
 {
 	ai_assert(sbegin);
 	ai_assert(send);
@ -74,23 +77,45 @@ Token::~Token()

 namespace {

+// ------------------------------------------------------------------------------------------------
+// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError. 'line' and 'column' are embedded in the exception text.
+void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
+{
+	throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column)); // the text is assembled by Util::AddLineAndColumn
+}
+
+
 // process a potential data token up to 'cur', adding it to 'output_tokens'. 
 // ------------------------------------------------------------------------------------------------
 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
 					  unsigned int line, 
 					  unsigned int column, 
-					  TokenType type = TokenType_DATA)
+					  TokenType type = TokenType_DATA,
+					  bool must_have_token = false)
 {
-	if (start != end) {
-		// tokens should have no whitespace in them and [start,end] should
+	if (start && end) {
+		// sanity check:
+		// tokens should have no whitespace outside quoted text and [start,end] should
 		// properly delimit the valid range.
-		for (const char* c = start; c != end; ++c) {
-			if (IsSpaceOrNewLine(*c)) {
-				throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column));
+		bool in_double_quotes = false;
+		for (const char* c = start; c != end + 1; ++c) {
+			if (*c == '\"') {
+				in_double_quotes = !in_double_quotes;
+			}
+
+			if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
+				TokenizeError("unexpected whitespace in token", line, column);
 			}
 		}

-		output_tokens.push_back(boost::make_shared<Token>(start,end,type,line,column));
+		if (in_double_quotes) {
+			TokenizeError("non-terminated double quotes", line, column);
+		}
+
+		output_tokens.push_back(new_Token(start,end + 1,type,line,column));
+	}
+	else if (must_have_token) {
+		TokenizeError("unexpected character, expected data token", line, column);
 	}

 	start = end = NULL;
@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input)

 	bool comment = false;
 	bool in_double_quotes = false;
+	bool pending_data_token = false;
 	
 	const char* token_begin = NULL, *token_end = NULL;
 	for (const char* cur = input;*cur;++cur,++column) {
@ -119,8 +145,6 @@ void Tokenize(TokenList& output_tokens, const char* input)

 			column = 0;
 			++line;
-
-			continue;
 		}

 		if(comment) {
@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input)
 			if (c == '\"') {
 				in_double_quotes = false;
 				token_end = cur;
-				if (!token_begin) {
-					token_begin = cur;
-				}
+
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column);
+				pending_data_token = false;
 			}
 			continue;
 		}
@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		switch(c)
 		{
 		case '\"':
+			if (token_begin) {
+				TokenizeError("unexpected double-quote", line, column);
+			}
+			token_begin = cur;
 			in_double_quotes = true;
 			continue;

@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input)

 		case '{':
 			ProcessDataToken(output_tokens,token_begin,token_end, line, column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
+			continue;

 		case '}':
 			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
+			continue;
 		
 		case ',':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_COMMA,line,column));
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
+			}
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
+			continue;

 		case ':':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY);
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
+			}
+			else {
+				TokenizeError("unexpected colon", line, column);
+			}
+			continue;
 		}
 		
-		if (!IsSpaceOrNewLine(c)) {
+		if (IsSpaceOrNewLine(c)) {
+
+			if (token_begin) {
+				// peek ahead and check if the next token is a colon in which
+				// case this counts as KEY token.
+				TokenType type = TokenType_DATA;
+				for (const char* peek = cur;  *peek && IsSpaceOrNewLine(*peek); ++peek) {
+					if (*peek == ':') {
+						type = TokenType_KEY;
+						cur = peek;
+						break;
+					}
+				}
+
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
+			}
+
+			pending_data_token = false;
+		}
+		else {
 			token_end = cur;
 			if (!token_begin) {
 				token_begin = cur;
 			}
+
+			pending_data_token = true;
 		}
 	}
 }
--- a/code/FBXTokenizer.h
+++ b/code/FBXTokenizer.h
@ -85,6 +85,12 @@ public:
 	Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column);
 	~Token();

+public:
+
+	std::string StringContents() const { // returns a new string copied from the character range [begin(), end())
+		return std::string(begin(),end());
+	}
+
 public:

 	const char* begin() const {
@ -99,8 +105,23 @@ public:
 		return type;
 	}

+	unsigned int Line() const { // line value that was passed to the constructor
+		return line;
+	}
+
+	unsigned int Column() const { // column value that was passed to the constructor
+		return column;
+	}
+
 private:

+#ifdef DEBUG
+	// full string copy for the sole purpose that it nicely appears
+	// in msvc's debugger window.
+	const std::string contents;
+#endif
+
+
 	const char* const sbegin;
 	const char* const send;
 	const TokenType type;
@ -108,9 +129,11 @@ private:
 	const unsigned int line, column;
 };

+// XXX should use C++11's unique_ptr - but assimp needs to keep working with C++03
+typedef const Token* TokenPtr;
+typedef std::vector< TokenPtr > TokenList;

-typedef boost::shared_ptr<Token> TokenPtr;
-typedef std::vector< boost::shared_ptr<Token> > TokenList;
+#define new_Token new Token


 /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens.
--- a/code/FBXUtil.cpp
+++ b/code/FBXUtil.cpp
@ -44,6 +44,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "AssimpPCH.h"

 #include "FBXUtil.h"
+#include "FBXTokenizer.h"
+
 #include "TinyFormatter.h"

 #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER
@ -52,12 +54,47 @@ namespace Assimp {
 namespace FBX {
 namespace Util {

+// ------------------------------------------------------------------------------------------------
+const char* TokenTypeString(TokenType t)
+{
+	switch(t) { // map each token type to a fixed label (string literal, never NULL)
+		case TokenType_OPEN_BRACKET:
+			return "TOK_OPEN_BRACKET";
+	
+		case TokenType_CLOSE_BRACKET:
+			return "TOK_CLOSE_BRACKET";
+
+		case TokenType_DATA:
+			return "TOK_DATA";
+
+		case TokenType_COMMA:
+			return "TOK_COMMA";
+
+		case TokenType_KEY:
+			return "TOK_KEY";
+	}
+
+	ai_assert(false); // only reached when 't' matches none of the cases above
+	return ""; // fallback value for that case if the assertion does not stop execution
+}
+	
+
 // ------------------------------------------------------------------------------------------------
 std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column)
 {
 	return static_cast<std::string>( (Formatter::format(),prefix,"(line ",line,", col ",column,") ",text) );
 }

+// ------------------------------------------------------------------------------------------------
+std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok)
+{
+	return static_cast<std::string>( (Formatter::format(),prefix, // 'tok' is dereferenced unconditionally and must not be NULL
+		"(",TokenTypeString(tok->Type()),
+		", line ",tok->Line(), // ", " keeps the type label from running into "line", e.g. "(TOK_KEY, line 3, col 1) "
+		", col ",tok->Column(),") ",
+		text) );
+}
+
 } // !Util
 } // !FBX
 } // !Assimp
--- a/code/FBXUtil.h
+++ b/code/FBXUtil.h
@ -49,8 +49,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 namespace Assimp {
 namespace FBX {
+
+	class Token; // defined in FBXTokenizer.h
+	enum TokenType; // NOTE(review): forward-declaring an enum without a fixed underlying type is not standard C++ (some compilers accept it as an extension) - confirm all target compilers build this
+
 namespace Util {

+
+/** helper for std::for_each to delete all heap-allocated items in a container */
+template<typename T>
+struct delete_fun
+{
+	void operator()(const volatile T* del) { // cv-qualified parameter, so pointers to const objects (such as TokenPtr) are accepted too
+		delete del; // deleting a null pointer is a no-op
+	}
+};
+
+/** Get a string representation for a #TokenType. */
+const char* TokenTypeString(TokenType t);
+
+
 /** Format log/error messages using a given line location in the source file.
 *
 *  @param prefix Message prefix to be preprended to the location info.
@ -60,6 +78,15 @@ namespace Util {
 *  @return A string of the following format: {prefix} (line {line}, col {column}) {text}*/
 std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column);
 	
+
+/** Format log/error messages using a given cursor token.
+ *
+ *  @param prefix Message prefix to be prepended to the location info.
+ *  @param text Message text
+ *  @param tok Token where parsing/processing stopped
+ *  @return A string of the following format: {prefix} ({token-type}, line {line}, col {column}) {text}*/
+std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok);
+
 }
 }
 }
--- a/workspaces/vc9/assimp_cmd.vcproj
+++ b/workspaces/vc9/assimp_cmd.vcproj
@ -1484,6 +1484,14 @@
 							CompileAs="1"
 						/>
 					</FileConfiguration>
+					<FileConfiguration
+						Name="debug-noboost-st|Win32"
+						ExcludedFromBuild="true"
+						>
+						<Tool
+							Name="VCCLCompilerTool"
+						/>
+					</FileConfiguration>
 				</File>
 			</Filter>
 		</Filter>