From 6c5efe471f281d9ce98cf57eaa59be45d48010f5 Mon Sep 17 00:00:00 2001 From: acgessler Date: Mon, 25 Jun 2012 17:31:42 +0200 Subject: [PATCH] - fbx: fix various issues with tokenizer. Stricter error checking and better debuggability. Do not use shared_ptr's for tokens, there are simply too many of them and ownership is always clear. --- code/FBXImporter.cpp | 15 +- code/FBXTokenizer.cpp | 100 ++- code/FBXTokenizer.h | 12 +- code/FBXUtil.h | 11 + workspaces/vc9/assimp_cmd.vcproj | 1216 +++++++++++++++--------------- 5 files changed, 722 insertions(+), 632 deletions(-) diff --git a/code/FBXImporter.cpp b/code/FBXImporter.cpp index c8ee45cc2..c917b6d72 100644 --- a/code/FBXImporter.cpp +++ b/code/FBXImporter.cpp @@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "FBXTokenizer.h" #include "FBXParser.h" +#include "FBXUtil.h" #include "StreamReader.h" #include "MemoryIOWrapper.h" @@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile, // broadphase tokenizing pass in which we identify the core // syntax elements of FBX (brackets, commas, key:value mappings) TokenList tokens; - Tokenize(tokens,begin); + try { + Tokenize(tokens,begin); - // use this information to construct a very rudimentary - // parse-tree representing the FBX scope structure - Parser parser(tokens); + // use this information to construct a very rudimentary + // parse-tree representing the FBX scope structure + Parser parser(tokens); + } + catch(...) { + std::for_each(tokens.begin(),tokens.end(),Util::delete_fun()); + throw; + } } #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER diff --git a/code/FBXTokenizer.cpp b/code/FBXTokenizer.cpp index 0bd8e4dd6..8dc49b023 100644 --- a/code/FBXTokenizer.cpp +++ b/code/FBXTokenizer.cpp @@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int , type(type) , line(line) , column(column) +#ifdef DEBUG + , contents(sbegin, static_cast(send-sbegin)) +#endif { ai_assert(sbegin); ai_assert(send); @@ -74,23 +77,45 @@ Token::~Token() namespace { +// ------------------------------------------------------------------------------------------------ +// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError. +void TokenizeError(const std::string& message, unsigned int line, unsigned int column) +{ + throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column)); +} + + // process a potential data token up to 'cur', adding it to 'output_tokens'. // ------------------------------------------------------------------------------------------------ void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end, unsigned int line, unsigned int column, - TokenType type = TokenType_DATA) + TokenType type = TokenType_DATA, + bool must_have_token = false) { - if (start != end) { - // tokens should have no whitespace in them and [start,end] should + if (start && end) { + // sanity check: + // tokens should have no whitespace outside quoted text and [start,end] should // properly delimit the valid range. - for (const char* c = start; c != end; ++c) { - if (IsSpaceOrNewLine(*c)) { - throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column)); + bool in_double_quotes = false; + for (const char* c = start; c != end + 1; ++c) { + if (*c == '\"') { + in_double_quotes = !in_double_quotes; + } + + if (!in_double_quotes && IsSpaceOrNewLine(*c)) { + TokenizeError("unexpected whitespace in token", line, column); } } - output_tokens.push_back(boost::make_shared(start,end,type,line,column)); + if (in_double_quotes) { + TokenizeError("non-terminated double quotes", line, column); + } + + output_tokens.push_back(new_Token(start,end + 1,type,line,column)); + } + else if (must_have_token) { + TokenizeError("unexpected character, expected data token", line, column); } start = end = NULL; @@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input) bool comment = false; bool in_double_quotes = false; + bool pending_data_token = false; const char* token_begin = NULL, *token_end = NULL; for (const char* cur = input;*cur;++cur,++column) { @@ -119,8 +145,6 @@ void Tokenize(TokenList& output_tokens, const char* input) column = 0; ++line; - - continue; } if(comment) { @@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input) if (c == '\"') { in_double_quotes = false; token_end = cur; - if (!token_begin) { - token_begin = cur; - } + + ProcessDataToken(output_tokens,token_begin,token_end,line,column); + pending_data_token = false; } continue; } @@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input) switch(c) { case '\"': + if (token_begin) { + TokenizeError("unexpected double-quote", line, column); + } + token_begin = cur; in_double_quotes = true; continue; @@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input) case '{': ProcessDataToken(output_tokens,token_begin,token_end, line, column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); - break; + output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); + continue; case '}': ProcessDataToken(output_tokens,token_begin,token_end,line,column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); - break; + output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); + continue; case ',': - ProcessDataToken(output_tokens,token_begin,token_end,line,column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_COMMA,line,column)); - break; + if (pending_data_token) { + ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true); + } + output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column)); + continue; case ':': - ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY); - break; + if (pending_data_token) { + ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true); + } + else { + TokenizeError("unexpected colon", line, column); + } + continue; } - if (!IsSpaceOrNewLine(c)) { + if (IsSpaceOrNewLine(c)) { + + if (token_begin) { + // peek ahead and check if the next token is a colon in which + // case this counts as KEY token. + TokenType type = TokenType_DATA; + for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) { + if (*peek == ':') { + type = TokenType_KEY; + cur = peek; + break; + } + } + + ProcessDataToken(output_tokens,token_begin,token_end,line,column,type); + } + + pending_data_token = false; + } + else { token_end = cur; if (!token_begin) { token_begin = cur; } + + pending_data_token = true; } } } diff --git a/code/FBXTokenizer.h b/code/FBXTokenizer.h index 1a7899fca..6a2d14788 100644 --- a/code/FBXTokenizer.h +++ b/code/FBXTokenizer.h @@ -99,6 +99,11 @@ public: return type; } +#ifdef DEBUG + // copy of the token to show up in debugger + const std::string contents; +#endif + private: const char* const sbegin; @@ -108,9 +113,12 @@ private: const unsigned int line, column; }; +// note: shared_ptr eats up too much storage, unique_ptr is C++11, +// so have to use manual memory management for now. +typedef Token* TokenPtr; +typedef std::vector< TokenPtr > TokenList; -typedef boost::shared_ptr TokenPtr; -typedef std::vector< boost::shared_ptr > TokenList; +#define new_Token new Token /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens. diff --git a/code/FBXUtil.h b/code/FBXUtil.h index 87bb86f70..a1141d6b3 100644 --- a/code/FBXUtil.h +++ b/code/FBXUtil.h @@ -51,6 +51,17 @@ namespace Assimp { namespace FBX { namespace Util { + +/** helper for std::for_each to delete all heap-allocated items in a container */ +template +struct delete_fun +{ + void operator()(T* del) { + delete del; + } +}; + + /** Format log/error messages using a given line location in the source file. * * @param prefix Message prefix to be preprended to the location info. diff --git a/workspaces/vc9/assimp_cmd.vcproj b/workspaces/vc9/assimp_cmd.vcproj index 1ee06b1cc..eb0a81066 100644 --- a/workspaces/vc9/assimp_cmd.vcproj +++ b/workspaces/vc9/assimp_cmd.vcproj @@ -1,7 +1,7 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1311,14 +1303,6 @@ AdditionalIncludeDirectories="./" /> - - - @@ -1327,14 +1311,6 @@ AdditionalIncludeDirectories="./" /> - - - @@ -1343,14 +1319,6 @@ AdditionalIncludeDirectories="./" /> - - - @@ -1359,14 +1327,6 @@ AdditionalIncludeDirectories="./" /> - - - @@ -1375,14 +1335,6 @@ AdditionalIncludeDirectories="./" /> - - - @@ -1392,7 +1344,7 @@ /> + + + + + + + + + + + + + + + + + + + + +