// ------------------------------------------------------------------------------------------------
// Patch commentary (free-text preamble placed ahead of the first "diff --git" header; the patch
// text that follows is reproduced unchanged).
//
// Purpose: moves the FBX tokenizer/parser from boost::shared_ptr-held tokens and elements to raw
// pointers (TokenPtr = const Token*, ElementMap of Element*) and fills in the actual parsing logic.
//
// Per file:
//  - FBXImporter.cpp : wraps Tokenize() + Parser construction in try/catch(...); the handler
//                      deletes every entry of the TokenList and rethrows.
//  - FBXParser.cpp   : adds a file-local ParseError() that throws DeadlyImportError; implements
//                      Element::Element (collects TokenType_DATA tokens, optionally one nested
//                      Scope) and Scope::Scope (maps each KEY token's text to a new Element);
//                      replaces GetNextToken/PeekNextToken with AdvanceToNextToken/CurrentToken/
//                      LastToken, tracking 'current' and 'last'.
//  - FBXParser.h     : raw-pointer typedefs, new_Scope/new_Element macros, Scope gains a
//                      'topLevel' parameter defaulting to false, Parser no longer derives from
//                      LogFunctions.
//  - FBXTokenizer.cpp: adds TokenizeError(); ProcessDataToken() now treats 'end' as inclusive
//                      (scans up to end + 1, builds the token from [start, end + 1)), ignores
//                      whitespace inside double quotes, reports unterminated quotes and gains
//                      'must_have_token'; Tokenize() tracks 'pending_data_token', emits a token
//                      when a quoted string closes, and no longer 'continue's after a newline.
//  - FBXTokenizer.h  : Token gains StringContents(), Line(), Column() and a DEBUG-only 'contents'
//                      copy; TokenPtr/TokenList become raw-pointer typedefs; adds new_Token.
//  - FBXUtil.cpp/.h  : adds TokenTypeString(), AddTokenText() and the delete_fun functor.
//
// NOTE(review): this copy of the patch has its line breaks collapsed and appears to have lost
// '<...>' spans (see "template struct delete_fun", "static_cast( (Formatter::format()",
// "boost::scoped_ptr compound", and the empty-looking vcproj hunk at the end). Restore it from
// the original commit before trying to apply it.
//
// NOTE(review): points to confirm against the full tree before relying on this patch:
//  1. Element::Element loops "while(n->Type() != TokenType_KEY)". Scope::Scope's own comment says
//     the element should stop at the next Key (or Close) token, but a CLOSE_BRACKET does not end
//     the loop, and reaching end of input inside the loop always raises
//     "unexpected end of file, expected closing bracket" - so the 'topLevel' early return in
//     Scope::Scope looks unreachable through this path.
//  2. Token ownership: Element::~Element deletes the DATA tokens it stored, while the catch(...)
//     handler in FBXImporter.cpp deletes every token in the list. If an Element holding a fully
//     built 'compound' scope throws, the nested elements' tokens appear to be deleted twice.
//     On the success path nothing visible here frees KEY/bracket/comma tokens.
//  3. Scope::Scope requires a KEY as the first token after the bracket, so an empty scope
//     "{ }" is reported as "unexpected token, expected TOK_KEY".
//  4. Util::AddTokenText concatenates the token type string directly with "line " (no ", "),
//     which does not match the "{prefix} ({token-type}, line {line}, col {column}) {text}"
//     format documented in FBXUtil.h.
//  5. ParseError builds "FBX-Parse " + message when no token is given, but passes the bare
//     prefix "FBX-Parse" to AddTokenText otherwise; the two outputs are formatted differently.
//  6. In Tokenize(), the peek-ahead loop only runs while IsSpaceOrNewLine(*peek) holds, yet tests
//     "*peek == ':'" inside that loop; presumably a colon is not whitespace, in which case the
//     KEY detection can never trigger - verify against IsSpaceOrNewLine.
//  7. FBXUtil.h forward-declares "enum TokenType;". An unscoped enum without a fixed underlying
//     type cannot be forward-declared in standard C++; this presumably relies on a compiler
//     extension.
//  8. Parser's constructor lists cursor, current, last, while the class declares last, current
//     before cursor. Members are initialized in declaration order; harmless here because the
//     initializers are independent, but compilers may warn.
// ------------------------------------------------------------------------------------------------
diff --git a/code/FBXImporter.cpp b/code/FBXImporter.cpp index c8ee45cc2..c917b6d72 100644 --- a/code/FBXImporter.cpp +++ b/code/FBXImporter.cpp @@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "FBXTokenizer.h" #include "FBXParser.h" +#include "FBXUtil.h" #include "StreamReader.h" #include "MemoryIOWrapper.h" @@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile, // broadphase tokenizing pass in which we identify the core // syntax elements of FBX (brackets, commas, key:value mappings) TokenList tokens; - Tokenize(tokens,begin); + try { + Tokenize(tokens,begin); - // use this information to construct a very rudimentary - // parse-tree representing the FBX scope structure - Parser parser(tokens); + // use this information to construct a very rudimentary + // parse-tree representing the FBX scope structure + Parser parser(tokens); + } + catch(...) { + std::for_each(tokens.begin(),tokens.end(),Util::delete_fun()); + throw; + } } #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER diff --git a/code/FBXParser.cpp b/code/FBXParser.cpp index dab4ebbe4..338fbba70 100644 --- a/code/FBXParser.cpp +++ b/code/FBXParser.cpp @@ -47,38 +47,107 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "FBXTokenizer.h" #include "FBXParser.h" +#include "FBXUtil.h" using namespace Assimp; using namespace Assimp::FBX; -namespace Assimp { - template<> const std::string LogFunctions::log_prefix = "FBX-Parse: "; +namespace { + +// ------------------------------------------------------------------------------------------------ +// signal parsing error, this is always unrecoverable. Throws DeadlyImportError. +void ParseError(const std::string& message, TokenPtr token) +{ + throw DeadlyImportError(token ?
Util::AddTokenText("FBX-Parse",message,token) : ("FBX-Parse " + message)); +} + } // ------------------------------------------------------------------------------------------------ Element::Element(Parser& parser) { + TokenPtr n = NULL; + do { + n = parser.AdvanceToNextToken(); + if(!n) { + ParseError("unexpected end of file, expected closing bracket",parser.LastToken()); + } + + if (n->Type() == TokenType_DATA) { + tokens.push_back(n); + + n = parser.AdvanceToNextToken(); + if(!n) { + ParseError("unexpected end of file, expected bracket, comma or key",parser.LastToken()); + } + + const TokenType ty = n->Type(); + if (ty != TokenType_OPEN_BRACKET && ty != TokenType_CLOSE_BRACKET && ty != TokenType_COMMA && ty != TokenType_KEY) { + ParseError("unexpected token; expected bracket, comma or key",n); + } + } + + if (n->Type() == TokenType_OPEN_BRACKET) { + compound.reset(new Scope(parser)); + + // compound scopes must appear at the end of an element, so TOK_CLOSE_BRACKET should be next + n = parser.CurrentToken(); + ai_assert(n); + + if (n->Type() != TokenType_CLOSE_BRACKET) { + ParseError("expected closing bracket",n); + } + } + } + while(n->Type() != TokenType_KEY); } // ------------------------------------------------------------------------------------------------ Element::~Element() { + std::for_each(tokens.begin(),tokens.end(),Util::delete_fun()); } // ------------------------------------------------------------------------------------------------ -Scope::Scope(Parser& parser) +Scope::Scope(Parser& parser,bool topLevel) { - TokenPtr t = parser.GetNextToken(); - if (t->Type() != TokenType_OPEN_BRACKET) { - parser.ThrowException("Expected open bracket"); + if(!topLevel) { + TokenPtr t = parser.CurrentToken(); + if (t->Type() != TokenType_OPEN_BRACKET) { + ParseError("expected open bracket",t); + } } - // XXX parse members + TokenPtr n = parser.AdvanceToNextToken(); + if(n == NULL) { + ParseError("unexpected end of file",NULL); + } + + do { + if (n->Type() !=
TokenType_KEY) { + ParseError("unexpected token, expected TOK_KEY",n); + } + + elements.insert(ElementMap::value_type(n->StringContents(),new_Element(parser))); + + // Element() should stop at the next Key (or Close) token + n = parser.CurrentToken(); + if(n == NULL) { + if (topLevel) { + return; + } + ParseError("unexpected end of file",parser.LastToken()); + } + } + while(n->Type() != TokenType_CLOSE_BRACKET); } // ------------------------------------------------------------------------------------------------ Scope::~Scope() { + BOOST_FOREACH(ElementMap::value_type& v, elements) { + delete v.second; + } } @@ -86,8 +155,10 @@ Scope::~Scope() Parser::Parser (const TokenList& tokens) : tokens(tokens) , cursor(tokens.begin()) +, current() +, last() { - root = boost::scoped_ptr(new Scope(*this)); + root.reset(new Scope(*this,true)); } @@ -98,24 +169,30 @@ Parser::~Parser() // ------------------------------------------------------------------------------------------------ -TokenPtr Parser::GetNextToken() +TokenPtr Parser::AdvanceToNextToken() { + last = current; if (cursor == tokens.end()) { - return TokenPtr(NULL); + current = NULL; } - - return *cursor++; + else { + current = *cursor++; + } + return current; } // ------------------------------------------------------------------------------------------------ -TokenPtr Parser::PeekNextToken() +TokenPtr Parser::CurrentToken() const { - if (cursor == tokens.end()) { - return TokenPtr(NULL); - } + return current; +} - return *cursor; + +// ------------------------------------------------------------------------------------------------ +TokenPtr Parser::LastToken() const +{ + return last; } diff --git a/code/FBXParser.h b/code/FBXParser.h index 06093cfc5..4b3852055 100644 --- a/code/FBXParser.h +++ b/code/FBXParser.h @@ -62,10 +62,12 @@ namespace FBX { class Parser; class Element; - // should actually use 0x's unique_ptr for some of those - typedef std::vector< boost::shared_ptr > ScopeList; - typedef
std::fbx_unordered_multimap< std::string, boost::shared_ptr > ElementMap; + // XXX should use C++11's unique_ptr - but assimp's need to keep working with 03 + typedef std::vector< Scope* > ScopeList; + typedef std::fbx_unordered_multimap< std::string, Element* > ElementMap; +# define new_Scope new Scope +# define new_Element new Element /** FBX data entity that consists of a key:value tuple. @@ -100,7 +102,7 @@ private: std::string key; TokenList tokens; - boost::shared_ptr compound; + boost::scoped_ptr compound; }; @@ -121,7 +123,7 @@ class Scope public: - Scope(Parser& parser); + Scope(Parser& parser, bool topLevel = false); ~Scope(); public: @@ -138,7 +140,7 @@ private: /** FBX parsing class, takes a list of input tokens and generates a hierarchy * of nested #Scope instances, representing the fbx DOM.*/ -class Parser : public LogFunctions +class Parser { public: @@ -156,13 +158,16 @@ private: friend class Scope; friend class Element; - TokenPtr GetNextToken(); - TokenPtr PeekNextToken(); + TokenPtr AdvanceToNextToken(); + + TokenPtr LastToken() const; + TokenPtr CurrentToken() const; private: const TokenList& tokens; + TokenPtr last, current; TokenList::const_iterator cursor; boost::scoped_ptr root; }; diff --git a/code/FBXTokenizer.cpp b/code/FBXTokenizer.cpp index 0bd8e4dd6..8dc49b023 100644 --- a/code/FBXTokenizer.cpp +++ b/code/FBXTokenizer.cpp @@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int , type(type) , line(line) , column(column) +#ifdef DEBUG + , contents(sbegin, static_cast(send-sbegin)) +#endif { ai_assert(sbegin); ai_assert(send); @@ -74,23 +77,45 @@ Token::~Token() namespace { +// ------------------------------------------------------------------------------------------------ +// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
+void TokenizeError(const std::string& message, unsigned int line, unsigned int column) +{ + throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column)); +} + + // process a potential data token up to 'cur', adding it to 'output_tokens'. // ------------------------------------------------------------------------------------------------ void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end, unsigned int line, unsigned int column, - TokenType type = TokenType_DATA) + TokenType type = TokenType_DATA, + bool must_have_token = false) { - if (start != end) { - // tokens should have no whitespace in them and [start,end] should + if (start && end) { + // sanity check: + // tokens should have no whitespace outside quoted text and [start,end] should // properly delimit the valid range. - for (const char* c = start; c != end; ++c) { - if (IsSpaceOrNewLine(*c)) { - throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column)); + bool in_double_quotes = false; + for (const char* c = start; c != end + 1; ++c) { + if (*c == '\"') { + in_double_quotes = !in_double_quotes; + } + + if (!in_double_quotes && IsSpaceOrNewLine(*c)) { + TokenizeError("unexpected whitespace in token", line, column); } } - output_tokens.push_back(boost::make_shared(start,end,type,line,column)); + if (in_double_quotes) { + TokenizeError("non-terminated double quotes", line, column); + } + + output_tokens.push_back(new_Token(start,end + 1,type,line,column)); + } + else if (must_have_token) { + TokenizeError("unexpected character, expected data token", line, column); } start = end = NULL; @@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input) bool comment = false; bool in_double_quotes = false; + bool pending_data_token = false; const char* token_begin = NULL, *token_end = NULL; for (const char* cur = input;*cur;++cur,++column) { @@ -119,8 +145,6 @@ void Tokenize(TokenList&
output_tokens, const char* input) column = 0; ++line; - - continue; } if(comment) { @@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input) if (c == '\"') { in_double_quotes = false; token_end = cur; - if (!token_begin) { - token_begin = cur; - } + + ProcessDataToken(output_tokens,token_begin,token_end,line,column); + pending_data_token = false; } continue; } @@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input) switch(c) { case '\"': + if (token_begin) { + TokenizeError("unexpected double-quote", line, column); + } + token_begin = cur; in_double_quotes = true; continue; @@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input) case '{': ProcessDataToken(output_tokens,token_begin,token_end, line, column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); - break; + output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column)); + continue; case '}': ProcessDataToken(output_tokens,token_begin,token_end,line,column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); - break; + output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column)); + continue; case ',': - ProcessDataToken(output_tokens,token_begin,token_end,line,column); - output_tokens.push_back(boost::make_shared(cur,cur+1,TokenType_COMMA,line,column)); - break; + if (pending_data_token) { + ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true); + } + output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column)); + continue; case ':': - ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY); - break; + if (pending_data_token) { + ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true); + } + else { + TokenizeError("unexpected colon", line, column); + } + continue; } - if (!IsSpaceOrNewLine(c)) { + if
(IsSpaceOrNewLine(c)) { + + if (token_begin) { + // peek ahead and check if the next token is a colon in which + // case this counts as KEY token. + TokenType type = TokenType_DATA; + for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) { + if (*peek == ':') { + type = TokenType_KEY; + cur = peek; + break; + } + } + + ProcessDataToken(output_tokens,token_begin,token_end,line,column,type); + } + + pending_data_token = false; + } + else { token_end = cur; if (!token_begin) { token_begin = cur; } + + pending_data_token = true; } } } diff --git a/code/FBXTokenizer.h b/code/FBXTokenizer.h index 1a7899fca..11221f3e5 100644 --- a/code/FBXTokenizer.h +++ b/code/FBXTokenizer.h @@ -85,6 +85,12 @@ public: Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column); ~Token(); +public: + + std::string StringContents() const { + return std::string(begin(),end()); + } + public: const char* begin() const { @@ -99,8 +105,23 @@ public: return type; } + unsigned int Line() const { + return line; + } + + unsigned int Column() const { + return column; + } + private: +#ifdef DEBUG + // full string copy for the sole purpose that it nicely appears + // in msvc's debugger window. + const std::string contents; +#endif + + const char* const sbegin; const char* const send; const TokenType type; @@ -108,9 +129,11 @@ private: const unsigned int line, column; }; +// XXX should use C++11's unique_ptr - but assimp's need to keep working with 03 +typedef const Token* TokenPtr; +typedef std::vector< TokenPtr > TokenList; -typedef boost::shared_ptr TokenPtr; -typedef std::vector< boost::shared_ptr > TokenList; +#define new_Token new Token /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens. diff --git a/code/FBXUtil.cpp b/code/FBXUtil.cpp index 66647cb66..57e8fc256 100644 --- a/code/FBXUtil.cpp +++ b/code/FBXUtil.cpp @@ -44,6 +44,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "AssimpPCH.h" #include "FBXUtil.h" +#include "FBXTokenizer.h" + #include "TinyFormatter.h" #ifndef ASSIMP_BUILD_NO_FBX_IMPORTER @@ -52,12 +54,47 @@ namespace Assimp { namespace FBX { namespace Util { +// ------------------------------------------------------------------------------------------------ +const char* TokenTypeString(TokenType t) +{ + switch(t) { + case TokenType_OPEN_BRACKET: + return "TOK_OPEN_BRACKET"; + + case TokenType_CLOSE_BRACKET: + return "TOK_CLOSE_BRACKET"; + + case TokenType_DATA: + return "TOK_DATA"; + + case TokenType_COMMA: + return "TOK_COMMA"; + + case TokenType_KEY: + return "TOK_KEY"; + } + + ai_assert(false); + return ""; +} + + // ------------------------------------------------------------------------------------------------ std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column) { return static_cast( (Formatter::format(),prefix,"(line ",line,", col ",column,") ",text) ); } +// ------------------------------------------------------------------------------------------------ +std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok) +{ + return static_cast( (Formatter::format(),prefix, + "(",TokenTypeString(tok->Type()), + "line ",tok->Line(), + ", col ",tok->Column(),") ", + text) ); +} + } // !Util } // !FBX } // !Assimp diff --git a/code/FBXUtil.h b/code/FBXUtil.h index 87bb86f70..89b213732 100644 --- a/code/FBXUtil.h +++ b/code/FBXUtil.h @@ -49,8 +49,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace Assimp { namespace FBX { + + class Token; + enum TokenType; + namespace Util { + +/** helper for std::for_each to delete all heap-allocated items in a container */ +template +struct delete_fun +{ + void operator()(const volatile T* del) { + delete del; + } +}; + +/** Get a string representation for a #TokenType.
*/ +const char* TokenTypeString(TokenType t); + + /** Format log/error messages using a given line location in the source file. * * @param prefix Message prefix to be preprended to the location info. @@ -60,6 +78,15 @@ namespace Util { * @return A string of the following format: {prefix} (line {line}, col {column}) {text}*/ std::string AddLineAndColumn(const std::string& prefix, const std::string& text, unsigned int line, unsigned int column); + +/** Format log/error messages using a given cursor token. + * + * @param prefix Message prefix to be preprended to the location info. + * @param text Message text + * @param tok Token where parsing/processing stopped + * @return A string of the following format: {prefix} ({token-type}, line {line}, col {column}) {text}*/ +std::string AddTokenText(const std::string& prefix, const std::string& text, const Token* tok); + } } } diff --git a/workspaces/vc9/assimp_cmd.vcproj b/workspaces/vc9/assimp_cmd.vcproj index 1ee06b1cc..196d17bc6 100644 --- a/workspaces/vc9/assimp_cmd.vcproj +++ b/workspaces/vc9/assimp_cmd.vcproj @@ -1484,6 +1484,14 @@ CompileAs="1" /> + + +