- fbx: fix various issues with the tokenizer: stricter error checking and better debuggability. Do not use shared_ptrs for tokens; there are simply too many of them, and ownership is always clear.

pull/14/head
acgessler 2012-06-25 17:31:42 +02:00
parent ff995307ac
commit 6c5efe471f
5 changed files with 722 additions and 632 deletions
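The gist of the change, before the per-file diffs: token ownership moves from boost::shared_ptr to plain pointers, with explicit cleanup on every error path. Below is a minimal standalone sketch of that pattern; Token and Tokenize() are stand-ins for illustration, not the real classes from FBXTokenizer.h.

    // Standalone sketch of the ownership pattern this commit introduces.
    #include <algorithm>
    #include <vector>

    struct Token {};                          // stand-in for FBX::Token

    typedef Token* TokenPtr;                  // was: boost::shared_ptr<Token>
    typedef std::vector<TokenPtr> TokenList;

    template <typename T>
    struct delete_fun {
        void operator()(T* del) { delete del; }
    };

    // stub standing in for the real Tokenize() declared in FBXTokenizer.h
    static void Tokenize(TokenList& out, const char* /*input*/)
    {
        out.push_back(new Token());
    }

    int main()
    {
        TokenList tokens;
        try {
            Tokenize(tokens, "Key: \"value\"");
            // ... hand the tokens to the parser ...
        }
        catch (...) {
            // with raw pointers, an exception would otherwise leak every token
            std::for_each(tokens.begin(), tokens.end(), delete_fun<Token>());
            throw;
        }
        std::for_each(tokens.begin(), tokens.end(), delete_fun<Token>());
        return 0;
    }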

View File: FBXImporter.cpp

@@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "FBXTokenizer.h"
 #include "FBXParser.h"
+#include "FBXUtil.h"
 #include "StreamReader.h"
 #include "MemoryIOWrapper.h"
@@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile,
 	// broadphase tokenizing pass in which we identify the core
 	// syntax elements of FBX (brackets, commas, key:value mappings)
 	TokenList tokens;
-	Tokenize(tokens,begin);
+	try {
+		Tokenize(tokens,begin);
 
-	// use this information to construct a very rudimentary
-	// parse-tree representing the FBX scope structure
-	Parser parser(tokens);
+		// use this information to construct a very rudimentary
+		// parse-tree representing the FBX scope structure
+		Parser parser(tokens);
+	}
+	catch(...) {
+		std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>());
+		throw;
+	}
 }
 
 #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER

View File: FBXTokenizer.cpp

@@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int
 , type(type)
 , line(line)
 , column(column)
+#ifdef DEBUG
+, contents(sbegin, static_cast<size_t>(send-sbegin))
+#endif
 {
 	ai_assert(sbegin);
 	ai_assert(send);
@@ -74,23 +77,45 @@ Token::~Token()
 namespace {
 
+// ------------------------------------------------------------------------------------------------
+// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
+void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
+{
+	throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column));
+}
+
 // process a potential data token up to 'cur', adding it to 'output_tokens'.
 // ------------------------------------------------------------------------------------------------
 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
 					  unsigned int line,
 					  unsigned int column,
-					  TokenType type = TokenType_DATA)
+					  TokenType type = TokenType_DATA,
+					  bool must_have_token = false)
 {
-	if (start != end) {
-		// tokens should have no whitespace in them and [start,end] should
+	if (start && end) {
+		// sanity check:
+		// tokens should have no whitespace outside quoted text and [start,end] should
 		// properly delimit the valid range.
-		for (const char* c = start; c != end; ++c) {
-			if (IsSpaceOrNewLine(*c)) {
-				throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column));
+		bool in_double_quotes = false;
+		for (const char* c = start; c != end + 1; ++c) {
+			if (*c == '\"') {
+				in_double_quotes = !in_double_quotes;
+			}
+
+			if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
+				TokenizeError("unexpected whitespace in token", line, column);
 			}
 		}
 
-		output_tokens.push_back(boost::make_shared<Token>(start,end,type,line,column));
+		if (in_double_quotes) {
+			TokenizeError("non-terminated double quotes", line, column);
+		}
+
+		output_tokens.push_back(new_Token(start,end + 1,type,line,column));
+	}
+	else if (must_have_token) {
+		TokenizeError("unexpected character, expected data token", line, column);
 	}
 
 	start = end = NULL;
@@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input)
 	bool comment = false;
 	bool in_double_quotes = false;
+	bool pending_data_token = false;
 
 	const char* token_begin = NULL, *token_end = NULL;
 	for (const char* cur = input;*cur;++cur,++column) {
@@ -119,8 +145,6 @@ void Tokenize(TokenList& output_tokens, const char* input)
 			column = 0;
 			++line;
-
-			continue;
 		}
 
 		if(comment) {
@@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		if (c == '\"') {
 			in_double_quotes = false;
 			token_end = cur;
-			if (!token_begin) {
-				token_begin = cur;
-			}
+
+			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
+			pending_data_token = false;
 		}
 		continue;
 	}
@@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		switch(c)
 		{
 		case '\"':
+			if (token_begin) {
+				TokenizeError("unexpected double-quote", line, column);
+			}
+			token_begin = cur;
 			in_double_quotes = true;
 			continue;
@@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input)
 		case '{':
 			ProcessDataToken(output_tokens,token_begin,token_end, line, column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
+			continue;
 
 		case '}':
 			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
-			break;
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
+			continue;
 
 		case ',':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-			output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_COMMA,line,column));
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
+			}
+			output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
+			continue;
 
 		case ':':
-			ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY);
-			break;
+			if (pending_data_token) {
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
+			}
+			else {
+				TokenizeError("unexpected colon", line, column);
+			}
+			continue;
 		}
 
-		if (!IsSpaceOrNewLine(c)) {
+		if (IsSpaceOrNewLine(c)) {
+			if (token_begin) {
+				// peek ahead and check if the next token is a colon in which
+				// case this counts as KEY token.
+				TokenType type = TokenType_DATA;
+				for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {
+					if (*peek == ':') {
+						type = TokenType_KEY;
+						cur = peek;
+						break;
+					}
+				}
+				ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
+			}
+
+			pending_data_token = false;
+		}
+		else {
 			token_end = cur;
 			if (!token_begin) {
 				token_begin = cur;
 			}
+
+			pending_data_token = true;
 		}
 	}
 }
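To make the new tokenizer rules concrete, here is how one input line should come out of Tokenize() according to the hunks above; the token stream is worked out by hand from the diff, not generated output.

    // Input:
    //     Key: "some value", 42 {
    //     }
    //
    // Expected token stream (hand-traced, illustrative):
    //     KEY            Key              trailing ':' classifies the pending token
    //     DATA           "some value"     whitespace is legal inside double quotes;
    //                                     the quotes stay part of the token text
    //     COMMA          ,                any pending data token is flushed first
    //     DATA           42
    //     OPEN_BRACKET   {
    //     CLOSE_BRACKET  }
    //
    // A stray ':' with no pending data token now raises a DeadlyImportError
    // ("unexpected colon"), and ProcessDataToken() rejects unbalanced quotes
    // and whitespace outside quoted text instead of emitting bad tokens.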

View File: FBXTokenizer.h

@@ -99,6 +99,11 @@ public:
 		return type;
 	}
 
+#ifdef DEBUG
+	// copy of the token to show up in debugger
+	const std::string contents;
+#endif
+
 private:
 	const char* const sbegin;
@@ -108,9 +113,12 @@ private:
 	const unsigned int line, column;
 };
 
-typedef boost::shared_ptr<Token> TokenPtr;
-typedef std::vector< boost::shared_ptr<Token> > TokenList;
+// note: shared_ptr eats up too much storage, unique_ptr is C++11,
+// so have to use manual memory management for now.
+typedef Token* TokenPtr;
+typedef std::vector< TokenPtr > TokenList;
+
+#define new_Token new Token
 
 /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens.
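The "shared_ptr eats up too much storage" note above is easy to check; a small sketch, assuming the boost headers are available as in assimp's build at the time (exact numbers are implementation-dependent).

    // Quick check of the per-token overhead argument: shared_ptr is typically
    // twice the size of a raw pointer, and every token additionally carries
    // reference-count bookkeeping on the heap.
    #include <boost/shared_ptr.hpp>
    #include <iostream>

    struct Token { const char *sbegin, *send; unsigned int line, column; };

    int main()
    {
        std::cout << "raw pointer: " << sizeof(Token*) << " bytes\n";
        std::cout << "shared_ptr:  " << sizeof(boost::shared_ptr<Token>) << " bytes\n";
        return 0;
    }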

View File: FBXUtil.h

@@ -51,6 +51,17 @@ namespace Assimp {
 namespace FBX {
 namespace Util {
 
+/** helper for std::for_each to delete all heap-allocated items in a container */
+template <typename T>
+struct delete_fun
+{
+	void operator()(T* del) {
+		delete del;
+	}
+};
+
 /** Format log/error messages using a given line location in the source file.
  *
  *  @param prefix Message prefix to be prepended to the location info.
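Since delete_fun is templated on the element type, the same helper works for any container of owning raw pointers, not just TokenList. A minimal standalone usage sketch with stand-in element types:

    #include <algorithm>
    #include <list>
    #include <string>
    #include <vector>

    template <typename T>
    struct delete_fun {
        void operator()(T* del) { delete del; }
    };

    int main()
    {
        std::vector<std::string*> names;
        names.push_back(new std::string("fbx"));

        std::list<int*> numbers;
        numbers.push_back(new int(42));

        // deletes the pointees; the containers still hold dangling pointers
        // afterwards, so clear them before any reuse
        std::for_each(names.begin(), names.end(), delete_fun<std::string>());
        std::for_each(numbers.begin(), numbers.end(), delete_fun<int>());
        names.clear();
        numbers.clear();
        return 0;
    }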

File diff suppressed because it is too large.