- fbx: fix various issues with the tokenizer. Stricter error checking and better debuggability. Do not use shared_ptrs for tokens; there are simply too many of them, and ownership is always clear.
parent ff995307ac
commit 6c5efe471f
FBXImporter.cpp

@@ -53,6 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "FBXTokenizer.h"
 #include "FBXParser.h"
+#include "FBXUtil.h"
 #include "StreamReader.h"
 #include "MemoryIOWrapper.h"
@@ -148,11 +149,17 @@ void FBXImporter::InternReadFile( const std::string& pFile,
     // broadphase tokenizing pass in which we identify the core
     // syntax elements of FBX (brackets, commas, key:value mappings)
     TokenList tokens;
+    try {
     Tokenize(tokens,begin);

     // use this information to construct a very rudimentary
     // parse-tree representing the FBX scope structure
     Parser parser(tokens);
+    }
+    catch(...) {
+        std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>());
+        throw;
+    }
 }

 #endif // !ASSIMP_BUILD_NO_FBX_IMPORTER
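The try/catch above is the flip side of dropping shared_ptr: once the token list holds raw pointers, an exception escaping the tokenizer or parser would leak every token, so the handler frees them and re-throws. A minimal standalone sketch of the same cleanup pattern, with illustrative names (Item, deleter, consume, process are not from the Assimp sources):

#include <algorithm>
#include <stdexcept>
#include <vector>

struct Item {};

// functor playing the role of Util::delete_fun<T>
template <typename T>
struct deleter {
    void operator()(T* p) const { delete p; }
};

void consume(std::vector<Item*>&) {
    throw std::runtime_error("parse error"); // stand-in for a failing parse
}

void process(std::vector<Item*>& items) {
    try {
        consume(items);
    }
    catch (...) {
        // nothing else owns the elements, so free them before propagating
        std::for_each(items.begin(), items.end(), deleter<Item>());
        items.clear();
        throw;
    }
}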
FBXTokenizer.cpp

@@ -60,6 +60,9 @@ Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int
 , type(type)
 , line(line)
 , column(column)
+#ifdef DEBUG
+, contents(sbegin, static_cast<size_t>(send-sbegin))
+#endif
 {
     ai_assert(sbegin);
     ai_assert(send);
@@ -74,23 +77,45 @@ Token::~Token()

 namespace {

+// ------------------------------------------------------------------------------------------------
+// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
+void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
+{
+    throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize",message,line,column));
+}
+
+
 // process a potential data token up to 'cur', adding it to 'output_tokens'.
 // ------------------------------------------------------------------------------------------------
 void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
     unsigned int line,
     unsigned int column,
-    TokenType type = TokenType_DATA)
+    TokenType type = TokenType_DATA,
+    bool must_have_token = false)
 {
-    if (start != end) {
-        // tokens should have no whitespace in them and [start,end] should
+    if (start && end) {
+        // sanity check:
+        // tokens should have no whitespace outside quoted text and [start,end] should
         // properly delimit the valid range.
-        for (const char* c = start; c != end; ++c) {
-            if (IsSpaceOrNewLine(*c)) {
-                throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize","unexpected whitespace in token",line,column));
+        bool in_double_quotes = false;
+        for (const char* c = start; c != end + 1; ++c) {
+            if (*c == '\"') {
+                in_double_quotes = !in_double_quotes;
+            }
+
+            if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
+                TokenizeError("unexpected whitespace in token", line, column);
             }
         }

-        output_tokens.push_back(boost::make_shared<Token>(start,end,type,line,column));
+        if (in_double_quotes) {
+            TokenizeError("non-terminated double quotes", line, column);
+        }
+
+        output_tokens.push_back(new_Token(start,end + 1,type,line,column));
+    }
+    else if (must_have_token) {
+        TokenizeError("unexpected character, expected data token", line, column);
     }

     start = end = NULL;
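The reworked check in ProcessDataToken walks the inclusive range [start, end] once, toggling a flag at each double quote so that whitespace inside quoted text is accepted while unquoted whitespace and unterminated quotes still fail. A standalone sketch of that scan under the same inclusive-range convention (ValidateTokenRange is an illustrative name, and std::isspace stands in for IsSpaceOrNewLine):

#include <cctype>
#include <stdexcept>

// throws unless [begin, end] contains no unquoted whitespace and all
// double quotes are balanced; mirrors the loop added above
void ValidateTokenRange(const char* begin, const char* end) {
    bool in_double_quotes = false;
    for (const char* c = begin; c != end + 1; ++c) {
        if (*c == '\"') {
            in_double_quotes = !in_double_quotes;
        }
        if (!in_double_quotes && std::isspace(static_cast<unsigned char>(*c))) {
            throw std::runtime_error("unexpected whitespace in token");
        }
    }
    if (in_double_quotes) {
        throw std::runtime_error("non-terminated double quotes");
    }
}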
@@ -109,6 +134,7 @@ void Tokenize(TokenList& output_tokens, const char* input)

     bool comment = false;
     bool in_double_quotes = false;
+    bool pending_data_token = false;

     const char* token_begin = NULL, *token_end = NULL;
     for (const char* cur = input;*cur;++cur,++column) {
@@ -119,8 +145,6 @@ void Tokenize(TokenList& output_tokens, const char* input)

             column = 0;
             ++line;
-
-            continue;
         }

         if(comment) {
@@ -131,9 +155,9 @@ void Tokenize(TokenList& output_tokens, const char* input)
             if (c == '\"') {
                 in_double_quotes = false;
                 token_end = cur;
-                if (!token_begin) {
-                    token_begin = cur;
-                }
+
+                ProcessDataToken(output_tokens,token_begin,token_end,line,column);
+                pending_data_token = false;
             }
             continue;
         }
@@ -141,6 +165,10 @@ void Tokenize(TokenList& output_tokens, const char* input)
         switch(c)
         {
         case '\"':
+            if (token_begin) {
+                TokenizeError("unexpected double-quote", line, column);
+            }
+            token_begin = cur;
             in_double_quotes = true;
             continue;

@@ -151,29 +179,57 @@ void Tokenize(TokenList& output_tokens, const char* input)

         case '{':
             ProcessDataToken(output_tokens,token_begin,token_end, line, column);
-            output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
-            break;
+            output_tokens.push_back(new_Token(cur,cur+1,TokenType_OPEN_BRACKET,line,column));
+            continue;

         case '}':
             ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-            output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
-            break;
+            output_tokens.push_back(new_Token(cur,cur+1,TokenType_CLOSE_BRACKET,line,column));
+            continue;

         case ',':
-            ProcessDataToken(output_tokens,token_begin,token_end,line,column);
-            output_tokens.push_back(boost::make_shared<Token>(cur,cur+1,TokenType_COMMA,line,column));
-            break;
+            if (pending_data_token) {
+                ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_DATA,true);
+            }
+            output_tokens.push_back(new_Token(cur,cur+1,TokenType_COMMA,line,column));
+            continue;

         case ':':
-            ProcessDataToken(output_tokens,token_begin,token_end,line,column, TokenType_KEY);
-            break;
+            if (pending_data_token) {
+                ProcessDataToken(output_tokens,token_begin,token_end,line,column,TokenType_KEY,true);
+            }
+            else {
+                TokenizeError("unexpected colon", line, column);
+            }
+            continue;
         }

-        if (!IsSpaceOrNewLine(c)) {
+        if (IsSpaceOrNewLine(c)) {
+
+            if (token_begin) {
+                // peek ahead and check if the next token is a colon in which
+                // case this counts as KEY token.
+                TokenType type = TokenType_DATA;
+                for (const char* peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {
+                    if (*peek == ':') {
+                        type = TokenType_KEY;
+                        cur = peek;
+                        break;
+                    }
+                }
+
+                ProcessDataToken(output_tokens,token_begin,token_end,line,column,type);
+            }
+
+            pending_data_token = false;
+        }
+        else {
             token_end = cur;
             if (!token_begin) {
                 token_begin = cur;
             }
+
+            pending_data_token = true;
         }
     }
 }
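The new whitespace branch ends a pending token and decides its type by peeking past the whitespace for a colon, which marks the token just ended as a KEY in FBX's key:value syntax. A simplified sketch of that look-ahead idea (ClassifyEndedToken is an illustrative name; it skips whitespace before testing for the colon rather than reproducing the loop above verbatim):

#include <cctype>

enum TokenTypeSketch { Sketch_DATA, Sketch_KEY };

// after a token ends at 'cur', skip whitespace; a following ':' means
// the token that just ended acts as a key
TokenTypeSketch ClassifyEndedToken(const char* cur) {
    const char* peek = cur;
    while (*peek && std::isspace(static_cast<unsigned char>(*peek))) {
        ++peek;
    }
    return (*peek == ':') ? Sketch_KEY : Sketch_DATA;
}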
FBXTokenizer.h

@@ -99,6 +99,11 @@ public:
         return type;
     }

+#ifdef DEBUG
+    // copy of the token to show up in debugger
+    const std::string contents;
+#endif
+
 private:

     const char* const sbegin;
@@ -108,9 +113,12 @@ private:
     const unsigned int line, column;
 };

-typedef boost::shared_ptr<Token> TokenPtr;
-typedef std::vector< boost::shared_ptr<Token> > TokenList;
+// note: shared_ptr eats up too much storage, unique_ptr is C++11,
+// so have to use manual memory management for now.
+typedef Token* TokenPtr;
+typedef std::vector< TokenPtr > TokenList;
+
+#define new_Token new Token


 /** Main FBX tokenizer function. Transform input buffer into a list of preprocessed tokens.
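With TokenPtr now a plain Token*, the TokenList owns its elements only by convention: whoever holds the list must free each token exactly once, including on error paths, as the importer's catch block does. A sketch of that contract with a minimal stand-in Token (TokenSketch and DestroyTokens are illustrative, not part of the commit):

#include <vector>

struct TokenSketch { /* sbegin, send, type, line, column ... */ };

typedef TokenSketch* TokenPtrSketch;              // no per-token ref-count overhead
typedef std::vector<TokenPtrSketch> TokenListSketch;

// the vector's destructor frees only the pointers, not the tokens,
// so the owner must delete the elements explicitly
void DestroyTokens(TokenListSketch& tokens) {
    for (TokenListSketch::iterator it = tokens.begin(); it != tokens.end(); ++it) {
        delete *it;
    }
    tokens.clear();
}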
FBXUtil.h

@@ -51,6 +51,17 @@ namespace Assimp {
 namespace FBX {
 namespace Util {

+
+/** helper for std::for_each to delete all heap-allocated items in a container */
+template<typename T>
+struct delete_fun
+{
+    void operator()(T* del) {
+        delete del;
+    }
+};
+
+
 /** Format log/error messages using a given line location in the source file.
 *
 *  @param prefix Message prefix to be preprended to the location info.
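delete_fun is the pre-C++11 substitute for a deleting lambda: a stateless functor that std::for_each can apply to every element of a container. A short usage sketch:

#include <algorithm>
#include <vector>

template<typename T>
struct delete_fun {
    void operator()(T* del) { delete del; }
};

int main() {
    std::vector<int*> owned;
    owned.push_back(new int(1));
    owned.push_back(new int(2));

    // frees every element; the vector still holds dangling pointers
    // afterwards, so clear it before any reuse
    std::for_each(owned.begin(), owned.end(), delete_fun<int>());
    owned.clear();
    return 0;
}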
(diff for one additional file suppressed by the viewer because it is too large)