# STEP: reduce memory overhead of STEP reader. A typical x64 build now has approx. 10x overhead (compared to the size of the file being read), as opposed to 20-40x that it had before.

git-svn-id: https://assimp.svn.sourceforge.net/svnroot/assimp/trunk@1015 67173fc5-114c-0410-ac8e-9d2fd5bffc1f
2011-05-30 20:17:57 +00:00 · 2011-05-30 20:17:57 +00:00 · 23ea1ac97d
parent bf7b1d3514
commit 23ea1ac97d
2 changed files with 109 additions and 41 deletions
--- a/code/STEPFile.h
+++ b/code/STEPFile.h
@@ -326,11 +326,19 @@ namespace STEP {
 		// -------------------------------------------------------------------------------
 		class LIST : public DataType 
 		{
+		public:
+
+			~LIST() {
+				BOOST_FOREACH(const DataType* dt, members) {
+					delete dt;
+				}
+			}
+
 		public:

 			// access a particular list index, throw std::range_error for wrong indices 
 			const DataType* operator[] (size_t index) const {
-				return members[index].get();
+				return members[index];
 			}

 			size_t GetSize() const {
@@ -346,7 +354,8 @@ namespace STEP {


 		private:
-			typedef std::vector< boost::shared_ptr<const DataType> > MemberList;
+			// no smart pointer type to avoid any overhead
+			typedef std::vector< const DataType* > MemberList;
 			MemberList members;
 		};

@@ -394,6 +403,11 @@ namespace STEP {
 				return converters.find(name) != converters.end();
 			}

+			const char* GetStaticStringForToken(const std::string& token) const {
+				ConverterMap::const_iterator it = converters.find(token);
+				return it == converters.end() ? NULL : (*it).first.c_str();
+			}
+

 			template <size_t N> 
 			const ConversionSchema& operator=( const SchemaEntry (& schemas)[N]) {
@@ -579,7 +593,7 @@ namespace STEP {
 		friend class DB;
 	public:

-		LazyObject(DB& db, uint64_t id,uint64_t line,const std::string& type,const std::string& args);
+		LazyObject(DB& db, uint64_t id, uint64_t line, const char* type,const char* args);
 		~LazyObject();

 	public:
@@ -636,17 +650,24 @@ namespace STEP {
 			return type != atype;
 		}

+		uint64_t GetID() const {
+			return id;
+		}
+
 	private:

 		void LazyInit() const;

 	private:

-		const uint64_t id, line;
-		const std::string type;
+		mutable uint64_t id;
+		const char* const type;
 		DB& db;

-		const EXPRESS::LIST* conv_args;
+		union {
+			mutable const EXPRESS::LIST* conv_args;
+			mutable const char* args;
+		};
 		mutable Object* obj;
 	};

@@ -815,21 +836,24 @@ namespace STEP {

 	public:

-		// objects indexed by ID
-		typedef std::map<uint64_t,boost::shared_ptr<const LazyObject> > ObjectMap;
+		// objects indexed by ID - this can grow pretty large (i.e. some hundred million 
+		// entries), so use raw pointers to avoid *any* overhead.
+		typedef std::map<uint64_t,const LazyObject* > ObjectMap;

 		// objects indexed by their declarative type, but only for those that we truly want
 		typedef std::set< const LazyObject*> ObjectSet;
 		typedef std::map<std::string, ObjectSet > ObjectMapByType;

+		// list of types for which to keep inverse indices for all references
+		// that the respective objects keep.
+		// the list keeps pointers to strings in static storage
+		typedef std::set<const char*> InverseWhitelist;
+
 		// references - for each object id the ids of all objects which reference it
+		// this is used to simulate STEP inverse indices for selected types.
 		typedef std::step_unordered_multimap<uint64_t, uint64_t > RefMap;
 		typedef std::pair<RefMap::const_iterator,RefMap::const_iterator> RefMapRange;

-		// list of types for which to keep inverse indices for all references
-		// the respective objects keep.
-		typedef std::set<std::string> InverseWhitelist;
-
 	private:

 		DB(boost::shared_ptr<StreamReaderLE> reader) 
@@ -838,6 +862,14 @@ namespace STEP {
 			, evaluated_count()
 		{}

+	public:
+
+		~DB() {
+			BOOST_FOREACH(ObjectMap::value_type& o, objects) {
+				delete o.second;
+			}
+		}
+
 	public:

 		uint64_t GetObjectCount() const {
@@ -869,7 +901,7 @@ namespace STEP {
 		}


-		bool KeepInverseIndicesForType(const std::string& type) const {
+		bool KeepInverseIndicesForType(const char* const type) const {
 			return inv_whitelist.find(type) != inv_whitelist.end();
 		}

@@ -878,7 +910,7 @@ namespace STEP {
 		const LazyObject* GetObject(uint64_t id) const {
 			const ObjectMap::const_iterator it = objects.find(id);
 			if (it != objects.end()) {
-				return (*it).second.get();
+				return (*it).second;
 			}
 			return NULL;
 		}
@@ -932,12 +964,12 @@ namespace STEP {
 			return splitter;
 		}

-		void InternInsert(boost::shared_ptr<const LazyObject> lz) {
-			objects[lz->id] = lz;
+		void InternInsert(const LazyObject* lz) {
+			objects[lz->GetID()] = lz;

 			const ObjectMapByType::iterator it = objects_bytype.find( lz->type );
 			if (it != objects_bytype.end()) {
-				(*it).second.insert(lz.get());
+				(*it).second.insert(lz);
 			}
 		}

@@ -954,7 +986,9 @@ namespace STEP {

 		void SetInverseIndicesToTrack( const char* const* types, size_t N ) {
 			for(size_t i = 0; i < N;++i) {
-				inv_whitelist.insert(types[i]);
+				const char* const sz = schema->GetStaticStringForToken(types[i]);
+				ai_assert(sz);
+				inv_whitelist.insert(sz);
 			}
 		}

--- a/code/STEPFileReader.cpp
+++ b/code/STEPFileReader.cpp
@@ -220,10 +220,25 @@ void STEP::ReadFile(DB& db,const EXPRESS::ConversionSchema& scheme,
 			DefaultLogger::get()->warn(AddLineNumber((Formatter::format(),"an object with the id #",id," already exists"),line));
 		}

-		std::string type = s.substr(n0+1,n1-n0-1);
-		trim(type);
+		std::string::size_type ns = n0;
+		do ++ns; while( IsSpace(s.at(ns)));
+
+		std::string::size_type ne = n1;
+		do --ne; while( IsSpace(s.at(ne)));
+
+		std::string type = s.substr(ns,ne-ns+1);
 		std::transform( type.begin(), type.end(), type.begin(), &Assimp::ToLower<char>  );
-		db.InternInsert(boost::shared_ptr<LazyObject>(new LazyObject(db,id,line,type,s.substr(n1,n2-n1+1))));
+
+		const char* sz = scheme.GetStaticStringForToken(type);
+		if(sz) {
+		
+			const std::string::size_type len = n2-n1+1;
+			char* const copysz = new char[len+1];
+			std::copy(s.c_str()+n1,s.c_str()+n2+1,copysz);
+			copysz[len] = '\0';
+
+			db.InternInsert(new LazyObject(db,id,line,sz,copysz));
+		}
 	}

 	if (!splitter) {
@@ -375,7 +390,7 @@ const EXPRESS::LIST* EXPRESS::LIST::Parse(const char*& inout,uint64_t line, cons
 			break;
 		}
 		
-		members.push_back( boost::shared_ptr<const EXPRESS::DataType>(EXPRESS::DataType::Parse(cur,line,schema)));
+		members.push_back( EXPRESS::DataType::Parse(cur,line,schema));
 		SkipSpaces(cur,&cur);

 		if (*cur != ',') {
@@ -390,41 +405,52 @@ const EXPRESS::LIST* EXPRESS::LIST::Parse(const char*& inout,uint64_t line, cons
 	return list.release();
 }

+
 // ------------------------------------------------------------------------------------------------
-STEP::LazyObject::LazyObject(DB& db, uint64_t id,uint64_t line,const std::string& type,const std::string& args) 
+STEP::LazyObject::LazyObject(DB& db, uint64_t id,uint64_t line, const char* const type,const char* args) 
 	: db(db)
 	, id(id)
-	, line(line)
 	, type(type)
 	, obj()
-	// need to initialize this upfront, otherwise the destructor
-	// will crash if an exception is thrown in the c'tor
-	, conv_args() 
+	, args(args)
 {
-	const char* arg = args.c_str();
-	conv_args = EXPRESS::LIST::Parse(arg,line,&db.GetSchema());
-
 	// find any external references and store them in the database.
 	// this helps us emulate STEPs INVERSE fields.
 	if (db.KeepInverseIndicesForType(type)) {
-		for (size_t i = 0; i < conv_args->GetSize(); ++i) {
-			const EXPRESS::DataType* t = conv_args->operator [](i);
-			if (const EXPRESS::ENTITY* e = t->ToPtr<EXPRESS::ENTITY>()) {
-				db.MarkRef(*e,id);
+		const char* a  = args;
+	
+		// do a quick scan through the argument tuple and watch out for entity references
+		int64_t skip_depth = 0;
+		while(*a) {
+			if (*a == '(') {
+				++skip_depth;
 			}
+			else if (*a == ')') {
+				--skip_depth;
+			}
+
+			if (skip_depth == 1 && *a=='#') {
+				const char* tmp;
+				const int64_t num = static_cast<int64_t>( strtoul10_64(a+1,&tmp) );
+				db.MarkRef(num,id);
+			}
+			++a;
 		}
+
 	}
 }

 // ------------------------------------------------------------------------------------------------
 STEP::LazyObject::~LazyObject() 
 {
-	// 'obj' always remains in our possession, so there is 
-	// no need for a smart pointer type.
-	delete obj;
-	delete conv_args;
-}
+	// make sure the right dtor/operator delete get called
+	if (obj) {
+		delete conv_args;
+	}
+	else delete[] args;

+	delete obj;
+}

 // ------------------------------------------------------------------------------------------------
 void STEP::LazyObject::LazyInit() const
@@ -433,20 +459,28 @@ void STEP::LazyObject::LazyInit() const
 	STEP::ConvertObjectProc proc = schema.GetConverterProc(type);

 	if (!proc) {
-		throw STEP::TypeError("unknown object type: " + type,id,line);
+		throw STEP::TypeError("unknown object type: " + std::string(type),id);
 	}

+	const char* a  = args, *acopy = a;
+	conv_args = EXPRESS::LIST::Parse(acopy,STEP::SyntaxError::LINE_NOT_SPECIFIED,&db.GetSchema());
+	delete[] a;
+
 	// if the converter fails, it should throw an exception, but it should never return NULL
 	try {
 		obj = proc(db,*conv_args);
 	}
 	catch(const TypeError& t) {
 		// augment line and entity information
-		throw TypeError(t.what(),id,line);
+		throw TypeError(t.what(),id);
 	}
 	++db.evaluated_count;
 	ai_assert(obj);

 	// store the original id in the object instance
 	obj->SetID(id);
+
+	//delete conv_args;
+	//conv_args = NULL;
 }
+