# STEP: reduce the memory overhead of the STEP reader. A typical x64 build now has approx. 10x overhead (relative to the size of the file being read), down from the 20-40x it had before.

git-svn-id: https://assimp.svn.sourceforge.net/svnroot/assimp/trunk@1015 67173fc5-114c-0410-ac8e-9d2fd5bffc1f
2011-05-30 20:17:57 +00:00 · 2011-05-30 20:17:57 +00:00 · 23ea1ac97d
parent bf7b1d3514
commit 23ea1ac97d
2 changed files with 109 additions and 41 deletions
--- a/code/STEPFile.h
+++ b/code/STEPFile.h
@ -326,11 +326,19 @@ namespace STEP {
 		// -------------------------------------------------------------------------------
 		class LIST : public DataType 
 		{
 		public:
 			// The list stores plain pointers, so each member is deleted by hand here.
 			~LIST() {
 				for (size_t i = 0, count = members.size(); i < count; ++i) {
 					delete members[i];
 				}
 			}
 		public:
 			// Access the list member stored at a particular index. The result is a
 			// plain pointer; the members themselves are deleted in ~LIST().
 			// NOTE(review): this comment used to promise std::range_error for wrong
 			// indices, but the body applies operator[] to `members` (a std::vector
 			// per the MemberList typedef), which performs no bounds check - check
 			// the index against GetSize() before calling.
 			const DataType* operator[] (size_t index) const {
-				return members[index].get();
+				return members[index];
 			}
 			size_t GetSize() const {
@ -346,7 +354,8 @@ namespace STEP {
 		private:
-			typedef std::vector< boost::shared_ptr<const DataType> > MemberList;
+			// no smart pointer type to avoid any overhead
 			// (the raw pointers kept in this vector are deleted one by one in ~LIST())
 			typedef std::vector< const DataType* > MemberList;
 			// the list's elements; LIST::Parse appends to it with push_back
 			MemberList members;
 		};
@ -394,6 +403,11 @@ namespace STEP {
 				return converters.find(name) != converters.end();
 			}
 			const char* GetStaticStringForToken(const std::string& token) const {
 				ConverterMap::const_iterator it = converters.find(token);
 				return it == converters.end() ? NULL : (*it).first.c_str();
 			}
 			template <size_t N> 
 			const ConversionSchema& operator=( const SchemaEntry (& schemas)[N]) {
@ -579,7 +593,7 @@ namespace STEP {
 		friend class DB;
 	public:
-		LazyObject(DB& db, uint64_t id,uint64_t line,const std::string& type,const std::string& args);
+		LazyObject(DB& db, uint64_t id, uint64_t line, const char* type,const char* args);
 		~LazyObject();
 	public:
@ -636,17 +650,24 @@ namespace STEP {
 			return type != atype;
 		}
 		uint64_t GetID() const {
 			return id;
 		}
 	private:
 		void LazyInit() const;
 	private:
-		const uint64_t id, line;
+		mutable uint64_t id;
-		const std::string type;
+		const char* const type;
 		// (type is a bare pointer: the constructor stores the pointer it is handed
 		// and does not copy the characters)
 		DB& db;
-		const EXPRESS::LIST* conv_args;
+		union {
 			// conv_args and args share the same storage. ~LazyObject() treats
 			// conv_args as the live member when obj is set and args otherwise;
 			// LazyInit() is where the stored value changes from args to conv_args.
 			mutable const EXPRESS::LIST* conv_args;
 			mutable const char* args;
 		};
 		// the converted object: assigned in LazyInit(), deleted in ~LazyObject()
 		mutable Object* obj;
 	};
@ -815,21 +836,24 @@ namespace STEP {
 	public:
-		// objects indexed by ID
+		// objects indexed by ID - this can grow pretty large (i.e some hundred million 
-		typedef std::map<uint64_t,boost::shared_ptr<const LazyObject> > ObjectMap;
+		// entries), so use raw pointers to avoid *any* overhead.
 		// (the pointers stored in this map are deleted in ~DB())
 		typedef std::map<uint64_t,const LazyObject* > ObjectMap;
 		// objects indexed by their declarative type, but only for those that we truly want
 		typedef std::set< const LazyObject*> ObjectSet;
 		typedef std::map<std::string, ObjectSet > ObjectMapByType;
 		// list of types for which to keep inverse indices for all references
 		// that the respective objects keep.
 		// the list keeps pointers to strings in static storage
 		// NOTE: a std::set<const char*> orders and matches its keys by pointer
 		// value, not by string contents, so a lookup only succeeds when it is
 		// given the very same pointer that was inserted.
 		typedef std::set<const char*> InverseWhitelist;
 		// references - for each object id the ids of all objects which reference it
 		// this is used to simulate STEP inverse indices for selected types.
 		typedef std::step_unordered_multimap<uint64_t, uint64_t > RefMap;
 		typedef std::pair<RefMap::const_iterator,RefMap::const_iterator> RefMapRange;
 		// list of types for which to keep inverse indices for all references
 		// the respective objects keep.
 		// NOTE(review): InverseWhitelist is already typedef'd above as
 		// std::set<const char*>; the two definitions name different types and
 		// cannot both stand. This one looks like the pre-change definition whose
 		// '-' marker was lost in this rendering - confirm and drop it.
 		typedef std::set<std::string> InverseWhitelist;
 	private:
 		DB(boost::shared_ptr<StreamReaderLE> reader) 
@ -838,6 +862,14 @@ namespace STEP {
 			, evaluated_count()
 		{}
 	public:
 		// The object map holds plain pointers; release every stored object here.
 		~DB() {
 			for (ObjectMap::iterator entry = objects.begin(); entry != objects.end(); ++entry) {
 				delete entry->second;
 			}
 		}
 	public:
 		uint64_t GetObjectCount() const {
@ -869,7 +901,7 @@ namespace STEP {
 		}
-		bool KeepInverseIndicesForType(const std::string& type) const {
+		bool KeepInverseIndicesForType(const char* const type) const {
 			// Returns true if `type` is present in inv_whitelist.
 			// NOTE(review): inv_whitelist is presumably an InverseWhitelist, i.e. a
 			// std::set<const char*>, so this matches by pointer value. `type` must
 			// then be the pointer obtained from GetStaticStringForToken(), as is
 			// done in SetInverseIndicesToTrack(); an equal string at a different
 			// address would not be found - confirm.
 			return inv_whitelist.find(type) != inv_whitelist.end();
 		}
@ -878,7 +910,7 @@ namespace STEP {
 		// Looks up the object stored under `id`.
 		// Returns the stored pointer, or NULL if `objects` has no entry for that id.
 		// The pointer is handed out as-is; the object itself is deleted in ~DB().
 		const LazyObject* GetObject(uint64_t id) const {
 			const ObjectMap::const_iterator it = objects.find(id);
 			if (it != objects.end()) {
-				return (*it).second.get();
+				return (*it).second;
 			}
 			return NULL;
 		}
@ -932,12 +964,12 @@ namespace STEP {
 			return splitter;
 		}
-		void InternInsert(boost::shared_ptr<const LazyObject> lz) {
+		void InternInsert(const LazyObject* lz) {
 			// Registers lz under its id. ~DB() deletes every pointer stored in
 			// `objects`, so the database ends up responsible for freeing lz.
 			// NOTE(review): operator[] replaces any pointer already stored under
 			// the same id. With raw pointers nothing deletes the replaced object,
 			// and objects_bytype may still hold it - looks like a leak when an id
 			// occurs twice in a file; confirm against the "already exists" warning
 			// path in ReadFile.
-			objects[lz->id] = lz;
+			objects[lz->GetID()] = lz;
 			// index by type as well, but only for types that already have a
 			// bucket in objects_bytype
 			const ObjectMapByType::iterator it = objects_bytype.find( lz->type );
 			if (it != objects_bytype.end()) {
-				(*it).second.insert(lz.get());
+				(*it).second.insert(lz);
 			}
 		}
@ -954,7 +986,9 @@ namespace STEP {
 		// Adds N type names to the inverse-index whitelist.
 		//   types - array of N C strings naming the types
 		//   N     - number of entries in `types`
 		// Each name is first exchanged for the pointer returned by
 		// schema->GetStaticStringForToken(), and that pointer is what gets stored.
 		void SetInverseIndicesToTrack( const char* const* types, size_t N ) {
 			for(size_t i = 0; i < N;++i) {
-				inv_whitelist.insert(types[i]);
+				const char* const sz = schema->GetStaticStringForToken(types[i]);
 				// NOTE(review): presumably sz is NULL for a name the schema does
 				// not contain; the assert is the only guard before the insert.
 				ai_assert(sz);
 				inv_whitelist.insert(sz);
 			}
 		}
--- a/code/STEPFileReader.cpp
+++ b/code/STEPFileReader.cpp
@ -220,10 +220,25 @@ void STEP::ReadFile(DB& db,const EXPRESS::ConversionSchema& scheme,
 			DefaultLogger::get()->warn(AddLineNumber((Formatter::format(),"an object with the id #",id," already exists"),line));
 		}
-		std::string type = s.substr(n0+1,n1-n0-1);
+		std::string::size_type ns = n0;
-		trim(type);
+		do ++ns; while( IsSpace(s.at(ns)));
 		std::string::size_type ne = n1;
 		do --ne; while( IsSpace(s.at(ne)));
 		std::string type = s.substr(ns,ne-ns+1);
 		std::transform( type.begin(), type.end(), type.begin(), &Assimp::ToLower<char>  );
-		db.InternInsert(boost::shared_ptr<LazyObject>(new LazyObject(db,id,line,type,s.substr(n1,n2-n1+1))));
+
 		const char* sz = scheme.GetStaticStringForToken(type);
 		if(sz) {
 			const std::string::size_type len = n2-n1+1;
 			char* const copysz = new char[len+1];
 			std::copy(s.c_str()+n1,s.c_str()+n2+1,copysz);
 			copysz[len] = '\0';
 			db.InternInsert(new LazyObject(db,id,line,sz,copysz));
 		}
 	}
 	if (!splitter) {
@ -375,7 +390,7 @@ const EXPRESS::LIST* EXPRESS::LIST::Parse(const char*& inout,uint64_t line, cons
 			break;
 		}
-		members.push_back( boost::shared_ptr<const EXPRESS::DataType>(EXPRESS::DataType::Parse(cur,line,schema)));
+		members.push_back( EXPRESS::DataType::Parse(cur,line,schema));
 		SkipSpaces(cur,&cur);
 		if (*cur != ',') {
@ -390,41 +405,52 @@ const EXPRESS::LIST* EXPRESS::LIST::Parse(const char*& inout,uint64_t line, cons
 	return list.release();
 }
 // ------------------------------------------------------------------------------------------------
 // Constructs a lazily evaluated object from its id, its type name and the text of
 // its argument tuple. For types accepted by db.KeepInverseIndicesForType() the
 // argument text is scanned for '#' references, and each one is reported to
 // db.MarkRef() together with this object's id.
 // NOTE(review): this rendering of the change has lost some '-' markers. The
 // ", line(line)", ", conv_args()", "args.c_str()" and "LIST::Parse" lines below
 // look like leftovers of the pre-change constructor (args is a const char* in
 // the new signature, and conv_args shares a union with args) - confirm against
 // the real file before relying on them.
-STEP::LazyObject::LazyObject(DB& db, uint64_t id,uint64_t line,const std::string& type,const std::string& args) 
+STEP::LazyObject::LazyObject(DB& db, uint64_t id,uint64_t line, const char* const type,const char* args) 
 	: db(db)
 	, id(id)
 	, line(line)
 	, type(type)
 	, obj()
-	// need to initialize this upfront, otherwise the destructor
+	, args(args)
 	// will crash if an exception is thrown in the c'tor
 	, conv_args() 
 {
 	const char* arg = args.c_str();
 	conv_args = EXPRESS::LIST::Parse(arg,line,&db.GetSchema());
 	// find any external references and store them in the database.
 	// this helps us emulate STEPs INVERSE fields.
 	if (db.KeepInverseIndicesForType(type)) {
-		for (size_t i = 0; i < conv_args->GetSize(); ++i) {
+		const char* a  = args;
-			const EXPRESS::DataType* t = conv_args->operator [](i);
+	
-			if (const EXPRESS::ENTITY* e = t->ToPtr<EXPRESS::ENTITY>()) {
+		// do a quick scan through the argument tuple and watch out for entity references
-				db.MarkRef(*e,id);
+		int64_t skip_depth = 0;
 		// skip_depth is the number of parentheses currently open at *a
 		while(*a) {
 			if (*a == '(') {
 				++skip_depth;
 			}
 			else if (*a == ')') {
 				--skip_depth;
 			}
 			// only a '#' seen at depth exactly 1 is recorded; references that sit
 			// inside a nested parenthesized list are not marked
 			if (skip_depth == 1 && *a=='#') {
 				// tmp is handed to strtoul10_64 but not read afterwards; the scan
 				// keeps advancing one character at a time
 				const char* tmp;
 				const int64_t num = static_cast<int64_t>( strtoul10_64(a+1,&tmp) );
 				db.MarkRef(num,id);
 			}
 			++a;
 		}
 	}
 }
 // ------------------------------------------------------------------------------------------------
 // Releases whatever the conv_args/args union currently holds, then the converted
 // object. `obj` serves as the discriminator: when it is set the union is taken to
 // hold the parsed list (conv_args), otherwise the raw character buffer (args).
 // NOTE(review): LazyInit() stores the parsed list in conv_args and delete[]s the
 // character buffer *before* it calls the converter. If the converter throws, obj
 // is still unset while the union already holds conv_args, so the else branch
 // below would delete[] a LIST pointer - looks like a latent bug; confirm.
 STEP::LazyObject::~LazyObject() 
 {
-	// 'obj' always remains in our possession, so there is 
+	// make sure the right dtor/operator delete get called
-	// no need for a smart pointer type.
+	if (obj) {
-	delete obj;
+		delete conv_args;
-	delete conv_args;
+	}
-}
+	else delete[] args;
 	delete obj;
 }
 // ------------------------------------------------------------------------------------------------
 void STEP::LazyObject::LazyInit() const
@ -433,20 +459,28 @@ void STEP::LazyObject::LazyInit() const
 	STEP::ConvertObjectProc proc = schema.GetConverterProc(type);
 	if (!proc) {
-		throw STEP::TypeError("unknown object type: " + type,id,line);
+		throw STEP::TypeError("unknown object type: " + std::string(type),id);
 	}
 	const char* a  = args, *acopy = a;
 	conv_args = EXPRESS::LIST::Parse(acopy,STEP::SyntaxError::LINE_NOT_SPECIFIED,&db.GetSchema());
 	delete[] a;
 	// if the converter fails, it should throw an exception, but it should never return NULL
 	try {
 		obj = proc(db,*conv_args);
 	}
 	catch(const TypeError& t) {
 		// augment line and entity information
-		throw TypeError(t.what(),id,line);
+		throw TypeError(t.what(),id);
 	}
 	++db.evaluated_count;
 	ai_assert(obj);
 	// store the original id in the object instance
 	obj->SetID(id);
 	//delete conv_args;
 	//conv_args = NULL;
 }