From a9fd02c14e5f6a8b1e8dfeaff230bfe66099dc8b Mon Sep 17 00:00:00 2001 From: aramis_acg Date: Sun, 11 Jul 2010 23:07:11 +0000 Subject: [PATCH] =?UTF-8?q?JoinIdenticalVertices:=20Performance=20optimiza?= =?UTF-8?q?tions=20by=20Krishty=20(=E2=80=9EFuck=20the=20System=E2=80=9D).?= =?UTF-8?q?=20Yields=20a=209x=20speedup=20in=20first=20benchmarks=20with?= =?UTF-8?q?=20meshes=20>=202k=20triangles.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://assimp.svn.sourceforge.net/svnroot/assimp/trunk@780 67173fc5-114c-0410-ac8e-9d2fd5bffc1f --- code/JoinVerticesProcess.cpp | 21 +++--- code/SpatialSort.cpp | 139 +++++++++++++++++++++++++++++++++++ code/SpatialSort.h | 9 +++ workspaces/vc9/assimp.vcproj | 3 +- 4 files changed, 160 insertions(+), 12 deletions(-) diff --git a/code/JoinVerticesProcess.cpp b/code/JoinVerticesProcess.cpp index 32bf95ba7..d5dc1dedb 100644 --- a/code/JoinVerticesProcess.cpp +++ b/code/JoinVerticesProcess.cpp @@ -127,10 +127,12 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) std::vector uniqueVertices; uniqueVertices.reserve( pMesh->mNumVertices); - // For each vertex the index of the vertex it was replaced by. + // For each vertex the index of the vertex it was replaced by. + // Since the maximal number of vertices is 2^31-1, the most significant bit can be used to mark + // whether a new vertex was created for the index (bit clear) or if it was replaced by an existing + // unique vertex (bit set). This saves an additional std::vector and greatly enhances + // branching performance. std::vector replaceIndex( pMesh->mNumVertices, 0xffffffff); - // for each vertex whether it was replaced by an existing unique vertex (true) or a new vertex was created for it (false) - std::vector isVertexUnique( pMesh->mNumVertices, false); // A little helper to find locally close vertices faster. // Try to reuse the lookup table from the last step. 
@@ -180,7 +182,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) Vertex v(pMesh,a); // collect all vertices that are close enough to the given position - vertexFinder->FindPositions( v.position, posEpsilonSqr, verticesFound); + vertexFinder->FindIdenticalPositions( v.position, verticesFound); unsigned int matchIndex = 0xffffffff; // check all unique vertices close to the position if this vertex is already present among them @@ -188,9 +190,8 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) const unsigned int vidx = verticesFound[b]; const unsigned int uidx = replaceIndex[ vidx]; - if( uidx == 0xffffffff || !isVertexUnique[ vidx]) { + if( uidx & 0x80000000) continue; - } const Vertex& uv = uniqueVertices[ uidx]; // Position mismatch is impossible - the vertex finder already discarded all non-matching positions @@ -239,15 +240,13 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) if( matchIndex != 0xffffffff) { // store where to found the matching unique vertex - replaceIndex[a] = matchIndex; - isVertexUnique[a] = false; + replaceIndex[a] = matchIndex | 0x80000000; } else { // no unique vertex matches it upto now -> so add it replaceIndex[a] = (unsigned int)uniqueVertices.size(); uniqueVertices.push_back( v); - isVertexUnique[a] = true; } } @@ -331,7 +330,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) { aiFace& face = pMesh->mFaces[a]; for( unsigned int b = 0; b < face.mNumIndices; b++) { - face.mIndices[b] = replaceIndex[face.mIndices[b]]; + face.mIndices[b] = replaceIndex[face.mIndices[b]] & ~0x80000000; } } @@ -346,7 +345,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex) { const aiVertexWeight& ow = bone->mWeights[b]; // if the vertex is a unique one, translate it - if( isVertexUnique[ow.mVertexId]) + if( !(replaceIndex[ow.mVertexId] & 0x80000000)) { aiVertexWeight nw; nw.mVertexId = 
replaceIndex[ow.mVertexId]; diff --git a/code/SpatialSort.cpp b/code/SpatialSort.cpp index 29b5cf279..e5e149945 100644 --- a/code/SpatialSort.cpp +++ b/code/SpatialSort.cpp @@ -46,6 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. using namespace Assimp; +// CHAR_BIT seems to be defined under MVSC, but not under GCC. Pray that the correct value is 8. +#ifndef CHAR_BIT +# define CHAR_BIT 8 +#endif + // ------------------------------------------------------------------------------------------------ // Constructs a spatially sorted representation from the given position array. SpatialSort::SpatialSort( const aiVector3D* pPositions, unsigned int pNumPositions, @@ -168,6 +173,140 @@ void SpatialSort::FindPositions( const aiVector3D& pPosition, // that's it } +namespace { + + // Binary, signed-integer representation of a single-precision floating-point value. + // IEEE 754 says: "If two floating-point numbers in the same format are ordered then they are + // ordered the same way when their bits are reinterpreted as sign-magnitude integers." + // This allows us to convert all floating-point numbers to signed integers of arbitrary size + // and then use them to work with ULPs (Units in the Last Place, for high-precision + // computations) or to compare them (integer comparisons are faster than floating-point + // comparisons on many platforms). + typedef signed int BinFloat; + + // -------------------------------------------------------------------------------------------- + // Converts the bit pattern of a floating-point number to its signed integer representation. + BinFloat ToBinary( const float & pValue) { + + // If this assertion fails, signed int is not big enough to store a float on your platform. + // Please correct the declaration of BinFloat a few lines above - but do it in a portable, + // #ifdef'd manner! 
+ BOOST_STATIC_ASSERT( sizeof(BinFloat) >= sizeof(float)); + + #if defined( _MSC_VER) + // If this assertion fails, Visual C++ has finally moved to ILP64. This means that this + // code has just become legacy code! Find out the current value of _MSC_VER and modify + // the #if above so it evaluates false on the current and all upcoming VC versions (or + // on the current platform, if LP64 or LLP64 are still used on other platforms). + BOOST_STATIC_ASSERT( sizeof(BinFloat) == sizeof(float)); + + // This works best on Visual C++, but other compilers have their problems with it. + const BinFloat binValue = reinterpret_cast(pValue); + #else + // On many compilers, reinterpreting a float address as an integer causes aliasing + // problems. This is an ugly but more or less safe way of doing it. + union { + float asFloat; + BinFloat asBin; + } conversion; + conversion.asBin = 0; // zero empty space in case sizeof(BinFloat) > sizeof(float) + conversion.asFloat = pValue; + const BinFloat binValue = conversion.asBin; + #endif + + // floating-point numbers are of sign-magnitude format, so find out what signed number + // representation we must convert negative values to. + // See http://en.wikipedia.org/wiki/Signed_number_representations. + + // Two's complement? + if( (-42 == (~42 + 1)) && (binValue & 0x80000000)) + return BinFloat(1 << (CHAR_BIT * sizeof(BinFloat) - 1)) - binValue; + // One's complement? + else if( (-42 == ~42) && (binValue & 0x80000000)) + return BinFloat(-0) - binValue; + // Sign-magnitude? + else if( (-42 == (42 | (-0))) && (binValue & 0x80000000)) // -0 = 1000... binary + return binValue; + else + return binValue; + } + +} // namespace + +// ------------------------------------------------------------------------------------------------ +// Fills an array with indices of all positions identical to the given position. In contrast to +// FindPositions(), not an epsilon is used but a (very low) tolerance of four ULPs (units in the last place). 
+void SpatialSort::FindIdenticalPositions( const aiVector3D& pPosition, + std::vector& poResults) const +{ + // Epsilons have a huge disadvantage: they are of constant precision, while floating-point + // values are of log2 precision. If you apply e=0.01 to 100, the epsilon is rather small, but + // if you apply it to 0.001, it is enormous. + + // The best way to overcome this is the unit in the last place (ULP). A precision of 2 ULPs + // tells us that a float does not differ by more than 2 bits from the "real" value. ULPs are of + // logarithmic precision - around 1, they are 1÷(2^24) and around 10000, they are 0.00125. + + // For standard C math, we can assume a precision of 0.5 ULPs according to IEEE 754. The + // incoming vertex positions might have already been transformed, probably using rather + // inaccurate SSE instructions, so we assume a tolerance of 4 ULPs to safely identify + // identical vertex positions. + static const int toleranceInULPs = 4; + // An interesting point is that the inaccuracy grows linearly with the number of operations: + // multiplying two numbers, each inaccurate to four ULPs, results in an inaccuracy of four ULPs + // plus 0.5 ULPs for the multiplication. + // To compute the distance to the plane, a dot product is needed - that is a multiplication and + // an addition on each number. + static const int distanceToleranceInULPs = toleranceInULPs + 1; + // The squared distance between two 3D vectors is computed the same way, but with an additional + // subtraction. + static const int distance3DToleranceInULPs = distanceToleranceInULPs + 1; + + // Convert the plane distance to its signed integer representation so the ULPs tolerance can be + // applied. For some reason, VC won't optimize two calls of the bit pattern conversion. 
+ const BinFloat minDistBinary = ToBinary( pPosition * mPlaneNormal) - distanceToleranceInULPs; + const BinFloat maxDistBinary = minDistBinary + 2 * distanceToleranceInULPs; + + // clear the array in this strange fashion because a simple clear() would also deallocate + // the array which we want to avoid + poResults.erase( poResults.begin(), poResults.end()); + + // do a binary search for the minimal distance to start the iteration there + unsigned int index = (unsigned int)mPositions.size() / 2; + unsigned int binaryStepSize = (unsigned int)mPositions.size() / 4; + while( binaryStepSize > 1) + { + // Ugly, but conditional jumps are faster with integers than with floats + if( minDistBinary > ToBinary(mPositions[index].mDistance)) + index += binaryStepSize; + else + index -= binaryStepSize; + + binaryStepSize /= 2; + } + + // depending on the direction of the last step we need to single step a bit back or forth + // to find the actual beginning element of the range + while( index > 0 && minDistBinary < ToBinary(mPositions[index].mDistance) ) + index--; + while( index < (mPositions.size() - 1) && minDistBinary > ToBinary(mPositions[index].mDistance)) + index++; + + // Now start iterating from there until the first position lays outside of the distance range. 
+ // Add all positions inside the distance range within the tolerance to the result array + std::vector::const_iterator it = mPositions.begin() + index; + while( ToBinary(it->mDistance) < maxDistBinary) + { + if( distance3DToleranceInULPs >= ToBinary((it->mPosition - pPosition).SquareLength())) + poResults.push_back(it->mIndex); + ++it; + if( it == mPositions.end()) + break; + } + + // that's it +} + // ------------------------------------------------------------------------------------------------ unsigned int SpatialSort::GenerateMappingTable(std::vector& fill,float pRadius) const { diff --git a/code/SpatialSort.h b/code/SpatialSort.h index 2ecef304b..09aa1f44c 100644 --- a/code/SpatialSort.h +++ b/code/SpatialSort.h @@ -120,6 +120,15 @@ public: void FindPositions( const aiVector3D& pPosition, float pRadius, std::vector& poResults) const; + // ------------------------------------------------------------------------------------ + /** Fills an array with indices of all positions identical to the given position. In + * contrast to FindPositions(), not an epsilon is used but a (very low) tolerance of + * four ULPs (units in the last place). + * @param pPosition The position to look for vertices. + * @param poResults The container to store the indices of the found positions. + * Will be emptied by the call so it may contain anything.*/ + void FindIdenticalPositions( const aiVector3D& pPosition, + std::vector& poResults) const; // ------------------------------------------------------------------------------------ /** Compute a table that maps each vertex ID referring to a spatially close diff --git a/workspaces/vc9/assimp.vcproj b/workspaces/vc9/assimp.vcproj index d6cf618f4..8df0fab0f 100644 --- a/workspaces/vc9/assimp.vcproj +++ b/workspaces/vc9/assimp.vcproj @@ -361,7 +361,7 @@ Name="release-dll|x64" ConfigurationType="2" InheritedPropertySheets=".\shared\DllShared.vsprops" - WholeProgramOptimization="0" + WholeProgramOptimization="1" >