JoinIdenticalVertices: Performance optimizations by Krishty („Fuck the System”). Yields a 9x speedup in first benchmarks with meshes > 2k triangles.
git-svn-id: https://assimp.svn.sourceforge.net/svnroot/assimp/trunk@780 67173fc5-114c-0410-ac8e-9d2fd5bffc1fpull/1/head
parent
9e8a9586b3
commit
a9fd02c14e
|
@ -128,9 +128,11 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
uniqueVertices.reserve( pMesh->mNumVertices);
|
uniqueVertices.reserve( pMesh->mNumVertices);
|
||||||
|
|
||||||
// For each vertex the index of the vertex it was replaced by.
|
// For each vertex the index of the vertex it was replaced by.
|
||||||
|
// Since the maximal number of vertices is 2^31-1, the most significant bit can be used to mark
|
||||||
|
// whether a new vertex was created for the index (true) or if it was replaced by an existing
|
||||||
|
// unique vertex (false). This saves an additional std::vector<bool> and greatly enhances
|
||||||
|
// branching performance.
|
||||||
std::vector<unsigned int> replaceIndex( pMesh->mNumVertices, 0xffffffff);
|
std::vector<unsigned int> replaceIndex( pMesh->mNumVertices, 0xffffffff);
|
||||||
// for each vertex whether it was replaced by an existing unique vertex (true) or a new vertex was created for it (false)
|
|
||||||
std::vector<bool> isVertexUnique( pMesh->mNumVertices, false);
|
|
||||||
|
|
||||||
// A little helper to find locally close vertices faster.
|
// A little helper to find locally close vertices faster.
|
||||||
// Try to reuse the lookup table from the last step.
|
// Try to reuse the lookup table from the last step.
|
||||||
|
@ -180,7 +182,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
Vertex v(pMesh,a);
|
Vertex v(pMesh,a);
|
||||||
|
|
||||||
// collect all vertices that are close enough to the given position
|
// collect all vertices that are close enough to the given position
|
||||||
vertexFinder->FindPositions( v.position, posEpsilonSqr, verticesFound);
|
vertexFinder->FindIdenticalPositions( v.position, verticesFound);
|
||||||
unsigned int matchIndex = 0xffffffff;
|
unsigned int matchIndex = 0xffffffff;
|
||||||
|
|
||||||
// check all unique vertices close to the position if this vertex is already present among them
|
// check all unique vertices close to the position if this vertex is already present among them
|
||||||
|
@ -188,9 +190,8 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
|
|
||||||
const unsigned int vidx = verticesFound[b];
|
const unsigned int vidx = verticesFound[b];
|
||||||
const unsigned int uidx = replaceIndex[ vidx];
|
const unsigned int uidx = replaceIndex[ vidx];
|
||||||
if( uidx == 0xffffffff || !isVertexUnique[ vidx]) {
|
if( uidx & 0x80000000)
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
const Vertex& uv = uniqueVertices[ uidx];
|
const Vertex& uv = uniqueVertices[ uidx];
|
||||||
// Position mismatch is impossible - the vertex finder already discarded all non-matching positions
|
// Position mismatch is impossible - the vertex finder already discarded all non-matching positions
|
||||||
|
@ -239,15 +240,13 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
if( matchIndex != 0xffffffff)
|
if( matchIndex != 0xffffffff)
|
||||||
{
|
{
|
||||||
// store where to found the matching unique vertex
|
// store where to found the matching unique vertex
|
||||||
replaceIndex[a] = matchIndex;
|
replaceIndex[a] = matchIndex | 0x80000000;
|
||||||
isVertexUnique[a] = false;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// no unique vertex matches it upto now -> so add it
|
// no unique vertex matches it upto now -> so add it
|
||||||
replaceIndex[a] = (unsigned int)uniqueVertices.size();
|
replaceIndex[a] = (unsigned int)uniqueVertices.size();
|
||||||
uniqueVertices.push_back( v);
|
uniqueVertices.push_back( v);
|
||||||
isVertexUnique[a] = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -331,7 +330,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
{
|
{
|
||||||
aiFace& face = pMesh->mFaces[a];
|
aiFace& face = pMesh->mFaces[a];
|
||||||
for( unsigned int b = 0; b < face.mNumIndices; b++) {
|
for( unsigned int b = 0; b < face.mNumIndices; b++) {
|
||||||
face.mIndices[b] = replaceIndex[face.mIndices[b]];
|
face.mIndices[b] = replaceIndex[face.mIndices[b]] & ~0x80000000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,7 +345,7 @@ int JoinVerticesProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
|
||||||
{
|
{
|
||||||
const aiVertexWeight& ow = bone->mWeights[b];
|
const aiVertexWeight& ow = bone->mWeights[b];
|
||||||
// if the vertex is a unique one, translate it
|
// if the vertex is a unique one, translate it
|
||||||
if( isVertexUnique[ow.mVertexId])
|
if( !(replaceIndex[ow.mVertexId] & 0x80000000))
|
||||||
{
|
{
|
||||||
aiVertexWeight nw;
|
aiVertexWeight nw;
|
||||||
nw.mVertexId = replaceIndex[ow.mVertexId];
|
nw.mVertexId = replaceIndex[ow.mVertexId];
|
||||||
|
|
|
@ -46,6 +46,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
using namespace Assimp;
|
using namespace Assimp;
|
||||||
|
|
||||||
|
// CHAR_BIT seems to be defined under MSVC, but not under GCC. Pray that the correct value is 8.
|
||||||
|
#ifndef CHAR_BIT
|
||||||
|
# define CHAR_BIT 8
|
||||||
|
#endif
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------------
|
||||||
// Constructs a spatially sorted representation from the given position array.
|
// Constructs a spatially sorted representation from the given position array.
|
||||||
SpatialSort::SpatialSort( const aiVector3D* pPositions, unsigned int pNumPositions,
|
SpatialSort::SpatialSort( const aiVector3D* pPositions, unsigned int pNumPositions,
|
||||||
|
@ -168,6 +173,140 @@ void SpatialSort::FindPositions( const aiVector3D& pPosition,
|
||||||
// that's it
|
// that's it
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Binary, signed-integer representation of a single-precision floating-point value.
|
||||||
|
// IEEE 754 says: "If two floating-point numbers in the same format are ordered then they are
|
||||||
|
// ordered the same way when their bits are reinterpreted as sign-magnitude integers."
|
||||||
|
// This allows us to convert all floating-point numbers to signed integers of arbitrary size
|
||||||
|
// and then use them to work with ULPs (Units in the Last Place, for high-precision
|
||||||
|
// computations) or to compare them (integer comparisons are faster than floating-point
|
||||||
|
// comparisons on many platforms).
|
||||||
|
typedef signed int BinFloat;
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------
|
||||||
|
// Converts the bit pattern of a floating-point number to its signed integer representation.
|
||||||
|
BinFloat ToBinary( const float & pValue) {
|
||||||
|
|
||||||
|
// If this assertion fails, signed int is not big enough to store a float on your platform.
|
||||||
|
// Please correct the declaration of BinFloat a few lines above - but do it in a portable,
|
||||||
|
// #ifdef'd manner!
|
||||||
|
BOOST_STATIC_ASSERT( sizeof(BinFloat) >= sizeof(float));
|
||||||
|
|
||||||
|
#if defined( _MSC_VER)
|
||||||
|
// If this assertion fails, Visual C++ has finally moved to ILP64. This means that this
|
||||||
|
// code has just become legacy code! Find out the current value of _MSC_VER and modify
|
||||||
|
// the #if above so it evaluates false on the current and all upcoming VC versions (or
|
||||||
|
// on the current platform, if LP64 or LLP64 are still used on other platforms).
|
||||||
|
BOOST_STATIC_ASSERT( sizeof(BinFloat) == sizeof(float));
|
||||||
|
|
||||||
|
// This works best on Visual C++, but other compilers have their problems with it.
|
||||||
|
const BinFloat binValue = reinterpret_cast<BinFloat const &>(pValue);
|
||||||
|
#else
|
||||||
|
// On many compilers, reinterpreting a float address as an integer causes aliasing
|
||||||
|
// problems. This is an ugly but more or less safe way of doing it.
|
||||||
|
union {
|
||||||
|
float asFloat;
|
||||||
|
BinFloat asBin;
|
||||||
|
} conversion;
|
||||||
|
conversion.asBin = 0; // zero empty space in case sizeof(BinFloat) > sizeof(float)
|
||||||
|
conversion.asFloat = pValue;
|
||||||
|
const BinFloat binValue = conversion.asBin;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// floating-point numbers are of sign-magnitude format, so find out what signed number
|
||||||
|
// representation we must convert negative values to.
|
||||||
|
// See http://en.wikipedia.org/wiki/Signed_number_representations.
|
||||||
|
|
||||||
|
// Two's complement?
|
||||||
|
if( (-42 == (~42 + 1)) && (binValue & 0x80000000))
|
||||||
|
return BinFloat(1 << (CHAR_BIT * sizeof(BinFloat) - 1)) - binValue;
|
||||||
|
// One's complement?
|
||||||
|
else if( (-42 == ~42) && (binValue & 0x80000000))
|
||||||
|
return BinFloat(-0) - binValue;
|
||||||
|
// Sign-magnitude?
|
||||||
|
else if( (-42 == (42 | (-0))) && (binValue & 0x80000000)) // -0 = 1000... binary
|
||||||
|
return binValue;
|
||||||
|
else
|
||||||
|
return binValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
// Fills an array with indices of all positions indentical to the given position. In opposite to
|
||||||
|
// FindPositions(), not an epsilon is used but a (very low) tolerance of four floating-point units.
|
||||||
|
void SpatialSort::FindIdenticalPositions( const aiVector3D& pPosition,
|
||||||
|
std::vector<unsigned int>& poResults) const
|
||||||
|
{
|
||||||
|
// Epsilons have a huge disadvantage: they are of constant precision, while floating-point
|
||||||
|
// values are of log2 precision. If you apply e=0.01 to 100, the epsilon is rather small, but
|
||||||
|
// if you apply it to 0.001, it is enormous.
|
||||||
|
|
||||||
|
// The best way to overcome this is the unit in the last place (ULP). A precision of 2 ULPs
|
||||||
|
// tells us that a float does not differ more than 2 bits from the "real" value. ULPs are of
|
||||||
|
// logarithmic precision - around 1, they are 1÷(2^24) and around 10000, they are 0.00125.
|
||||||
|
|
||||||
|
// For standard C math, we can assume a precision of 0.5 ULPs according to IEEE 754. The
|
||||||
|
// incoming vertex positions might have already been transformed, probably using rather
|
||||||
|
// inaccurate SSE instructions, so we assume a tolerance of 4 ULPs to safely identify
|
||||||
|
// identical vertex positions.
|
||||||
|
static const int toleranceInULPs = 4;
|
||||||
|
// An interesting point is that the inaccuracy grows linear with the number of operations:
|
||||||
|
// multiplying to numbers, each inaccurate to four ULPs, results in an inaccuracy of four ULPs
|
||||||
|
// plus 0.5 ULPs for the multiplication.
|
||||||
|
// To compute the distance to the plane, a dot product is needed - that is a multiplication and
|
||||||
|
// an addition on each number.
|
||||||
|
static const int distanceToleranceInULPs = toleranceInULPs + 1;
|
||||||
|
// The squared distance between two 3D vectors is computed the same way, but with an additional
|
||||||
|
// subtraction.
|
||||||
|
static const int distance3DToleranceInULPs = distanceToleranceInULPs + 1;
|
||||||
|
|
||||||
|
// Convert the plane distance to its signed integer representation so the ULPs tolerance can be
|
||||||
|
// applied. For some reason, VC won't optimize two calls of the bit pattern conversion.
|
||||||
|
const BinFloat minDistBinary = ToBinary( pPosition * mPlaneNormal) - distanceToleranceInULPs;
|
||||||
|
const BinFloat maxDistBinary = minDistBinary + 2 * distanceToleranceInULPs;
|
||||||
|
|
||||||
|
// clear the array in this strange fashion because a simple clear() would also deallocate
|
||||||
|
// the array which we want to avoid
|
||||||
|
poResults.erase( poResults.begin(), poResults.end());
|
||||||
|
|
||||||
|
// do a binary search for the minimal distance to start the iteration there
|
||||||
|
unsigned int index = (unsigned int)mPositions.size() / 2;
|
||||||
|
unsigned int binaryStepSize = (unsigned int)mPositions.size() / 4;
|
||||||
|
while( binaryStepSize > 1)
|
||||||
|
{
|
||||||
|
// Ugly, but conditional jumps are faster with integers than with floats
|
||||||
|
if( minDistBinary > ToBinary(mPositions[index].mDistance))
|
||||||
|
index += binaryStepSize;
|
||||||
|
else
|
||||||
|
index -= binaryStepSize;
|
||||||
|
|
||||||
|
binaryStepSize /= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// depending on the direction of the last step we need to single step a bit back or forth
|
||||||
|
// to find the actual beginning element of the range
|
||||||
|
while( index > 0 && minDistBinary < ToBinary(mPositions[index].mDistance) )
|
||||||
|
index--;
|
||||||
|
while( index < (mPositions.size() - 1) && minDistBinary > ToBinary(mPositions[index].mDistance))
|
||||||
|
index++;
|
||||||
|
|
||||||
|
// Now start iterating from there until the first position lays outside of the distance range.
|
||||||
|
// Add all positions inside the distance range within the tolerance to the result aray
|
||||||
|
std::vector<Entry>::const_iterator it = mPositions.begin() + index;
|
||||||
|
while( ToBinary(it->mDistance) < maxDistBinary)
|
||||||
|
{
|
||||||
|
if( distance3DToleranceInULPs >= ToBinary((it->mPosition - pPosition).SquareLength()))
|
||||||
|
poResults.push_back(it->mIndex);
|
||||||
|
++it;
|
||||||
|
if( it == mPositions.end())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// that's it
|
||||||
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------------
|
||||||
unsigned int SpatialSort::GenerateMappingTable(std::vector<unsigned int>& fill,float pRadius) const
|
unsigned int SpatialSort::GenerateMappingTable(std::vector<unsigned int>& fill,float pRadius) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -120,6 +120,15 @@ public:
|
||||||
void FindPositions( const aiVector3D& pPosition, float pRadius,
|
void FindPositions( const aiVector3D& pPosition, float pRadius,
|
||||||
std::vector<unsigned int>& poResults) const;
|
std::vector<unsigned int>& poResults) const;
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------
|
||||||
|
/** Fills an array with indices of all positions identical to the given position. In
|
||||||
|
* contrast to FindPositions(), no epsilon is used but a (very low) tolerance of
|
||||||
|
* four floating-point units.
|
||||||
|
* @param pPosition The position to look for vertices.
|
||||||
|
* @param poResults The container to store the indices of the found positions.
|
||||||
|
* Will be emptied by the call so it may contain anything.*/
|
||||||
|
void FindIdenticalPositions( const aiVector3D& pPosition,
|
||||||
|
std::vector<unsigned int>& poResults) const;
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------
|
||||||
/** Compute a table that maps each vertex ID referring to a spatially close
|
/** Compute a table that maps each vertex ID referring to a spatially close
|
||||||
|
|
|
@ -361,7 +361,7 @@
|
||||||
Name="release-dll|x64"
|
Name="release-dll|x64"
|
||||||
ConfigurationType="2"
|
ConfigurationType="2"
|
||||||
InheritedPropertySheets=".\shared\DllShared.vsprops"
|
InheritedPropertySheets=".\shared\DllShared.vsprops"
|
||||||
WholeProgramOptimization="0"
|
WholeProgramOptimization="1"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCPreBuildEventTool"
|
Name="VCPreBuildEventTool"
|
||||||
|
@ -381,6 +381,7 @@
|
||||||
/>
|
/>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCLCompilerTool"
|
Name="VCCLCompilerTool"
|
||||||
|
Optimization="3"
|
||||||
InlineFunctionExpansion="2"
|
InlineFunctionExpansion="2"
|
||||||
EnableIntrinsicFunctions="true"
|
EnableIntrinsicFunctions="true"
|
||||||
FavorSizeOrSpeed="1"
|
FavorSizeOrSpeed="1"
|
||||||
|
|
Loading…
Reference in New Issue