Update ImproveCacheLocality.cpp

- closes https://github.com/assimp/assimp/issues/5262
pull/5277/head
Kim Kulling 2023-10-07 14:54:35 +02:00
parent 666ecd3f1f
commit a531c72f7f
1 changed files with 132 additions and 101 deletions

View File

@ -7,6 +7,26 @@ Copyright (c) 2006-2022, assimp team
All rights reserved.
Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other
materials provided with the distribution./*
---------------------------------------------------------------------------
Open Asset Import Library (assimp)
---------------------------------------------------------------------------
Copyright (c) 2006-2023, assimp team
All rights reserved.
Redistribution and use of this software in source and binary forms,
@ -59,12 +79,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <stack>
using namespace Assimp;
namespace Assimp {
// ------------------------------------------------------------------------------------------------
// Constructor to be privately used by Importer
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() :
mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
// empty
}
@ -110,36 +130,13 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
}
// ------------------------------------------------------------------------------------------------
// Improves the cache coherency of a specific mesh
ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
// TODO: rewrite this to use std::vector or boost::shared_array
ai_assert(nullptr != pMesh);
// Check whether the input data is valid
// - there must be vertices and faces
// - all faces must be triangulated or we can't operate on them
if (!pMesh->HasFaces() || !pMesh->HasPositions())
return static_cast<ai_real>(0.f);
if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
return static_cast<ai_real>(0.f);
}
if(pMesh->mNumVertices <= mConfigCacheDepth) {
return static_cast<ai_real>(0.f);
}
ai_real fACMR = 3.f;
const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
// Input ACMR is for logging purposes only
if (!DefaultLogger::isNullLogger()) {
unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
unsigned int configCacheDepth, unsigned int meshNum) {
ai_real fACMR = 0.0f;
unsigned int *piFIFOStack = new unsigned int[configCacheDepth];
memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int));
unsigned int *piCur = piFIFOStack;
const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth;
// count the number of cache misses
unsigned int iCacheMisses = 0;
@ -174,21 +171,53 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
ASSIMP_LOG_WARN(szBuff);
return static_cast<ai_real>(0.f);
}
return fACMR;
}
// ------------------------------------------------------------------------------------------------
// Improves the cache coherency of a specific mesh
ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
// TODO: rewrite this to use std::vector or boost::shared_array
ai_assert(nullptr != pMesh);
// Check whether the input data is valid
// - there must be vertices and faces
// - all faces must be triangulated or we can't operate on them
if (!pMesh->HasFaces() || !pMesh->HasPositions())
return static_cast<ai_real>(0.f);
if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
return static_cast<ai_real>(0.f);
}
if (pMesh->mNumVertices <= mConfigCacheDepth) {
return static_cast<ai_real>(0.f);
}
ai_real fACMR = 3.f;
const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;
// Input ACMR is for logging purposes only
if (!DefaultLogger::isNullLogger()) {
fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
}
// first we need to build a vertex-triangle adjacency list
VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);
// build a list to store per-vertex caching time stamps
unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
std::vector<unsigned int> piCachingStamps;
piCachingStamps.resize(pMesh->mNumVertices);
memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));
// allocate an empty output index buffer. We store the output indices in one large array.
// Since the number of triangles won't change the input faces can be reused. This is how
// we save thousands of redundant mini allocations for aiFace::mIndices
const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
unsigned int* piCSIter = piIBOutput;
std::vector<unsigned int> piIBOutput;
piIBOutput.resize(iIdxCnt);
std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();
// allocate the flag array to hold the information
// whether a face has already been emitted or not
@ -202,7 +231,8 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
// get the largest number of referenced triangles and allocate the "candidate buffer"
unsigned int iMaxRefTris = 0; {
unsigned int iMaxRefTris = 0;
{
const unsigned int *piCur = adj.mLiveTriangles;
const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
for (; piCur != piCurEnd; ++piCur) {
@ -210,7 +240,8 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
}
}
ai_assert(iMaxRefTris > 0);
unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
std::vector<unsigned int> piCandidates;
piCandidates.resize(iMaxRefTris * 3);
unsigned int iCacheMisses = 0;
// ...................................................................................
@ -250,7 +281,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
unsigned int icnt = piNumTriPtrNoModify[ivdx];
unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
unsigned int* piCurCandidate = piCandidates;
std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();
// get all triangles in the neighborhood
for (unsigned int tri = 0; tri < icnt; ++tri) {
@ -261,7 +292,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
// so iterate through all vertices of the current triangle
const aiFace *pcFace = &pMesh->mFaces[fidx];
unsigned nind = pcFace->mNumIndices;
const unsigned nind = pcFace->mNumIndices;
for (unsigned ind = 0; ind < nind; ind++) {
unsigned dp = pcFace->mIndices[ind];
@ -297,7 +328,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
// get next fanning vertex
ivdx = -1;
int max_priority = -1;
for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
const unsigned int dp = *piCur;
// must have live triangles
@ -345,29 +376,29 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me
}
ai_real fACMR2 = 0.0f;
if (!DefaultLogger::isNullLogger()) {
fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces);
const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
// very intense verbose logging ... prepare for much text if there are many meshes
if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
}
fACMR2 *= pMesh->mNumFaces;
}
// sort the output index buffer back to the input array
piCSIter = piIBOutput;
piCSIter = piIBOutput.begin();
for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
unsigned nind = pcFace->mNumIndices;
unsigned *ind = pcFace->mIndices;
if (nind > 0) ind[0] = *piCSIter++;
if (nind > 1) ind[1] = *piCSIter++;
if (nind > 2) ind[2] = *piCSIter++;
if (nind > 0)
ind[0] = *piCSIter++;
if (nind > 1)
ind[1] = *piCSIter++;
if (nind > 2)
ind[2] = *piCSIter++;
}
// delete temporary storage
delete[] piCachingStamps;
delete[] piIBOutput;
delete[] piCandidates;
return fACMR2;
}
} // namespace Assimp