diff --git a/code/PostProcessing/ImproveCacheLocality.cpp b/code/PostProcessing/ImproveCacheLocality.cpp index 9336d6b17..01426648a 100644 --- a/code/PostProcessing/ImproveCacheLocality.cpp +++ b/code/PostProcessing/ImproveCacheLocality.cpp @@ -7,6 +7,26 @@ Copyright (c) 2006-2022, assimp team +All rights reserved. + +Redistribution and use of this software in source and binary forms, +with or without modification, are permitted provided that the following +conditions are met: + +* Redistributions of source code must retain the above + copyright notice, this list of conditions and the + following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution./* +--------------------------------------------------------------------------- +Open Asset Import Library (assimp) +--------------------------------------------------------------------------- + +Copyright (c) 2006-2023, assimp team + All rights reserved. Redistribution and use of this software in source and binary forms, @@ -59,31 +79,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -using namespace Assimp; +namespace Assimp { // ------------------------------------------------------------------------------------------------ // Constructor to be privately used by Importer -ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() -: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) { +ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() : + mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) { // empty } // ------------------------------------------------------------------------------------------------ // Returns whether the processing step is present in the given flag field. -bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const { +bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const { return (pFlags & aiProcess_ImproveCacheLocality) != 0; } // ------------------------------------------------------------------------------------------------ // Setup configuration -void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) { +void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) { // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer - mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE); + mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE); } // ------------------------------------------------------------------------------------------------ // Executes the post processing step on the given imported data. -void ImproveCacheLocalityProcess::Execute( aiScene* pScene) { +void ImproveCacheLocalityProcess::Execute(aiScene *pScene) { if (!pScene->mNumMeshes) { ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes"); return; @@ -93,11 +113,11 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) { float out = 0.f; unsigned int numf = 0, numm = 0; - for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){ - const float res = ProcessMesh( pScene->mMeshes[a],a); + for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) { + const float res = ProcessMesh(pScene->mMeshes[a], a); if (res) { numf += pScene->mMeshes[a]->mNumFaces; - out += res; + out += res; ++numm; } } @@ -109,9 +129,54 @@ void ImproveCacheLocalityProcess::Execute( aiScene* pScene) { } } +// ------------------------------------------------------------------------------------------------ +static ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd, + unsigned int configCacheDepth, unsigned int meshNum) { + ai_real fACMR = 0.0f; + unsigned int *piFIFOStack = new unsigned int[configCacheDepth]; + memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int)); + unsigned int *piCur = piFIFOStack; + const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth; + + // count the number of cache misses + unsigned int iCacheMisses = 0; + for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) { + for (unsigned int qq = 0; qq < 3; ++qq) { + bool bInCache = false; + for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) { + if (*pp == pcFace->mIndices[qq]) { + // the vertex is in cache + bInCache = true; + break; + } + } + if (!bInCache) { + ++iCacheMisses; + if (piCurEnd == piCur) { + piCur = piFIFOStack; + } + *piCur++ = pcFace->mIndices[qq]; + } + } + } + delete[] piFIFOStack; + fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces; + if (3.0 == fACMR) { + char szBuff[128]; // should be sufficiently large in every case + + // the JoinIdenticalVertices process has not been executed on this + // mesh, otherwise this value would normally be at least minimally + // smaller than 3.0 ... + ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum); + ASSIMP_LOG_WARN(szBuff); + return static_cast(0.f); + } + return fACMR; +} + // ------------------------------------------------------------------------------------------------ // Improves the cache coherency of a specific mesh -ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) { +ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) { // TODO: rewrite this to use std::vector or boost::shared_array ai_assert(nullptr != pMesh); @@ -126,91 +191,57 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me return static_cast(0.f); } - if(pMesh->mNumVertices <= mConfigCacheDepth) { + if (pMesh->mNumVertices <= mConfigCacheDepth) { return static_cast(0.f); } ai_real fACMR = 3.f; - const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces; + const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces; // Input ACMR is for logging purposes only - if (!DefaultLogger::isNullLogger()) { - - unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth]; - memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int)); - unsigned int* piCur = piFIFOStack; - const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth; - - // count the number of cache misses - unsigned int iCacheMisses = 0; - for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) { - for (unsigned int qq = 0; qq < 3;++qq) { - bool bInCache = false; - for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) { - if (*pp == pcFace->mIndices[qq]) { - // the vertex is in cache - bInCache = true; - break; - } - } - if (!bInCache) { - ++iCacheMisses; - if (piCurEnd == piCur) { - piCur = piFIFOStack; - } - *piCur++ = pcFace->mIndices[qq]; - } - } - } - delete[] piFIFOStack; - fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces; - if (3.0 == fACMR) { - char szBuff[128]; // should be sufficiently large in every case - - // the JoinIdenticalVertices process has not been executed on this - // mesh, otherwise this value would normally be at least minimally - // smaller than 3.0 ... - ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum); - ASSIMP_LOG_WARN(szBuff); - return static_cast(0.f); - } + if (!DefaultLogger::isNullLogger()) { + fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum); } // first we need to build a vertex-triangle adjacency list - VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true); + VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true); // build a list to store per-vertex caching time stamps - unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices]; - memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int)); + std::vector piCachingStamps; + piCachingStamps.resize(pMesh->mNumVertices); + memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int)); // allocate an empty output index buffer. We store the output indices in one large array. // Since the number of triangles won't change the input faces can be reused. This is how // we save thousands of redundant mini allocations for aiFace::mIndices - const unsigned int iIdxCnt = pMesh->mNumFaces*3; - unsigned int* const piIBOutput = new unsigned int[iIdxCnt]; - unsigned int* piCSIter = piIBOutput; + const unsigned int iIdxCnt = pMesh->mNumFaces * 3; + std::vector piIBOutput; + piIBOutput.resize(iIdxCnt); + std::vector::iterator piCSIter = piIBOutput.begin(); // allocate the flag array to hold the information // whether a face has already been emitted or not - std::vector abEmitted(pMesh->mNumFaces,false); + std::vector abEmitted(pMesh->mNumFaces, false); // dead-end vertex index stack - std::stack > sDeadEndVStack; + std::stack> sDeadEndVStack; // create a copy of the piNumTriPtr buffer - unsigned int* const piNumTriPtr = adj.mLiveTriangles; + unsigned int *const piNumTriPtr = adj.mLiveTriangles; const std::vector piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices); // get the largest number of referenced triangles and allocate the "candidate buffer" - unsigned int iMaxRefTris = 0; { - const unsigned int* piCur = adj.mLiveTriangles; - const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices; - for (;piCur != piCurEnd;++piCur) { - iMaxRefTris = std::max(iMaxRefTris,*piCur); + unsigned int iMaxRefTris = 0; + { + const unsigned int *piCur = adj.mLiveTriangles; + const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices; + for (; piCur != piCurEnd; ++piCur) { + iMaxRefTris = std::max(iMaxRefTris, *piCur); } } ai_assert(iMaxRefTris > 0); - unsigned int* piCandidates = new unsigned int[iMaxRefTris*3]; + std::vector piCandidates; + piCandidates.resize(iMaxRefTris * 3); unsigned int iCacheMisses = 0; // ................................................................................... @@ -245,23 +276,23 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me int ivdx = 0; int ics = 1; - int iStampCnt = mConfigCacheDepth+1; - while (ivdx >= 0) { + int iStampCnt = mConfigCacheDepth + 1; + while (ivdx >= 0) { unsigned int icnt = piNumTriPtrNoModify[ivdx]; - unsigned int* piList = adj.GetAdjacentTriangles(ivdx); - unsigned int* piCurCandidate = piCandidates; + unsigned int *piList = adj.GetAdjacentTriangles(ivdx); + std::vector::iterator piCurCandidate = piCandidates.begin(); // get all triangles in the neighborhood - for (unsigned int tri = 0; tri < icnt;++tri) { + for (unsigned int tri = 0; tri < icnt; ++tri) { // if they have not yet been emitted, add them to the output IB const unsigned int fidx = *piList++; - if (!abEmitted[fidx]) { + if (!abEmitted[fidx]) { // so iterate through all vertices of the current triangle - const aiFace* pcFace = &pMesh->mFaces[ fidx ]; - unsigned nind = pcFace->mNumIndices; + const aiFace *pcFace = &pMesh->mFaces[fidx]; + const unsigned nind = pcFace->mNumIndices; for (unsigned ind = 0; ind < nind; ind++) { unsigned dp = pcFace->mIndices[ind]; @@ -281,7 +312,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me *piCSIter++ = dp; // if the vertex is not yet in cache, set its cache count - if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) { + if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) { piCachingStamps[dp] = iStampCnt++; ++iCacheMisses; } @@ -297,16 +328,16 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me // get next fanning vertex ivdx = -1; int max_priority = -1; - for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) { + for (std::vector::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) { const unsigned int dp = *piCur; // must have live triangles - if (piNumTriPtr[dp] > 0) { + if (piNumTriPtr[dp] > 0) { int priority = 0; // will the vertex be in cache, even after fanning occurs? unsigned int tmp; - if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) { + if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) { priority = tmp; } @@ -324,7 +355,7 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me while (!sDeadEndVStack.empty()) { unsigned int iCachedIdx = sDeadEndVStack.top(); sDeadEndVStack.pop(); - if (piNumTriPtr[ iCachedIdx ] > 0) { + if (piNumTriPtr[iCachedIdx] > 0) { ivdx = iCachedIdx; break; } @@ -333,9 +364,9 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me if (-1 == ivdx) { // well, there isn't such a vertex. Simply get the next vertex in input order and // hope it is not too bad ... - while (ics < (int)pMesh->mNumVertices) { + while (ics < (int)pMesh->mNumVertices) { ++ics; - if (piNumTriPtr[ics] > 0) { + if (piNumTriPtr[ics] > 0) { ivdx = ics; break; } @@ -345,29 +376,29 @@ ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int me } ai_real fACMR2 = 0.0f; if (!DefaultLogger::isNullLogger()) { - fACMR2 = (float)iCacheMisses / pMesh->mNumFaces; - + fACMR2 = static_cast(iCacheMisses / pMesh->mNumFaces); + const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f; // very intense verbose logging ... prepare for much text if there are many meshes - if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { - ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f); + if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { + ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR); } - fACMR2 *= pMesh->mNumFaces; } - // sort the output index buffer back to the input array - piCSIter = piIBOutput; - for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) { - unsigned nind = pcFace->mNumIndices; - unsigned * ind = pcFace->mIndices; - if (nind > 0) ind[0] = *piCSIter++; - if (nind > 1) ind[1] = *piCSIter++; - if (nind > 2) ind[2] = *piCSIter++; - } - // delete temporary storage - delete[] piCachingStamps; - delete[] piIBOutput; - delete[] piCandidates; + // sort the output index buffer back to the input array + piCSIter = piIBOutput.begin(); + for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) { + unsigned nind = pcFace->mNumIndices; + unsigned *ind = pcFace->mIndices; + if (nind > 0) + ind[0] = *piCSIter++; + if (nind > 1) + ind[1] = *piCSIter++; + if (nind > 2) + ind[2] = *piCSIter++; + } return fACMR2; } + +} // namespace Assimp