390 lines
15 KiB
C++
390 lines
15 KiB
C++
/*
|
|
---------------------------------------------------------------------------
|
|
Open Asset Import Library (assimp)
|
|
---------------------------------------------------------------------------
|
|
|
|
Copyright (c) 2006-2018, assimp team
|
|
|
|
|
|
|
|
All rights reserved.
|
|
|
|
Redistribution and use of this software in source and binary forms,
|
|
with or without modification, are permitted provided that the following
|
|
conditions are met:
|
|
|
|
* Redistributions of source code must retain the above
|
|
copyright notice, this list of conditions and the
|
|
following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the
|
|
following disclaimer in the documentation and/or other
|
|
materials provided with the distribution.
|
|
|
|
* Neither the name of the assimp team, nor the names of its
|
|
contributors may be used to endorse or promote products
|
|
derived from this software without specific prior
|
|
written permission of the assimp team.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
---------------------------------------------------------------------------
|
|
*/
|
|
|
|
/** @file Implementation of the post processing step to improve the cache locality of a mesh.
|
|
* <br>
|
|
* The algorithm is roughly basing on this paper:
|
|
* http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
|
|
* .. although overdraw rduction isn't implemented yet ...
|
|
*/
|
|
|
|
|
|
|
|
// internal headers
|
|
#include "ImproveCacheLocality.h"
|
|
#include "VertexTriangleAdjacency.h"
|
|
#include <assimp/StringUtils.h>
|
|
#include <assimp/postprocess.h>
|
|
#include <assimp/scene.h>
|
|
#include <assimp/DefaultLogger.hpp>
|
|
#include <stdio.h>
|
|
#include <stack>
|
|
|
|
using namespace Assimp;
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Constructor to be privately used by Importer
|
|
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
|
|
configCacheDepth = PP_ICL_PTCACHE_SIZE;
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Destructor, private as well
|
|
ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
|
|
{
|
|
// nothing to do here
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Returns whether the processing step is present in the given flag field.
|
|
bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
|
|
{
|
|
return (pFlags & aiProcess_ImproveCacheLocality) != 0;
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Setup configuration
|
|
void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
|
|
{
|
|
// AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
|
|
configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Executes the post processing step on the given imported data.
|
|
void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
|
|
{
|
|
if (!pScene->mNumMeshes) {
|
|
DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
|
|
return;
|
|
}
|
|
|
|
DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
|
|
|
|
float out = 0.f;
|
|
unsigned int numf = 0, numm = 0;
|
|
for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
|
|
const float res = ProcessMesh( pScene->mMeshes[a],a);
|
|
if (res) {
|
|
numf += pScene->mMeshes[a]->mNumFaces;
|
|
out += res;
|
|
++numm;
|
|
}
|
|
}
|
|
if (!DefaultLogger::isNullLogger()) {
|
|
char szBuff[128]; // should be sufficiently large in every case
|
|
ai_snprintf(szBuff,128,"Cache relevant are %u meshes (%u faces). Average output ACMR is %f",
|
|
numm,numf,out/numf);
|
|
|
|
DefaultLogger::get()->info(szBuff);
|
|
DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
|
|
}
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------------------
|
|
// Improves the cache coherency of a specific mesh
|
|
float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
|
|
{
|
|
// TODO: rewrite this to use std::vector or boost::shared_array
|
|
ai_assert(NULL != pMesh);
|
|
|
|
// Check whether the input data is valid
|
|
// - there must be vertices and faces
|
|
// - all faces must be triangulated or we can't operate on them
|
|
if (!pMesh->HasFaces() || !pMesh->HasPositions())
|
|
return 0.f;
|
|
|
|
if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
|
|
DefaultLogger::get()->error("This algorithm works on triangle meshes only");
|
|
return 0.f;
|
|
}
|
|
|
|
if(pMesh->mNumVertices <= configCacheDepth) {
|
|
return 0.f;
|
|
}
|
|
|
|
float fACMR = 3.f;
|
|
const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
|
|
|
|
// Input ACMR is for logging purposes only
|
|
if (!DefaultLogger::isNullLogger()) {
|
|
|
|
unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
|
|
memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
|
|
unsigned int* piCur = piFIFOStack;
|
|
const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
|
|
|
|
// count the number of cache misses
|
|
unsigned int iCacheMisses = 0;
|
|
for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
|
|
|
|
for (unsigned int qq = 0; qq < 3;++qq) {
|
|
bool bInCache = false;
|
|
|
|
for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
|
|
if (*pp == pcFace->mIndices[qq]) {
|
|
// the vertex is in cache
|
|
bInCache = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!bInCache) {
|
|
++iCacheMisses;
|
|
if (piCurEnd == piCur) {
|
|
piCur = piFIFOStack;
|
|
}
|
|
*piCur++ = pcFace->mIndices[qq];
|
|
}
|
|
}
|
|
}
|
|
delete[] piFIFOStack;
|
|
fACMR = (float)iCacheMisses / pMesh->mNumFaces;
|
|
if (3.0 == fACMR) {
|
|
char szBuff[128]; // should be sufficiently large in every case
|
|
|
|
// the JoinIdenticalVertices process has not been executed on this
|
|
// mesh, otherwise this value would normally be at least minimally
|
|
// smaller than 3.0 ...
|
|
ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
|
|
ASSIMP_LOG_WARN(szBuff);
|
|
return 0.f;
|
|
}
|
|
}
|
|
|
|
// first we need to build a vertex-triangle adjacency list
|
|
VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
|
|
|
|
// build a list to store per-vertex caching time stamps
|
|
unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
|
|
memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
|
|
|
|
// allocate an empty output index buffer. We store the output indices in one large array.
|
|
// Since the number of triangles won't change the input faces can be reused. This is how
|
|
// we save thousands of redundant mini allocations for aiFace::mIndices
|
|
const unsigned int iIdxCnt = pMesh->mNumFaces*3;
|
|
unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
|
|
unsigned int* piCSIter = piIBOutput;
|
|
|
|
// allocate the flag array to hold the information
|
|
// whether a face has already been emitted or not
|
|
std::vector<bool> abEmitted(pMesh->mNumFaces,false);
|
|
|
|
// dead-end vertex index stack
|
|
std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
|
|
|
|
// create a copy of the piNumTriPtr buffer
|
|
unsigned int* const piNumTriPtr = adj.mLiveTriangles;
|
|
const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
|
|
|
|
// get the largest number of referenced triangles and allocate the "candidate buffer"
|
|
unsigned int iMaxRefTris = 0; {
|
|
const unsigned int* piCur = adj.mLiveTriangles;
|
|
const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
|
|
for (;piCur != piCurEnd;++piCur) {
|
|
iMaxRefTris = std::max(iMaxRefTris,*piCur);
|
|
}
|
|
}
|
|
ai_assert(iMaxRefTris > 0);
|
|
unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
|
|
unsigned int iCacheMisses = 0;
|
|
|
|
// ...................................................................................
|
|
/** PSEUDOCODE for the algorithm
|
|
|
|
A = Build-Adjacency(I) Vertex-triangle adjacency
|
|
L = Get-Triangle-Counts(A) Per-vertex live triangle counts
|
|
C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
|
|
D = Empty-Stack() Dead-end vertex stack
|
|
E = False(Triangle-Count(I)) Per triangle emitted flag
|
|
O = Empty-Index-Buffer() Empty output buffer
|
|
f = 0 Arbitrary starting vertex
|
|
s = k+1, i = 1 Time stamp and cursor
|
|
while f >= 0 For all valid fanning vertices
|
|
N = Empty-Set() 1-ring of next candidates
|
|
for each Triangle t in Neighbors(A, f)
|
|
if !Emitted(E,t)
|
|
for each Vertex v in t
|
|
Append(O,v) Output vertex
|
|
Push(D,v) Add to dead-end stack
|
|
Insert(N,v) Register as candidate
|
|
L[v] = L[v]-1 Decrease live triangle count
|
|
if s-C[v] > k If not in cache
|
|
C[v] = s Set time stamp
|
|
s = s+1 Increment time stamp
|
|
E[t] = true Flag triangle as emitted
|
|
Select next fanning vertex
|
|
f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
|
|
return O
|
|
*/
|
|
// ...................................................................................
|
|
|
|
int ivdx = 0;
|
|
int ics = 1;
|
|
int iStampCnt = configCacheDepth+1;
|
|
while (ivdx >= 0) {
|
|
|
|
unsigned int icnt = piNumTriPtrNoModify[ivdx];
|
|
unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
|
|
unsigned int* piCurCandidate = piCandidates;
|
|
|
|
// get all triangles in the neighborhood
|
|
for (unsigned int tri = 0; tri < icnt;++tri) {
|
|
|
|
// if they have not yet been emitted, add them to the output IB
|
|
const unsigned int fidx = *piList++;
|
|
if (!abEmitted[fidx]) {
|
|
|
|
// so iterate through all vertices of the current triangle
|
|
const aiFace* pcFace = &pMesh->mFaces[ fidx ];
|
|
for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) {
|
|
const unsigned int dp = *p;
|
|
|
|
// the current vertex won't have any free triangles after this step
|
|
if (ivdx != (int)dp) {
|
|
// append the vertex to the dead-end stack
|
|
sDeadEndVStack.push(dp);
|
|
|
|
// register as candidate for the next step
|
|
*piCurCandidate++ = dp;
|
|
|
|
// decrease the per-vertex triangle counts
|
|
piNumTriPtr[dp]--;
|
|
}
|
|
|
|
// append the vertex to the output index buffer
|
|
*piCSIter++ = dp;
|
|
|
|
// if the vertex is not yet in cache, set its cache count
|
|
if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
|
|
piCachingStamps[dp] = iStampCnt++;
|
|
++iCacheMisses;
|
|
}
|
|
}
|
|
// flag triangle as emitted
|
|
abEmitted[fidx] = true;
|
|
}
|
|
}
|
|
|
|
// the vertex has now no living adjacent triangles anymore
|
|
piNumTriPtr[ivdx] = 0;
|
|
|
|
// get next fanning vertex
|
|
ivdx = -1;
|
|
int max_priority = -1;
|
|
for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
|
|
const unsigned int dp = *piCur;
|
|
|
|
// must have live triangles
|
|
if (piNumTriPtr[dp] > 0) {
|
|
int priority = 0;
|
|
|
|
// will the vertex be in cache, even after fanning occurs?
|
|
unsigned int tmp;
|
|
if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
|
|
priority = tmp;
|
|
}
|
|
|
|
// keep best candidate
|
|
if (priority > max_priority) {
|
|
max_priority = priority;
|
|
ivdx = dp;
|
|
}
|
|
}
|
|
}
|
|
// did we reach a dead end?
|
|
if (-1 == ivdx) {
|
|
// need to get a non-local vertex for which we have a good chance that it is still
|
|
// in the cache ...
|
|
while (!sDeadEndVStack.empty()) {
|
|
unsigned int iCachedIdx = sDeadEndVStack.top();
|
|
sDeadEndVStack.pop();
|
|
if (piNumTriPtr[ iCachedIdx ] > 0) {
|
|
ivdx = iCachedIdx;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (-1 == ivdx) {
|
|
// well, there isn't such a vertex. Simply get the next vertex in input order and
|
|
// hope it is not too bad ...
|
|
while (ics < (int)pMesh->mNumVertices) {
|
|
++ics;
|
|
if (piNumTriPtr[ics] > 0) {
|
|
ivdx = ics;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
float fACMR2 = 0.0f;
|
|
if (!DefaultLogger::isNullLogger()) {
|
|
fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
|
|
|
|
// very intense verbose logging ... prepare for much text if there are many meshes
|
|
if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
|
|
char szBuff[128]; // should be sufficiently large in every case
|
|
|
|
ai_snprintf(szBuff,128,"Mesh %u | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
|
|
((fACMR - fACMR2) / fACMR) * 100.f);
|
|
DefaultLogger::get()->debug(szBuff);
|
|
}
|
|
|
|
fACMR2 *= pMesh->mNumFaces;
|
|
}
|
|
// sort the output index buffer back to the input array
|
|
piCSIter = piIBOutput;
|
|
for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
|
|
pcFace->mIndices[0] = *piCSIter++;
|
|
pcFace->mIndices[1] = *piCSIter++;
|
|
pcFace->mIndices[2] = *piCSIter++;
|
|
}
|
|
|
|
// delete temporary storage
|
|
delete[] piCachingStamps;
|
|
delete[] piIBOutput;
|
|
delete[] piCandidates;
|
|
|
|
return fACMR2;
|
|
}
|