From acad9a06e9e7f6fe58199357ef7951470f39126c Mon Sep 17 00:00:00 2001
From: Haroon Qureshi
Date: Fri, 26 Oct 2018 15:36:34 -0700
Subject: [PATCH] Add support for importing GLTF2 animations.

Refactors the glTF2 internal classes to more closely reflect the
structure of the actual GLTF2 file format. Adds implementations for
reading skins and animations from GLTF2 files into those structures.

Also provides implementations for converting skins and animations from
GLTF into assimp data structures. Special handling is required for bone
weights since assimp stores vertex-weights-per-bone whereas GLTF2
stores bone-weights-per-vertex.

Only supports keyframed LINEAR animation data; STEP and CUBICSPLINE are
not currently supported.
---
 code/glTF2Asset.h         |  71 +++++-----
 code/glTF2Asset.inl       |  94 ++++++++++++-
 code/glTF2AssetWriter.inl |  43 ++++--
 code/glTF2Exporter.cpp    | 194 ++++++++++++--------
 code/glTF2Importer.cpp    | 281 +++++++++++++++++++++++++++++-----
 code/glTF2Importer.h      |   2 +-
 6 files changed, 488 insertions(+), 197 deletions(-)

diff --git a/code/glTF2Asset.h b/code/glTF2Asset.h
index 0def5b74d..92be82f3b 100644
--- a/code/glTF2Asset.h
+++ b/code/glTF2Asset.h
@@ -303,6 +303,20 @@ namespace glTF2
         TextureType_UNSIGNED_SHORT_5_5_5_1 = 32820
     };

+    //! Values for the Animation::Target::path field
+    enum AnimationPath {
+        AnimationPath_TRANSLATION,
+        AnimationPath_ROTATION,
+        AnimationPath_SCALE,
+        AnimationPath_WEIGHTS,
+    };
+
+    //! Values for the Animation::Sampler::interpolation field
+    enum Interpolation {
+        Interpolation_LINEAR,
+        Interpolation_STEP,
+        Interpolation_CUBICSPLINE,
+    };

     //! Values for the Accessor::type field (helper class)
     class AttribType
@@ -742,7 +756,7 @@ namespace glTF2
         //extension: KHR_materials_pbrSpecularGlossiness
         Nullable<PbrSpecularGlossiness> pbrSpecularGlossiness;

-        //extension: KHR_materials_unlit
+        //extension: KHR_materials_unlit
         bool unlit;

         Material() { SetDefaults(); }
@@ -870,56 +884,35 @@ namespace glTF2

     struct Animation : public Object
     {
-        struct AnimSampler {
-            std::string id;            //!< The ID of this sampler.
-            std::string input;         //!< The ID of a parameter in this animation to use as key-frame input.
-            std::string interpolation; //!< Type of interpolation algorithm to use between key-frames.
-            std::string output;        //!< The ID of a parameter in this animation to use as key-frame output.
+        struct Sampler {
+            Sampler() : interpolation(Interpolation_LINEAR) {}
+
+            Ref<Accessor> input;         //!< Accessor reference to the buffer storing the key-frame times.
+            Ref<Accessor> output;        //!< Accessor reference to the buffer storing the key-frame values.
+            Interpolation interpolation; //!< Type of interpolation algorithm to use between key-frames.
         };

-        struct AnimChannel {
-            int sampler; //!< The index of a sampler in the containing animation's samplers property.
+        struct Target {
+            Target() : path(AnimationPath_TRANSLATION) {}

-            struct AnimTarget {
-                Ref<Node> node;   //!< The node to animate.
-                std::string path; //!< The name of property of the node to animate ("translation", "rotation", or "scale").
-            } target;
+            Ref<Node> node;     //!< The node to animate.
+            AnimationPath path; //!< The property of the node to animate.
         };

-        struct AnimParameters {
-            Ref<Accessor> TIME;        //!< Accessor reference to a buffer storing a array of floating point scalar values.
-            Ref<Accessor> rotation;    //!< Accessor reference to a buffer storing a array of four-component floating-point vectors.
-            Ref<Accessor> scale;       //!< Accessor reference to a buffer storing a array of three-component floating-point vectors.
-            Ref<Accessor> translation; //!< Accessor reference to a buffer storing a array of three-component floating-point vectors.
+        struct Channel {
+            Channel() : sampler(-1) {}
+
+            int sampler;   //!< The sampler index containing the animation data.
+            Target target; //!< The node and property to animate.
         };

-        // AnimChannel Channels[3];    //!< Connect the output values of the key-frame animation to a specific node in the hierarchy.
-        // AnimParameters Parameters;  //!< The samplers that interpolate between the key-frames.
-        // AnimSampler Samplers[3];    //!< The parameterized inputs representing the key-frame data.
-
-        std::vector<AnimChannel> Channels; //!< Connect the output values of the key-frame animation to a specific node in the hierarchy.
-        AnimParameters Parameters;         //!< The samplers that interpolate between the key-frames.
-        std::vector<AnimSampler> Samplers; //!< The parameterized inputs representing the key-frame data.
+        std::vector<Sampler> samplers; //!< All the key-frame data for this animation.
+        std::vector<Channel> channels; //!< Data to connect nodes to key-frames.

         Animation() {}
         void Read(Value& obj, Asset& r);
-
-        //! Get accessor given an animation parameter name.
-        Ref<Accessor> GetAccessor(std::string name) {
-            if (name == "TIME") {
-                return Parameters.TIME;
-            } else if (name == "rotation") {
-                return Parameters.rotation;
-            } else if (name == "scale") {
-                return Parameters.scale;
-            } else if (name == "translation") {
-                return Parameters.translation;
-            }
-            return Ref<Accessor>();
-        }
     };

-
     //! Base class for LazyDict that acts as an interface
     class LazyDictBase
     {
diff --git a/code/glTF2Asset.inl b/code/glTF2Asset.inl
index 11e3965e5..687e16ce1 100755
--- a/code/glTF2Asset.inl
+++ b/code/glTF2Asset.inl
@@ -461,7 +461,7 @@ inline void Buffer::EncodedRegion_SetCurrent(const std::string& pID)
     throw DeadlyImportError("GLTF: EncodedRegion with ID: \"" + pID + "\" not found.");
 }

-inline 
+inline
 bool Buffer::ReplaceData(const size_t pBufferData_Offset, const size_t pBufferData_Count, const uint8_t* pReplace_Data, const size_t pReplace_Count)
 {
@@ -483,8 +483,8 @@ bool Buffer::ReplaceData(const size_t pBufferData_Offset, const size_t pBufferDa

     return true;
 }
-
-inline 
+
+inline
 bool Buffer::ReplaceData_joint(const size_t pBufferData_Offset, const size_t pBufferData_Count, const uint8_t* pReplace_Data, const size_t pReplace_Count)
 {
     if((pBufferData_Count == 0) || (pReplace_Count == 0) || (pReplace_Data == nullptr)) {
@@ -718,7 +718,7 @@ inline void Image::Read(Value& obj, Asset& r)
             this->mDataLength = this->bufferView->byteLength;

             // maybe this memcpy could be avoided if aiTexture does not delete[] pcData at destruction.
-
+
             this->mData.reset(new uint8_t[this->mDataLength]);
             memcpy(this->mData.get(), buffer->GetPointer() + this->bufferView->byteOffset, this->mDataLength);

@@ -1083,6 +1083,10 @@ inline void Node::Read(Value& obj, Asset& r)
             if (meshRef) this->meshes.push_back(meshRef);
         }
     }

+    if (Value* skin = FindUInt(obj, "skin")) {
+        this->skin = r.skins.Retrieve(skin->GetUint());
+    }
+
     if (Value* camera = FindUInt(obj, "camera")) {
         this->camera = r.cameras.Retrieve(camera->GetUint());
         if (this->camera)
@@ -1102,6 +1106,82 @@ inline void Scene::Read(Value& obj, Asset& r)
     }
 }

+inline void Skin::Read(Value& obj, Asset& r)
+{
+    if (Value* matrices = FindUInt(obj, "inverseBindMatrices")) {
+        inverseBindMatrices = r.accessors.Retrieve(matrices->GetUint());
+    }
+
+    if (Value* joints = FindArray(obj, "joints")) {
+        for (unsigned i = 0; i < joints->Size(); ++i) {
+            if (!(*joints)[i].IsUint()) continue;
+            Ref<Node> node = r.nodes.Retrieve((*joints)[i].GetUint());
+            if (node) {
+                this->jointNames.push_back(node);
+            }
+        }
+    }
+}
+
+inline void Animation::Read(Value& obj, Asset& r)
+{
+    if (Value* samplers = FindArray(obj, "samplers")) {
+        for (unsigned i = 0; i < samplers->Size(); ++i) {
+            Value& sampler = (*samplers)[i];
+
+            Sampler s;
+            if (Value* input = FindUInt(sampler, "input")) {
+                s.input = r.accessors.Retrieve(input->GetUint());
+            }
+            if (Value* output = FindUInt(sampler, "output")) {
+                s.output = r.accessors.Retrieve(output->GetUint());
+            }
+            s.interpolation = Interpolation_LINEAR;
+            if (Value* interpolation = FindString(sampler, "interpolation")) {
+                const std::string interp = interpolation->GetString();
+                if (interp == "LINEAR") {
+                    s.interpolation = Interpolation_LINEAR;
+                } else if (interp == "STEP") {
+                    s.interpolation = Interpolation_STEP;
+                } else if (interp == "CUBICSPLINE") {
+                    s.interpolation = Interpolation_CUBICSPLINE;
+                }
+            }
+            this->samplers.push_back(s);
+        }
+    }
+
+    if (Value* channels = FindArray(obj, "channels")) {
+        for (unsigned i = 0; i < channels->Size(); ++i) {
+            Value& channel = (*channels)[i];
+
+            Channel c;
+            if (Value* sampler = FindUInt(channel, "sampler")) {
+                c.sampler = sampler->GetUint();
+            }
+
+            if (Value* target = FindObject(channel, "target")) {
+                if (Value* node = FindUInt(*target, "node")) {
+                    c.target.node = r.nodes.Retrieve(node->GetUint());
+                }
+                if (Value* path = FindString(*target, "path")) {
+                    const std::string p = path->GetString();
+                    if (p == "translation") {
+                        c.target.path = AnimationPath_TRANSLATION;
+                    } else if (p == "rotation") {
+                        c.target.path = AnimationPath_ROTATION;
+                    } else if (p == "scale") {
+                        c.target.path = AnimationPath_SCALE;
+                    } else if (p == "weights") {
+                        c.target.path = AnimationPath_WEIGHTS;
+                    }
+                }
+            }
+            this->channels.push_back(c);
+        }
+    }
+}
+
 inline void AssetMetadata::Read(Document& doc)
 {
     if (Value* obj = FindObject(doc, "asset")) {
@@ -1277,6 +1357,12 @@ inline void Asset::Load(const std::string& pFile, bool isBinary)
         }
     }

+    if (Value* animsArray = FindArray(doc, "animations")) {
+        for (unsigned int i = 0; i < animsArray->Size(); ++i) {
+            animations.Retrieve(i);
+        }
+    }
+
     // Clean up
     for (size_t i = 0; i < mDicts.size(); ++i) {
         mDicts[i]->DetachFromDocument();
diff --git a/code/glTF2AssetWriter.inl b/code/glTF2AssetWriter.inl
index 0579dfdac..50d855aaa 100644
--- a/code/glTF2AssetWriter.inl
+++ b/code/glTF2AssetWriter.inl
@@ -113,10 +113,10 @@ namespace glTF2 {
         /****************** Channels *******************/
         Value channels;
         channels.SetArray();
-        channels.Reserve(unsigned(a.Channels.size()), w.mAl);
+        channels.Reserve(unsigned(a.channels.size()), w.mAl);

-        for (size_t i = 0; i < unsigned(a.Channels.size()); ++i) {
-            Animation::AnimChannel& c = a.Channels[i];
+        for (size_t i = 0; i < unsigned(a.channels.size()); ++i) {
+            Animation::Channel& c = a.channels[i];
             Value valChannel;
             valChannel.SetObject();
             {
@@ -126,7 +126,20 @@
                 valTarget.SetObject();
                 {
                     valTarget.AddMember("node", c.target.node->index, w.mAl);
-                    valTarget.AddMember("path", c.target.path, w.mAl);
+                    switch (c.target.path) {
+                        case AnimationPath_TRANSLATION:
+                            valTarget.AddMember("path", "translation", w.mAl);
+                            break;
+                        case AnimationPath_ROTATION:
+                            valTarget.AddMember("path", "rotation", w.mAl);
+                            break;
+                        case AnimationPath_SCALE:
+                            valTarget.AddMember("path", "scale", w.mAl);
+                            break;
+                        case AnimationPath_WEIGHTS:
+                            valTarget.AddMember("path", "weights", w.mAl);
+                            break;
+                    }
                 }
                 valChannel.AddMember("target", valTarget, w.mAl);
             }
@@ -138,16 +151,24 @@
         Value valSamplers;
         valSamplers.SetArray();

-        for (size_t i = 0; i < unsigned(a.Samplers.size()); ++i) {
-            Animation::AnimSampler& s = a.Samplers[i];
+        for (size_t i = 0; i < unsigned(a.samplers.size()); ++i) {
+            Animation::Sampler& s = a.samplers[i];
             Value valSampler;
             valSampler.SetObject();
             {
-                Ref<Accessor> inputAccessor = a.GetAccessor(s.input);
-                Ref<Accessor> outputAccessor = a.GetAccessor(s.output);
-                valSampler.AddMember("input", inputAccessor->index, w.mAl);
-                valSampler.AddMember("interpolation", s.interpolation, w.mAl);
-                valSampler.AddMember("output", outputAccessor->index, w.mAl);
+                valSampler.AddMember("input", s.input->index, w.mAl);
+                switch (s.interpolation) {
+                    case Interpolation_LINEAR:
+                        valSampler.AddMember("interpolation", "LINEAR", w.mAl);
+                        break;
+                    case Interpolation_STEP:
+                        valSampler.AddMember("interpolation", "STEP", w.mAl);
+                        break;
+                    case Interpolation_CUBICSPLINE:
+                        valSampler.AddMember("interpolation", "CUBICSPLINE", w.mAl);
+                        break;
+                }
+                valSampler.AddMember("output", s.output->index, w.mAl);
             }
             valSamplers.PushBack(valSampler, w.mAl);
         }
diff --git a/code/glTF2Exporter.cpp b/code/glTF2Exporter.cpp
index 564533de4..83aae5136 100644
--- a/code/glTF2Exporter.cpp
+++ b/code/glTF2Exporter.cpp
@@ -961,92 +961,92 @@ void glTF2Exporter::ExportMetadata()
     asset.generator = buffer;
 }

-inline void ExtractAnimationData(Asset& mAsset, std::string& animId, Ref<Animation>& animRef, Ref<Buffer>& buffer, const aiNodeAnim* nodeChannel, float ticksPerSecond)
+inline Ref<Accessor> GetSamplerInputRef(Asset& asset, std::string& animId, Ref<Buffer>& buffer, std::vector<float>& times)
 {
-    // Loop over the data and check to see if it exactly matches an existing buffer.
-    //  If yes, then reference the existing corresponding accessor.
-    //  Otherwise, add to the buffer and create a new accessor.
+    return ExportData(asset, animId, buffer, times.size(), &times[0], AttribType::SCALAR, AttribType::SCALAR, ComponentType_FLOAT);
+}

-    size_t counts[3] = {
-        nodeChannel->mNumPositionKeys,
-        nodeChannel->mNumScalingKeys,
-        nodeChannel->mNumRotationKeys,
-    };
-    size_t numKeyframes = 1;
-    for (int i = 0; i < 3; ++i) {
-        if (counts[i] > numKeyframes) {
-            numKeyframes = counts[i];
-        }
+inline void ExtractTranslationSampler(Asset& asset, std::string& animId, Ref<Buffer>& buffer, const aiNodeAnim* nodeChannel, float ticksPerSecond, Animation::Sampler& sampler)
+{
+    const unsigned int numKeyframes = nodeChannel->mNumPositionKeys;
+    if (numKeyframes == 0) {
+        return;
     }

-    //-------------------------------------------------------
-    // Extract TIME parameter data.
-    // Check if the timeStamps are the same for mPositionKeys, mRotationKeys, and mScalingKeys.
-    if(nodeChannel->mNumPositionKeys > 0) {
-        typedef float TimeType;
-        std::vector<TimeType> timeData;
-        timeData.resize(numKeyframes);
-        for (size_t i = 0; i < numKeyframes; ++i) {
-            size_t frameIndex = i * nodeChannel->mNumPositionKeys / numKeyframes;
-            // mTime is measured in ticks, but GLTF time is measured in seconds, so convert.
-            // Check if we have to cast type here. e.g. uint16_t()
-            timeData[i] = static_cast<float>(nodeChannel->mPositionKeys[frameIndex].mTime / ticksPerSecond);
-        }
-
-        Ref<Accessor> timeAccessor = ExportData(mAsset, animId, buffer, static_cast<unsigned int>(numKeyframes), &timeData[0], AttribType::SCALAR, AttribType::SCALAR, ComponentType_FLOAT);
-        if (timeAccessor) animRef->Parameters.TIME = timeAccessor;
+    const vec3 kZeros = {0, 0, 0};
+    std::vector<float> times(numKeyframes);
+    std::vector<vec3> values(numKeyframes, kZeros);
+    for (unsigned int i = 0; i < numKeyframes; ++i) {
+        const aiVectorKey& key = nodeChannel->mPositionKeys[i];
+        // mTime is measured in ticks, but GLTF time is measured in seconds, so convert.
+        times[i] = static_cast<float>(key.mTime / ticksPerSecond);
+        values[i][0] = key.mValue.x;
+        values[i][1] = key.mValue.y;
+        values[i][2] = key.mValue.z;
     }

-    //-------------------------------------------------------
-    // Extract translation parameter data
-    if(nodeChannel->mNumPositionKeys > 0) {
-        C_STRUCT aiVector3D* translationData = new aiVector3D[numKeyframes];
-        for (size_t i = 0; i < numKeyframes; ++i) {
-            size_t frameIndex = i * nodeChannel->mNumPositionKeys / numKeyframes;
-            translationData[i] = nodeChannel->mPositionKeys[frameIndex].mValue;
-        }
+    sampler.input = GetSamplerInputRef(asset, animId, buffer, times);
+    sampler.output = ExportData(asset, animId, buffer, numKeyframes, &values[0], AttribType::VEC3, AttribType::VEC3, ComponentType_FLOAT);
+    sampler.interpolation = Interpolation_LINEAR;
+}

-        Ref<Accessor> tranAccessor = ExportData(mAsset, animId, buffer, static_cast<unsigned int>(numKeyframes), translationData, AttribType::VEC3, AttribType::VEC3, ComponentType_FLOAT);
-        if ( tranAccessor ) {
-            animRef->Parameters.translation = tranAccessor;
-        }
-        delete[] translationData;
+inline void ExtractScaleSampler(Asset& asset, std::string& animId, Ref<Buffer>& buffer, const aiNodeAnim* nodeChannel, float ticksPerSecond, Animation::Sampler& sampler)
+{
+    const unsigned int numKeyframes = nodeChannel->mNumScalingKeys;
+    if (numKeyframes == 0) {
+        return;
     }

-    //-------------------------------------------------------
-    // Extract scale parameter data
-    if(nodeChannel->mNumScalingKeys > 0) {
-        C_STRUCT aiVector3D* scaleData = new aiVector3D[numKeyframes];
-        for (size_t i = 0; i < numKeyframes; ++i) {
-            size_t frameIndex = i * nodeChannel->mNumScalingKeys / numKeyframes;
-            scaleData[i] = nodeChannel->mScalingKeys[frameIndex].mValue;
-        }
-
-        Ref<Accessor> scaleAccessor = ExportData(mAsset, animId, buffer, static_cast<unsigned int>(numKeyframes), scaleData, AttribType::VEC3, AttribType::VEC3, ComponentType_FLOAT);
-        if ( scaleAccessor ) {
-            animRef->Parameters.scale = scaleAccessor;
-        }
-        delete[] scaleData;
+    const vec3 kZeros = {0, 0, 0};
+    std::vector<float> times(numKeyframes);
+    std::vector<vec3> values(numKeyframes, kZeros);
+    for (unsigned int i = 0; i < numKeyframes; ++i) {
+        const aiVectorKey& key = nodeChannel->mScalingKeys[i];
+        // mTime is measured in ticks, but GLTF time is measured in seconds, so convert.
+        times[i] = static_cast<float>(key.mTime / ticksPerSecond);
+        values[i][0] = key.mValue.x;
+        values[i][1] = key.mValue.y;
+        values[i][2] = key.mValue.z;
     }

-    //-------------------------------------------------------
-    // Extract rotation parameter data
-    if(nodeChannel->mNumRotationKeys > 0) {
-        vec4* rotationData = new vec4[numKeyframes];
-        for (size_t i = 0; i < numKeyframes; ++i) {
-            size_t frameIndex = i * nodeChannel->mNumRotationKeys / numKeyframes;
-            rotationData[i][0] = nodeChannel->mRotationKeys[frameIndex].mValue.x;
-            rotationData[i][1] = nodeChannel->mRotationKeys[frameIndex].mValue.y;
-            rotationData[i][2] = nodeChannel->mRotationKeys[frameIndex].mValue.z;
-            rotationData[i][3] = nodeChannel->mRotationKeys[frameIndex].mValue.w;
-        }
+    sampler.input = GetSamplerInputRef(asset, animId, buffer, times);
+    sampler.output = ExportData(asset, animId, buffer, numKeyframes, &values[0], AttribType::VEC3, AttribType::VEC3, ComponentType_FLOAT);
+    sampler.interpolation = Interpolation_LINEAR;
+}

-        Ref<Accessor> rotAccessor = ExportData(mAsset, animId, buffer, static_cast<unsigned int>(numKeyframes), rotationData, AttribType::VEC4, AttribType::VEC4, ComponentType_FLOAT);
-        if ( rotAccessor ) {
-            animRef->Parameters.rotation = rotAccessor;
-        }
-        delete[] rotationData;
+inline void ExtractRotationSampler(Asset& asset, std::string& animId, Ref<Buffer>& buffer, const aiNodeAnim* nodeChannel, float ticksPerSecond, Animation::Sampler& sampler)
+{
+    const unsigned int numKeyframes = nodeChannel->mNumRotationKeys;
+    if (numKeyframes == 0) {
+        return;
     }
+
+    const vec4 kZeros = {0, 0, 0, 0};
+    std::vector<float> times(numKeyframes);
+    std::vector<vec4> values(numKeyframes, kZeros);
+    for (unsigned int i = 0; i < numKeyframes; ++i) {
+        const aiQuatKey& key = nodeChannel->mRotationKeys[i];
+        // mTime is measured in ticks, but GLTF time is measured in seconds, so convert.
+        times[i] = static_cast<float>(key.mTime / ticksPerSecond);
+        values[i][0] = key.mValue.x;
+        values[i][1] = key.mValue.y;
+        values[i][2] = key.mValue.z;
+        values[i][3] = key.mValue.w;
+    }
+
+    sampler.input = GetSamplerInputRef(asset, animId, buffer, times);
+    sampler.output = ExportData(asset, animId, buffer, numKeyframes, &values[0], AttribType::VEC4, AttribType::VEC4, ComponentType_FLOAT);
+    sampler.interpolation = Interpolation_LINEAR;
+}
+
+static void AddSampler(Ref<Animation>& animRef, Ref<Node>& nodeRef, Animation::Sampler& sampler, AnimationPath path)
+{
+    Animation::Channel channel;
+    channel.sampler = static_cast<int>(animRef->samplers.size());
+    channel.target.path = path;
+    channel.target.node = nodeRef;
+    animRef->channels.push_back(channel);
+    animRef->samplers.push_back(sampler);
 }

 void glTF2Exporter::ExportAnimations()
@@ -1055,6 +1055,7 @@ void glTF2Exporter::ExportAnimations()
     for (unsigned int i = 0; i < mScene->mNumAnimations; ++i) {
         const aiAnimation* anim = mScene->mAnimations[i];
+        const float ticksPerSecond = static_cast<float>(anim->mTicksPerSecond);

         std::string nameAnim = "anim";
         if (anim->mName.length > 0) {
@@ -1070,46 +1071,19 @@ void glTF2Exporter::ExportAnimations()
             name = mAsset->FindUniqueID(name, "animation");
             Ref<Animation> animRef = mAsset->animations.Create(name);

-            // Parameters
-            ExtractAnimationData(*mAsset, name, animRef, bufferRef, nodeChannel, static_cast<float>(anim->mTicksPerSecond));
+            Ref<Node> animNode = mAsset->nodes.Get(nodeChannel->mNodeName.C_Str());

-            for (unsigned int j = 0; j < 3; ++j) {
-                std::string channelType;
-                int channelSize;
-                switch (j) {
-                    case 0:
-                        channelType = "rotation";
-                        channelSize = nodeChannel->mNumRotationKeys;
-                        break;
-                    case 1:
-                        channelType = "scale";
-                        channelSize = nodeChannel->mNumScalingKeys;
-                        break;
-                    case 2:
-                        channelType = "translation";
-                        channelSize = nodeChannel->mNumPositionKeys;
-                        break;
-                }
+            Animation::Sampler translationSampler;
+            ExtractTranslationSampler(*mAsset, name, bufferRef, nodeChannel, ticksPerSecond, translationSampler);
+            AddSampler(animRef, animNode, translationSampler, AnimationPath_TRANSLATION);

-                if (channelSize < 1) { continue; }
-
-                Animation::AnimChannel tmpAnimChannel;
-                Animation::AnimSampler tmpAnimSampler;
-
-                tmpAnimChannel.sampler = static_cast<int>(animRef->Samplers.size());
-                tmpAnimChannel.target.path = channelType;
-                tmpAnimSampler.output = channelType;
-                tmpAnimSampler.id = name + "_" + channelType;
-
-                tmpAnimChannel.target.node = mAsset->nodes.Get(nodeChannel->mNodeName.C_Str());
-
-                tmpAnimSampler.input = "TIME";
-                tmpAnimSampler.interpolation = "LINEAR";
-
-                animRef->Channels.push_back(tmpAnimChannel);
-                animRef->Samplers.push_back(tmpAnimSampler);
-            }
+            Animation::Sampler rotationSampler;
+            ExtractRotationSampler(*mAsset, name, bufferRef, nodeChannel, ticksPerSecond, rotationSampler);
+            AddSampler(animRef, animNode, rotationSampler, AnimationPath_ROTATION);

+            Animation::Sampler scaleSampler;
+            ExtractScaleSampler(*mAsset, name, bufferRef, nodeChannel, ticksPerSecond, scaleSampler);
+            AddSampler(animRef, animNode, scaleSampler, AnimationPath_SCALE);
         }

         // Assimp documentation states this is not used (not implemented)
diff --git a/code/glTF2Importer.cpp b/code/glTF2Importer.cpp
index 4b99fc8da..ed7c55792 100755
--- a/code/glTF2Importer.cpp
+++ b/code/glTF2Importer.cpp
@@ -54,6 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include
 #include
+#include <unordered_map>

 #include "MakeVerboseFormat.h"

@@ -580,7 +581,7 @@
     }
     else { // no indices provided so directly generate from counts

-        // use the already determined count as it includes checks 
+        // use the already determined count as it includes checks
         unsigned int count = aim->mNumVertices;

         switch (prim.mode) {
@@ -702,26 +703,7 @@ void glTF2Importer::ImportCameras(glTF2::Asset& r)
     }
 }

-aiNode* ImportNode(aiScene* pScene, glTF2::Asset& r, std::vector<unsigned int>& meshOffsets, glTF2::Ref<Node>& ptr)
-{
-    Node& node = *ptr;
-
-    std::string nameOrId = node.name.empty() ? node.id : node.name;
-
-    aiNode* ainode = new aiNode(nameOrId);
-
-    if (!node.children.empty()) {
-        ainode->mNumChildren = unsigned(node.children.size());
-        ainode->mChildren = new aiNode*[ainode->mNumChildren];
-
-        for (unsigned int i = 0; i < ainode->mNumChildren; ++i) {
-            aiNode* child = ImportNode(pScene, r, meshOffsets, node.children[i]);
-            child->mParent = ainode;
-            ainode->mChildren[i] = child;
-        }
-    }
-
-    aiMatrix4x4& matrix = ainode->mTransformation;
+static void GetNodeTransform(aiMatrix4x4& matrix, const glTF2::Node& node) {
     if (node.matrix.isPresent) {
         CopyValue(node.matrix.value, matrix);
     }
@@ -748,24 +730,112 @@ aiNode* ImportNode(aiScene* pScene, glTF2::Asset& r, std::vector<unsigned int>&
             matrix = matrix * s;
         }
     }
+}
+
+static void BuildVertexWeightMapping(Ref<Mesh>& mesh, std::vector<std::vector<aiVertexWeight>>& map)
+{
+    Mesh::Primitive::Attributes& attr = mesh->primitives[0].attributes;
+    if (attr.weight.empty() || attr.joint.empty()) {
+        return;
+    }
+    if (attr.weight[0]->count != attr.joint[0]->count) {
+        return;
+    }
+
+    const int num_vertices = attr.weight[0]->count;
+
+    struct Weights { float values[4]; };
+    struct Indices { uint8_t values[4]; };
+    Weights* weights = nullptr;
+    Indices* indices = nullptr;
+    attr.weight[0]->ExtractData(weights);
+    attr.joint[0]->ExtractData(indices);
+
+    for (int i = 0; i < num_vertices; ++i) {
+        for (int j = 0; j < 4; ++j) {
+            const unsigned int bone = indices[i].values[j];
+            const float weight = weights[i].values[j];
+            if (weight > 0 && bone >= 0 && bone < map.size()) {
+                map[bone].reserve(8);
+                map[bone].emplace_back(i, weight);
+            }
+        }
+    }
+
+    delete[] weights;
+    delete[] indices;
+}
+
+aiNode* ImportNode(aiScene* pScene, glTF2::Asset& r, std::vector<unsigned int>& meshOffsets, glTF2::Ref<Node>& ptr)
+{
+    Node& node = *ptr;
+
+    std::string nameOrId = node.name.empty() ? node.id : node.name;
+
+    aiNode* ainode = new aiNode(nameOrId);
+
+    if (!node.children.empty()) {
+        ainode->mNumChildren = unsigned(node.children.size());
+        ainode->mChildren = new aiNode*[ainode->mNumChildren];
+
+        for (unsigned int i = 0; i < ainode->mNumChildren; ++i) {
+            aiNode* child = ImportNode(pScene, r, meshOffsets, node.children[i]);
+            child->mParent = ainode;
+            ainode->mChildren[i] = child;
+        }
+    }
+
+    GetNodeTransform(ainode->mTransformation, node);

     if (!node.meshes.empty()) {
-        int count = 0;
-        for (size_t i = 0; i < node.meshes.size(); ++i) {
-            int idx = node.meshes[i].GetIndex();
-            count += meshOffsets[idx + 1] - meshOffsets[idx];
-        }
-        ainode->mNumMeshes = count;
+        int mesh_idx = node.meshes[0].GetIndex();
+        int count = meshOffsets[mesh_idx + 1] - meshOffsets[mesh_idx];
+        // GLTF files contain at most 1 mesh per node.
+        assert(node.meshes.size() == 1);
+        assert(count == 1);

+        ainode->mNumMeshes = count;
         ainode->mMeshes = new unsigned int[count];

-        int k = 0;
-        for (size_t i = 0; i < node.meshes.size(); ++i) {
-            int idx = node.meshes[i].GetIndex();
-            for (unsigned int j = meshOffsets[idx]; j < meshOffsets[idx + 1]; ++j, ++k) {
-                ainode->mMeshes[k] = j;
+        if (node.skin) {
+            aiMesh* mesh = pScene->mMeshes[meshOffsets[mesh_idx]];
+            mesh->mNumBones = node.skin->jointNames.size();
+            mesh->mBones = new aiBone*[mesh->mNumBones];
+
+            // GLTF and Assimp choose to store bone weights differently.
+            // GLTF has each vertex specify which bones influence the vertex.
+            // Assimp has each bone specify which vertices it has influence over.
+            // To convert this data, we first read over the vertex data and pull
+            // out the bone-to-vertex mapping. Then, when creating the aiBones,
+            // we copy the bone-to-vertex mapping into the bone. This is unfortunate
+            // both because it's somewhat slow and because, for many applications,
+            // we then need to reconvert the data back into the vertex-to-bone
+            // mapping which makes things doubly-slow.
+            std::vector<std::vector<aiVertexWeight>> weighting(mesh->mNumBones);
+            BuildVertexWeightMapping(node.meshes[0], weighting);
+
+            for (size_t i = 0; i < mesh->mNumBones; ++i) {
+                aiBone* bone = new aiBone();
+
+                Ref<Node> joint = node.skin->jointNames[i];
+                bone->mName = joint->name;
+                GetNodeTransform(bone->mOffsetMatrix, *joint);
+
+                std::vector<aiVertexWeight>& weights = weighting[i];
+
+                bone->mNumWeights = weights.size();
+                if (bone->mNumWeights > 0) {
+                    bone->mWeights = new aiVertexWeight[bone->mNumWeights];
+                    memcpy(bone->mWeights, weights.data(), bone->mNumWeights * sizeof(aiVertexWeight));
+                }
+                mesh->mBones[i] = bone;
             }
         }
+
+        int k = 0;
+        for (unsigned int j = meshOffsets[mesh_idx]; j < meshOffsets[mesh_idx + 1]; ++j, ++k) {
+            ainode->mMeshes[k] = j;
+        }
     }

     if (node.camera) {
@@ -802,6 +872,151 @@ void glTF2Importer::ImportNodes(glTF2::Asset& r)
     //}
 }

+struct AnimationSamplers {
+    AnimationSamplers() : translation(nullptr), rotation(nullptr), scale(nullptr) {}
+
+    Animation::Sampler* translation;
+    Animation::Sampler* rotation;
+    Animation::Sampler* scale;
+};
+
+aiNodeAnim* CreateNodeAnim(glTF2::Asset& r, Node& node, AnimationSamplers& samplers)
+{
+    aiNodeAnim* anim = new aiNodeAnim();
+    anim->mNodeName = node.name;
+
+    static const float kMillisecondsFromSeconds = 1000.f;
+
+    if (samplers.translation) {
+        float* times = nullptr;
+        samplers.translation->input->ExtractData(times);
+        aiVector3D* values = nullptr;
+        samplers.translation->output->ExtractData(values);
+        anim->mNumPositionKeys = samplers.translation->input->count;
+        anim->mPositionKeys = new aiVectorKey[anim->mNumPositionKeys];
+        for (unsigned int i = 0; i < anim->mNumPositionKeys; ++i) {
+            anim->mPositionKeys[i].mTime = times[i] * kMillisecondsFromSeconds;
+            anim->mPositionKeys[i].mValue = values[i];
+        }
+        delete[] times;
+        delete[] values;
+    } else if (node.translation.isPresent) {
+        anim->mNumPositionKeys = 1;
+        anim->mPositionKeys = new aiVectorKey();
+        anim->mPositionKeys->mTime = 0.f;
+        anim->mPositionKeys->mValue.x = node.translation.value[0];
+        anim->mPositionKeys->mValue.y = node.translation.value[1];
+        anim->mPositionKeys->mValue.z = node.translation.value[2];
+    }
+
+    if (samplers.rotation) {
+        float* times = nullptr;
+        samplers.rotation->input->ExtractData(times);
+        aiQuaternion* values = nullptr;
+        samplers.rotation->output->ExtractData(values);
+        anim->mNumRotationKeys = samplers.rotation->input->count;
+        anim->mRotationKeys = new aiQuatKey[anim->mNumRotationKeys];
+        for (unsigned int i = 0; i < anim->mNumRotationKeys; ++i) {
+            anim->mRotationKeys[i].mTime = times[i] * kMillisecondsFromSeconds;
+            // ExtractData copied the glTF (x, y, z, w) floats into aiQuaternion's
+            // (w, x, y, z) member layout, so shift the components back into place.
+            anim->mRotationKeys[i].mValue.x = values[i].w;
+            anim->mRotationKeys[i].mValue.y = values[i].x;
+            anim->mRotationKeys[i].mValue.z = values[i].y;
+            anim->mRotationKeys[i].mValue.w = values[i].z;
+        }
+        delete[] times;
+        delete[] values;
+    } else if (node.rotation.isPresent) {
+        anim->mNumRotationKeys = 1;
+        anim->mRotationKeys = new aiQuatKey();
+        anim->mRotationKeys->mTime = 0.f;
+        anim->mRotationKeys->mValue.x = node.rotation.value[0];
+        anim->mRotationKeys->mValue.y = node.rotation.value[1];
+        anim->mRotationKeys->mValue.z = node.rotation.value[2];
+        anim->mRotationKeys->mValue.w = node.rotation.value[3];
+    }
+
+    if (samplers.scale) {
+        float* times = nullptr;
+        samplers.scale->input->ExtractData(times);
+        aiVector3D* values = nullptr;
+        samplers.scale->output->ExtractData(values);
+        anim->mNumScalingKeys = samplers.scale->input->count;
+        anim->mScalingKeys = new aiVectorKey[anim->mNumScalingKeys];
+        for (unsigned int i = 0; i < anim->mNumScalingKeys; ++i) {
+            anim->mScalingKeys[i].mTime = times[i] * kMillisecondsFromSeconds;
+            anim->mScalingKeys[i].mValue = values[i];
+        }
+        delete[] times;
+        delete[] values;
+    } else if (node.scale.isPresent) {
+        anim->mNumScalingKeys = 1;
+        anim->mScalingKeys = new aiVectorKey();
+        anim->mScalingKeys->mTime = 0.f;
+        anim->mScalingKeys->mValue.x = node.scale.value[0];
+        anim->mScalingKeys->mValue.y = node.scale.value[1];
+        anim->mScalingKeys->mValue.z = node.scale.value[2];
+    }
+
+    return anim;
+}
+
+std::unordered_map<unsigned int, AnimationSamplers> GatherSamplers(Animation& anim)
+{
+    std::unordered_map<unsigned int, AnimationSamplers> samplers;
+    for (unsigned int c = 0; c < anim.channels.size(); ++c) {
+        Animation::Channel& channel = anim.channels[c];
+        if (channel.sampler >= static_cast<int>(anim.samplers.size())) {
+            continue;
+        }
+
+        const unsigned int node_index = channel.target.node.GetIndex();
+
+        AnimationSamplers& sampler = samplers[node_index];
+        if (channel.target.path == AnimationPath_TRANSLATION) {
+            sampler.translation = &anim.samplers[channel.sampler];
+        } else if (channel.target.path == AnimationPath_ROTATION) {
+            sampler.rotation = &anim.samplers[channel.sampler];
+        } else if (channel.target.path == AnimationPath_SCALE) {
+            sampler.scale = &anim.samplers[channel.sampler];
+        }
+    }
+
+    return samplers;
+}
+
+void glTF2Importer::ImportAnimations(glTF2::Asset& r)
+{
+    if (!r.scene) return;
+
+    mScene->mNumAnimations = r.animations.Size();
+    if (mScene->mNumAnimations == 0) {
+        return;
+    }
+
+    mScene->mAnimations = new aiAnimation*[mScene->mNumAnimations];
+    for (unsigned int i = 0; i < r.animations.Size(); ++i) {
+        Animation& anim = r.animations[i];
+
+        aiAnimation* ai_anim = new aiAnimation();
+        ai_anim->mName = anim.name;
+        ai_anim->mDuration = 0;
+        ai_anim->mTicksPerSecond = 0;
+
+        std::unordered_map<unsigned int, AnimationSamplers> samplers = GatherSamplers(anim);
+
+        ai_anim->mNumChannels = r.skins[0].jointNames.size();
+        if (ai_anim->mNumChannels > 0) {
+            ai_anim->mChannels = new aiNodeAnim*[ai_anim->mNumChannels];
+            int j = 0;
+            for (auto& iter : r.skins[0].jointNames) {
+                ai_anim->mChannels[j] = CreateNodeAnim(r, *iter, samplers[iter.GetIndex()]);
+                ++j;
+            }
+        }
+        mScene->mAnimations[i] = ai_anim;
+    }
+}
+
 void glTF2Importer::ImportEmbeddedTextures(glTF2::Asset& r)
 {
     embeddedTexIdxs.resize(r.images.Size(), -1);
@@ -869,6 +1084,8 @@ void glTF2Importer::InternReadFile(const std::string& pFile, aiScene* pScene, IOSystem* pIOHandler)

     ImportNodes(asset);

+    ImportAnimations(asset);
+
     if (pScene->mNumMeshes == 0) {
         pScene->mFlags |= AI_SCENE_FLAGS_INCOMPLETE;
     }
diff --git a/code/glTF2Importer.h b/code/glTF2Importer.h
index 31d935da4..7414e2f95 100644
--- a/code/glTF2Importer.h
+++ b/code/glTF2Importer.h
@@ -83,7 +83,7 @@ private:
     void ImportCameras(glTF2::Asset& a);
     void ImportLights(glTF2::Asset& a);
    void ImportNodes(glTF2::Asset& a);
-
+    void ImportAnimations(glTF2::Asset& a);
 };

 } // Namespace assimp
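
Editor's note: the commit message describes transposing glTF's per-vertex joint/weight
pairs (JOINTS_0 / WEIGHTS_0) into assimp's per-bone vertex-weight lists, which is what
BuildVertexWeightMapping in the patch does for real assimp meshes. The following
standalone sketch illustrates only that conversion, under the assumption of plain
in-memory arrays; the names GltfVertexSkin and ConvertToPerBone are illustrative and
are not part of the patch or of assimp.

    #include <cstdio>
    #include <utility>
    #include <vector>

    // glTF stores, for each vertex, up to four (joint, weight) pairs.
    struct GltfVertexSkin {
        unsigned int joints[4];
        float weights[4];
    };

    // assimp wants, for each bone, the list of (vertexId, weight) pairs,
    // i.e. the transposed view of the same data.
    using BoneWeights = std::vector<std::pair<unsigned int, float>>;

    std::vector<BoneWeights> ConvertToPerBone(const std::vector<GltfVertexSkin>& vertices,
                                              size_t numBones) {
        std::vector<BoneWeights> perBone(numBones);
        for (unsigned int v = 0; v < vertices.size(); ++v) {
            for (int k = 0; k < 4; ++k) {
                const unsigned int bone = vertices[v].joints[k];
                const float weight = vertices[v].weights[k];
                // Skip zero weights and out-of-range joint indices.
                if (weight > 0.0f && bone < numBones) {
                    perBone[bone].emplace_back(v, weight);
                }
            }
        }
        return perBone;
    }

    int main() {
        std::vector<GltfVertexSkin> verts = {
            {{0, 1, 0, 0}, {0.75f, 0.25f, 0.f, 0.f}},
            {{1, 0, 0, 0}, {1.f, 0.f, 0.f, 0.f}},
        };
        std::vector<BoneWeights> bones = ConvertToPerBone(verts, 2);
        for (size_t b = 0; b < bones.size(); ++b)
            for (auto& vw : bones[b])
                std::printf("bone %zu <- vertex %u (%.2f)\n", b, vw.first, vw.second);
        return 0;
    }

As the patch's own comment notes, consumers that want per-vertex weights back have to
perform the inverse transposition again, which is why the conversion is called out as
"doubly-slow" there.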
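Editor's note: a minimal usage sketch (not part of the patch) showing how the animations
produced by this importer surface through the regular Assimp C API. The file name
"model.gltf" is a placeholder; time values arrive in milliseconds, matching the
kMillisecondsFromSeconds conversion in CreateNodeAnim.

    #include <assimp/cimport.h>
    #include <assimp/scene.h>
    #include <cstdio>

    int main() {
        // Import without post-processing; animations are filled in by ImportAnimations().
        const aiScene* scene = aiImportFile("model.gltf", 0);
        if (!scene) {
            std::printf("load failed: %s\n", aiGetErrorString());
            return 1;
        }
        for (unsigned int a = 0; a < scene->mNumAnimations; ++a) {
            const aiAnimation* anim = scene->mAnimations[a];
            std::printf("animation '%s': %u channels\n",
                        anim->mName.C_Str(), anim->mNumChannels);
            for (unsigned int c = 0; c < anim->mNumChannels; ++c) {
                const aiNodeAnim* ch = anim->mChannels[c];
                std::printf("  node '%s': %u pos / %u rot / %u scale keys\n",
                            ch->mNodeName.C_Str(), ch->mNumPositionKeys,
                            ch->mNumRotationKeys, ch->mNumScalingKeys);
            }
        }
        aiReleaseImport(scene);
        return 0;
    }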