Update to tinyusdz "dev" branch commit b622782

2024-04-06 16:13:50 -07:00 · 2024-04-06 16:13:50 -07:00 · 0bba1aa5b5
parent 47ed2f5b29
commit 0bba1aa5b5
8 changed files with 1730 additions and 58 deletions
--- a/contrib/tinyusdz/tinyusdz_repo/README.md
+++ b/contrib/tinyusdz/tinyusdz_repo/README.md
@ -8,7 +8,7 @@
  * Working on the branch: https://github.com/syoyo/tinyusdz/tree/rendermesh-refactor 
  * [ ] USD to RenderScene(OpenGL/Vulkan) conversion https://github.com/syoyo/tinyusdz/issues/109
  * [ ] GeomSubset/Material Binding API support for shading/texturing https://github.com/syoyo/tinyusdz/issues/103 
- 
+  * [ ] UTF8 Identifier support https://github.com/syoyo/tinyusdz/issues/47
 
 ## Mid-term todo

@ -22,10 +22,12 @@
  * [x] variantSet
  * [ ] Validate composition is correctly operated.
 * Better usdLux support https://github.com/syoyo/tinyusdz/issues/101 
+* [ ] Support parsing usd-wg USD assets
+  * https://github.com/syoyo/tinyusdz/issues/135
 * Support reading & compose some production USD scenes
  * [ ] Moana island v2.1 https://github.com/syoyo/tinyusdz/issues/90
  * [ ] ALAB USD production scene https://github.com/syoyo/tinyusdz/issues/91
- 
+  
 * MaterialX https://github.com/syoyo/tinyusdz/issues/86
  * USD + MateriralX + PBR rendering example using https://github.com/lighttransport/pbrlab
 * Improve interoperability with Blender USD export/import https://github.com/syoyo/tinyusdz/issues/98
--- a/contrib/tinyusdz/tinyusdz_repo/src/ascii-parser.cc
+++ b/contrib/tinyusdz/tinyusdz_repo/src/ascii-parser.cc
@ -792,12 +792,12 @@ bool AsciiParser::MaybeCustom() {
 }

 bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
-  // '{' (type name '=' value)+ '}'
+  // '{' comment | (type name '=' value)+ '}'
  if (!Expect('{')) {
    return false;
  }

-  if (!SkipWhitespaceAndNewline()) {
+  if (!SkipCommentAndWhitespaceAndNewline()) {
    return false;
  }

@ -820,7 +820,7 @@ bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
        PUSH_ERROR_AND_RETURN("Failed to parse dict element.");
      }

-      if (!SkipWhitespaceAndNewline()) {
+      if (!SkipCommentAndWhitespaceAndNewline()) {
        return false;
      }

@ -833,7 +833,7 @@ bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
    }
  }

-  if (!SkipWhitespaceAndNewline()) {
+  if (!SkipCommentAndWhitespaceAndNewline()) {
    return false;
  }

@ -1834,7 +1834,7 @@ bool AsciiParser::ParseStageMetaOpt() {
 }

 // Parse Stage meta
-// meta = ( metadata_opt )
+// meta = '(' (comment | metadata_opt)+ ')'
 //      ;
 bool AsciiParser::ParseStageMetas() {
  if (!Expect('(')) {
@ -1866,7 +1866,7 @@ bool AsciiParser::ParseStageMetas() {
      return true;

    } else {
-      if (!SkipWhitespace()) {
+      if (!SkipCommentAndWhitespaceAndNewline()) {
        // eof
        return false;
      }
@ -3361,7 +3361,7 @@ bool AsciiParser::ParseRelationship(Relationship *result) {
      PUSH_ERROR_AND_RETURN("Failed to parse None.");
    }

-    // Should be empty.
+    // Should be empty for None.
    if (value.has_value()) {
      PUSH_ERROR_AND_RETURN("Failed to parse None.");
    }
@ -3622,10 +3622,6 @@ bool AsciiParser::ParsePrimProps(std::map<std::string, Property> *props, std::ve
      return false;
    }

-    if (MaybeNone()) {
-      return true;
-    }
-
    Relationship rel;
    if (!ParseRelationship(&rel)) {
      PUSH_ERROR_AND_RETURN("Failed to parse `rel` property.");
--- a/contrib/tinyusdz/tinyusdz_repo/src/prim-reconstruct.cc
+++ b/contrib/tinyusdz/tinyusdz_repo/src/prim-reconstruct.cc
@ -1686,28 +1686,35 @@ nonstd::expected<bool, std::string> ParseEnumProperty(
                           __target, __strict_check) {                          \
  if (__prop.first == __name) {                                              \
    if (__table.count(__name)) { continue; } \
-    const Attribute &attr = __prop.second.get_attribute();                           \
-    if (auto tok = attr.get_value<value::token>()) {                     \
-      auto e = __enum_handler(tok.value().str());                            \
-      if (e) {                                                               \
-        __target = e.value();                                                \
-        /* TODO: attr meta __target.meta = attr.meta;  */                    \
-        __table.insert(__name);                                              \
-      } else if (__strict_check) {                                            \
-        PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name()  \
-                                  << ") " << e.error());                     \
-      } else { \
-        PUSH_WARN("`" << tok.value().str() << "` is not allowed token for `" << __name << "`. Set to default token value."); \
-        /* TODO: attr meta __target.meta = attr.meta;  */                    \
-        __table.insert(__name);                                              \
-      } \
-    } else {                                                                 \
-      PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name()    \
-                                << ") Property type mismatch. " << __name    \
-                                << " must be type `token`, but got `"        \
-                                << attr.type_name() << "`.");            \
-    }                                                                        \
-  } }
+    if ((__prop.second.value_type_name() == value::TypeTraits<value::token>::type_name()) && __prop.second.is_attribute() && __prop.second.is_empty()) { \
+      PUSH_WARN("No value assigned to `" << __name << "` token attribute. Set default token value."); \
+      /* TODO: attr meta __target.meta = attr.meta;  */                    \
+      __table.insert(__name);                                              \
+    } else { \
+      const Attribute &attr = __prop.second.get_attribute();                           \
+      if (auto tok = attr.get_value<value::token>()) {                     \
+        auto e = __enum_handler(tok.value().str());                            \
+        if (e) {                                                               \
+          __target = e.value();                                                \
+          /* TODO: attr meta __target.meta = attr.meta;  */                    \
+          __table.insert(__name);                                              \
+        } else if (__strict_check) {                                            \
+          PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name()  \
+                                    << ") " << e.error());                     \
+        } else { \
+          PUSH_WARN("`" << tok.value().str() << "` is not allowed token for `" << __name << "`. Set to default token value."); \
+          /* TODO: attr meta __target.meta = attr.meta;  */                    \
+          __table.insert(__name);                                              \
+        } \
+      } else {                                                                 \
+        PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name()    \
+                                  << ") Property type mismatch. " << __name    \
+                                  << " must be type `token`, but got `"        \
+                                  << attr.type_name() << "`.");            \
+      }                                                                        \
+    } \
+  } \
+}


 // Add custom property(including property with "primvars" prefix)
@ -2152,13 +2159,13 @@ bool ReconstructMaterialBindingProperties(
        PUSH_ERROR_AND_RETURN(fmt::format("`{}` must be a Relationship", prop.first));
      }

-      std::string purpose_name = removePrefix(prop.first, kMaterialBindingCollection + std::string(":"));
+      std::string purpose_name = removePrefix(prop.first, kMaterialBinding + std::string(":"));
      if (purpose_name.empty()) {
        PUSH_ERROR_AND_RETURN("empty PURPOSE is not allowed for 'mateirial:binding:'");
      }
      std::vector<std::string> names = split(purpose_name, ":");
      if (names.size() > 1) {
-        PUSH_ERROR_AND_RETURN("PURPOSE must not have nested namespaces for 'mateirial:binding'");
+        PUSH_ERROR_AND_RETURN(fmt::format("PURPOSE `{}` must not have nested namespaces for 'mateirial:binding'", purpose_name));
      }
      value::token mat_purpose = value::token(names[0]);

--- a/contrib/tinyusdz/tinyusdz_repo/src/str-util.cc
+++ b/contrib/tinyusdz/tinyusdz_repo/src/str-util.cc
@ -2,6 +2,7 @@
 // Copyright 2023 - Present, Light Transport Entertainment, Inc.
 #include "str-util.hh"

+#include "unicode-xid.hh"
 #include "common-macros.inc"

 namespace tinyusdz {
@ -470,6 +471,68 @@ inline std::string extract_utf8_char(const std::string &str, uint32_t start_i,
  }
 }

+///
+/// Decode the single UTF-8 encoded character that starts at `s`.
+///
+/// @param[in]  s         Pointer to the lead byte (may be null).
+/// @param[out] char_len  Byte length that detail::utf8_len() reports for the
+///                       lead byte. Set to 0 when `s` is null or when the
+///                       reported length is not in the range 1..4.
+/// @return Decoded code point, or ~0u when the sequence is malformed.
+///
+/// Bytes are read strictly in order and decoding stops at the first byte that
+/// is not a continuation byte (10xx-xxxx). A sequence cut short by a NUL
+/// terminator is therefore never read past that terminator.
+///
+/// NOTE: Only the lead/continuation bit patterns are validated. Overlong
+/// encodings and surrogate values are not rejected here.
+///
+inline uint32_t to_codepoint(const char *s, uint32_t &char_len) {
+  constexpr uint32_t kInvalid = ~0u;
+
+  if (!s) {
+    char_len = 0;
+    return kInvalid;
+  }
+
+  const unsigned char lead = static_cast<unsigned char>(s[0]);
+
+  char_len = detail::utf8_len(lead);
+  if (char_len == 0) {
+    return kInvalid;
+  }
+
+  // Validate the lead byte and extract its payload bits.
+  uint32_t code = 0;
+  if (char_len == 1) {
+    // 7bit: 0xxx-xxxx
+    if (lead > 0x7f) {
+      return kInvalid;
+    }
+    code = uint32_t(lead & 0x7f);
+  } else if (char_len == 2) {
+    // 11bit: 110y-yyyx 10xx-xxxx
+    if ((lead & 0xe0) != 0xc0) {
+      return kInvalid;
+    }
+    code = uint32_t(lead & 0x1f);
+  } else if (char_len == 3) {
+    // 16bit: 1110-yyyy 10yx-xxxx 10xx-xxxx
+    if ((lead & 0xf0) != 0xe0) {
+      return kInvalid;
+    }
+    code = uint32_t(lead & 0x0f);
+  } else if (char_len == 4) {
+    // 21bit: 1111-0yyy 10yy-xxxx 10xx-xxxx 10xx-xxxx
+    if ((lead & 0xf8) != 0xf0) {
+      return kInvalid;
+    }
+    code = uint32_t(lead & 0x07);
+  } else {
+    // Length outside 1..4 is not a valid UTF-8 sequence.
+    char_len = 0;
+    return kInvalid;
+  }
+
+  // Every trailing byte (including the 4th byte of a 4-byte sequence) must be
+  // a continuation byte. Each byte is checked before the next one is read.
+  for (uint32_t i = 1; i < char_len; i++) {
+    const unsigned char c = static_cast<unsigned char>(s[i]);
+    if ((c & 0xc0) != 0x80) {
+      return kInvalid;
+    }
+    code = (code << 6) | uint32_t(c & 0x3f);
+  }
+
+  return code;
+}
+
 }  // namespace detail

 std::vector<std::string> to_utf8_chars(const std::string &str) {
@ -544,6 +607,7 @@ uint32_t to_utf8_code(const std::string &s) {
  return code;
 }

+
 #if 0
 std::string to_utf8_char(const uint32_t code) {

@ -568,4 +632,47 @@ bool is_valid_utf8(const std::string &str) {
  return true;
 }

+///
+/// Convert a UTF-8 string to its sequence of code point values.
+///
+/// Returns an empty vector when `str` is empty or when any character fails to
+/// decode (invalid lead byte, truncated sequence, bad continuation byte, or a
+/// value above kMaxUTF8Codepoint).
+///
+std::vector<uint32_t> to_codepoints(const std::string &str) {
+  std::vector<uint32_t> cps;
+
+  for (size_t i = 0; i < str.size();) {
+    // Make sure the sequence announced by the lead byte fits in the remaining
+    // bytes BEFORE decoding, so the decoder is never handed a sequence that
+    // extends beyond the end of the string.
+    const uint32_t expected_len =
+        detail::utf8_len(static_cast<unsigned char>(str[i]));
+    if ((expected_len == 0) || (expected_len > (str.size() - i))) {
+      return std::vector<uint32_t>();
+    }
+
+    uint32_t char_len = 0;
+    const uint32_t cp = detail::to_codepoint(str.c_str() + i, char_len);
+
+    // to_codepoint() signals failure with ~0u (which is > kMaxUTF8Codepoint)
+    // and/or a zero length.
+    if ((cp > kMaxUTF8Codepoint) || (char_len == 0)) {
+      return std::vector<uint32_t>();
+    }
+
+    cps.push_back(cp);
+
+    i += char_len;
+  }
+
+  return cps;
+}
+
+///
+/// Test whether `str` is an identifier of the form
+///
+///   (XID_Start|_) (XID_Continue|_)*
+///
+/// Returns false when `str` is empty or cannot be converted to code points.
+///
+bool is_valid_utf8_identifier(const std::string &str) {
+  constexpr uint32_t kUnderscore = 0x5f;  // '_'
+
+  // First convert to codepoint values.
+  const std::vector<uint32_t> codepoints = to_codepoints(str);
+
+  if (codepoints.empty()) {
+    return false;
+  }
+
+  // First character: underscore or XID_Start.
+  // A character is rejected only when it is NEITHER of the two, hence `&&`.
+  if ((codepoints[0] != kUnderscore) &&
+      !unicode_xid::is_xid_start(codepoints[0])) {
+    return false;
+  }
+
+  // Remaining characters: underscore or XID_Continue.
+  for (size_t i = 1; i < codepoints.size(); i++) {
+    if ((codepoints[i] != kUnderscore) &&
+        !unicode_xid::is_xid_continue(codepoints[i])) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 }  // namespace tinyusdz
--- a/contrib/tinyusdz/tinyusdz_repo/src/str-util.hh
+++ b/contrib/tinyusdz/tinyusdz_repo/src/str-util.hh
@ -12,6 +12,8 @@

 namespace tinyusdz {

+constexpr size_t kMaxUTF8Codepoint = 0x10ffff;
+
 enum class CharEncoding
 {
  None,
@ -240,30 +242,42 @@ std::string unescapeControlSequence(const std::string &str);

 std::string buildEscapedAndQuotedStringForUSDA(const std::string &str);

+///
+/// Determine if input UTF-8 string is Unicode Identifier
+/// (UAX31 Default Identifier)
+///
+bool is_valid_utf8_identifier(const std::string &str);
+
 // TfIsValidIdentifier in pxrUSD equivalanet
-// TODO: support UTF-8
-inline bool isValidIdentifier(const std::string &str) {
+// Supports UTF-8 identifier (UAX31 Default Identifier. pxrUSD supports UTF8 Identifier from 24.03)
+inline bool isValidIdentifier(const std::string &str, bool is_utf8 = true) {

  if (str.empty()) {
    return false;
  }

-  // first char
-  // [a-ZA-Z_]
-  if ((('a' <= str[0]) && (str[0] <= 'z')) || (('A' <= str[0]) && (str[0] <= 'Z')) || (str[0] == '_')) {
-    // ok
+  if (is_utf8) {
+    return is_valid_utf8_identifier(str);
  } else {
-    return false;
-  }
-
-  // remain chars
-  // [a-ZA-Z0-9_]
-  for (size_t i = 1; i < str.length(); i++) {
-    if ((('a' <= str[i]) && (str[i] <= 'z')) || (('A' <= str[i]) && (str[i] <= 'Z')) || (('0' <= str[i]) && (str[i] <= '9')) || (str[i] == '_')) {
+    // legacy
+    
+    // first char
+    // [a-ZA-Z_]
+    if ((('a' <= str[0]) && (str[0] <= 'z')) || (('A' <= str[0]) && (str[0] <= 'Z')) || (str[0] == '_')) {
      // ok
    } else {
      return false;
    }
+
+    // remaining chars
+    // [a-ZA-Z0-9_]
+    for (size_t i = 1; i < str.length(); i++) {
+      if ((('a' <= str[i]) && (str[i] <= 'z')) || (('A' <= str[i]) && (str[i] <= 'Z')) || (('0' <= str[i]) && (str[i] <= '9')) || (str[i] == '_')) {
+        // ok
+      } else {
+        return false;
+      }
+    }
  }

  return true;
@ -272,7 +286,9 @@ inline bool isValidIdentifier(const std::string &str) {

 // TfMakeValidIdentifier in pxrUSD equivalanet
 // TODO: support UTF-8
-inline std::string makeIdentifierValid(const std::string &str) {
+inline std::string makeIdentifierValid(const std::string &str, bool is_utf8 = true) {
+  (void)is_utf8;
+
  std::string s;

  if (str.empty()) {
@ -312,8 +328,12 @@ inline std::string makeIdentifierValid(const std::string &str) {
 bool makeUniqueName(std::multiset<std::string> &nameSet, const std::string &name, std::string *unique_name);


+///
+/// Determine if input string is valid UTF-8 string.
+///
 bool is_valid_utf8(const std::string &str);

+
 ///
 /// Convert string buffer to list of UTF-8 chars.
 /// Example: 'こんにちは' => ['こ', 'ん', 'に', 'ち', 'は']
@ -326,6 +346,13 @@ std::vector<std::string> to_utf8_chars(const std::string &str);
 ///
 uint32_t to_utf8_code(const std::string &u8char);

+///
+/// Convert UTF-8 string to codepoint values.
+///
+/// Return empty array when input is not a valid UTF-8 string.
+///
+std::vector<uint32_t> to_codepoints(const std::string &str);
+
 ///
 /// Convert UTF-8 codepoint to UTF-8 string.
 ///
--- a/contrib/tinyusdz/tinyusdz_repo/src/tinyusdz.cc
+++ b/contrib/tinyusdz/tinyusdz_repo/src/tinyusdz.cc
@ -525,8 +525,9 @@ bool LoadUSDZFromMemory(const uint8_t *addr, const size_t length,
        return false;
      }

-      if (asset_size > (options.max_allowed_asset_size_in_mb * 1024 * 1024)) {
-        PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Asset file size too large.");
+      if (asset_size > (options.max_allowed_asset_size_in_mb * 1024ull * 1024ull)) {
+        PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] file size too large. {} bytes (max_allowed_asset_size {})",
+          i, asset_size, options.max_allowed_asset_size_in_mb * 1024ull * 1024ull));
      }

      DCOUT("Image asset size: " << asset_size);
@ -537,26 +538,26 @@ bool LoadUSDZFromMemory(const uint8_t *addr, const size_t length,

        if (info) {
          if (info->width == 0) {
-            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero width.");
+            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Assset no[{}] Image has zero width.", i));
          }

          if (info->width > options.max_image_width) {
            PUSH_ERROR_AND_RETURN_TAG(
-                kTagUSDZ, fmt::format("Asset no[{}] Image width too large", i));
+                kTagUSDZ, fmt::format("Asset no[{}] Image width too large. {} (max_image_width {})", i, info->width, options.max_image_width));
          }

          if (info->height == 0) {
-            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero height.");
+            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] Image has zero height.", i));
          }

          if (info->height > options.max_image_height) {
            PUSH_ERROR_AND_RETURN_TAG(
                kTagUSDZ,
-                fmt::format("Asset no[{}] Image height too large", i));
+                fmt::format("Asset no[{}] Image height too large. {} (max_image_height {})", i, info->height, options.max_image_height));
          }

          if (info->channels == 0) {
-            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero channels.");
+            PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] Image has zero channels.", i));
          }

          if (info->channels > options.max_image_channels) {
--- a/contrib/tinyusdz/tinyusdz_repo/src/unicode-xid-table.inc
+++ b/contrib/tinyusdz/tinyusdz_repo/src/unicode-xid-table.inc
--- a/contrib/tinyusdz/tinyusdz_repo/src/unicode-xid.hh
+++ b/contrib/tinyusdz/tinyusdz_repo/src/unicode-xid.hh
@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MIT
+// Copyright 2024 - Present, Light Transport Entertainment Inc.
+//
+// UTF-8 Unicode identifier XID_Start and XID_Continue validation utility.
+//
+// Based on UAX31 Default Identifier and Unicode 5.1.0
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+#include <limits>
+
+namespace unicode_xid {
+
+constexpr uint32_t kMaxCodepoint = 0x10FFFF;
+
+namespace detail {
+
+// Assume table is sorted by the first key(lower)
+#include "unicode-xid-table.inc"
+
+}
+
+
+///
+/// Test whether `codepoint` falls in one of the XID_Start ranges.
+///
+/// detail::kXID_StartTable is searched as a list of inclusive (lower, upper)
+/// ranges sorted by `lower`. Only the range starting at `codepoint` and the
+/// range immediately before it are examined, which assumes the ranges do not
+/// overlap.
+///
+/// Returns false for values above kMaxCodepoint.
+///
+inline bool is_xid_start(uint32_t codepoint) {
+  if (codepoint > kMaxCodepoint) {
+    return false;
+  }
+
+  const int cp = int(codepoint);
+  const auto table_begin = detail::kXID_StartTable.begin();
+  const auto table_end = detail::kXID_StartTable.end();
+
+  // Find the first range whose lower bound is >= cp.
+  // NOTE: second item in query is not used. fill it T::min just in case.
+  auto it = std::lower_bound(
+      table_begin, table_end,
+      std::make_pair(cp, (std::numeric_limits<int>::min)()));
+
+  // A range that starts exactly at cp (range end is inclusive).
+  if ((it != table_end) && (cp >= it->first) && (cp <= it->second)) {
+    return true;
+  }
+
+  // Otherwise the only candidate is the preceding range (lower < cp).
+  // `it` may be end() here, so it must not be dereferenced before stepping
+  // back. Ranges after `it` start above cp and cannot contain it.
+  if (it == table_begin) {
+    return false;
+  }
+  --it;
+
+  return (cp >= it->first) && (cp <= it->second);
+}
+
+///
+/// Test whether `codepoint` falls in one of the XID_Continue ranges.
+///
+/// detail::kXID_ContinueTable is searched as a list of inclusive
+/// (lower, upper) ranges sorted by `lower`. Only the range starting at
+/// `codepoint` and the range immediately before it are examined, which
+/// assumes the ranges do not overlap.
+///
+/// Returns false for values above kMaxCodepoint.
+///
+inline bool is_xid_continue(uint32_t codepoint) {
+  if (codepoint > kMaxCodepoint) {
+    return false;
+  }
+
+  const int cp = int(codepoint);
+  const auto table_begin = detail::kXID_ContinueTable.begin();
+  const auto table_end = detail::kXID_ContinueTable.end();
+
+  // Find the first range whose lower bound is >= cp.
+  // NOTE: second item in query is not used. fill it T::min just in case.
+  auto it = std::lower_bound(
+      table_begin, table_end,
+      std::make_pair(cp, (std::numeric_limits<int>::min)()));
+
+  // A range that starts exactly at cp (range end is inclusive).
+  if ((it != table_end) && (cp >= it->first) && (cp <= it->second)) {
+    return true;
+  }
+
+  // Otherwise the only candidate is the preceding range (lower < cp).
+  // `it` may be end() here, so it must not be dereferenced before stepping
+  // back. Ranges after `it` start above cp and cannot contain it.
+  if (it == table_begin) {
+    return false;
+  }
+  --it;
+
+  return (cp >= it->first) && (cp <= it->second);
+}
+
+} // namespace unicode_xid
+
+