Update to tinyusdz "dev" branch commit b622782

pull/5522/head
Steve M 2024-04-06 16:13:50 -07:00
parent 47ed2f5b29
commit 0bba1aa5b5
8 changed files with 1730 additions and 58 deletions

View File

@ -8,7 +8,7 @@
* Working on the branch: https://github.com/syoyo/tinyusdz/tree/rendermesh-refactor
* [ ] USD to RenderScene(OpenGL/Vulkan) conversion https://github.com/syoyo/tinyusdz/issues/109
* [ ] GeomSubset/Material Binding API support for shading/texturing https://github.com/syoyo/tinyusdz/issues/103
* [ ] UTF8 Identifier support https://github.com/syoyo/tinyusdz/issues/47
## Mid-term todo
@ -22,10 +22,12 @@
* [x] variantSet
* [ ] Validate composition is correctly operated.
* Better usdLux support https://github.com/syoyo/tinyusdz/issues/101
* [ ] Support parsing usd-wg USD assets
* https://github.com/syoyo/tinyusdz/issues/135
* Support reading & compose some production USD scenes
* [ ] Moana island v2.1 https://github.com/syoyo/tinyusdz/issues/90
* [ ] ALAB USD production scene https://github.com/syoyo/tinyusdz/issues/91
* MaterialX https://github.com/syoyo/tinyusdz/issues/86
* USD + MaterialX + PBR rendering example using https://github.com/lighttransport/pbrlab
* Improve interoperability with Blender USD export/import https://github.com/syoyo/tinyusdz/issues/98

View File

@ -792,12 +792,12 @@ bool AsciiParser::MaybeCustom() {
}
bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
// '{' (type name '=' value)+ '}'
// '{' comment | (type name '=' value)+ '}'
if (!Expect('{')) {
return false;
}
if (!SkipWhitespaceAndNewline()) {
if (!SkipCommentAndWhitespaceAndNewline()) {
return false;
}
@ -820,7 +820,7 @@ bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
PUSH_ERROR_AND_RETURN("Failed to parse dict element.");
}
if (!SkipWhitespaceAndNewline()) {
if (!SkipCommentAndWhitespaceAndNewline()) {
return false;
}
@ -833,7 +833,7 @@ bool AsciiParser::ParseDict(std::map<std::string, MetaVariable> *out_dict) {
}
}
if (!SkipWhitespaceAndNewline()) {
if (!SkipCommentAndWhitespaceAndNewline()) {
return false;
}
@ -1834,7 +1834,7 @@ bool AsciiParser::ParseStageMetaOpt() {
}
// Parse Stage meta
// meta = ( metadata_opt )
// meta = '(' (comment | metadata_opt)+ ')'
// ;
bool AsciiParser::ParseStageMetas() {
if (!Expect('(')) {
@ -1866,7 +1866,7 @@ bool AsciiParser::ParseStageMetas() {
return true;
} else {
if (!SkipWhitespace()) {
if (!SkipCommentAndWhitespaceAndNewline()) {
// eof
return false;
}
@ -3361,7 +3361,7 @@ bool AsciiParser::ParseRelationship(Relationship *result) {
PUSH_ERROR_AND_RETURN("Failed to parse None.");
}
// Should be empty.
// Should be empty for None.
if (value.has_value()) {
PUSH_ERROR_AND_RETURN("Failed to parse None.");
}
@ -3622,10 +3622,6 @@ bool AsciiParser::ParsePrimProps(std::map<std::string, Property> *props, std::ve
return false;
}
if (MaybeNone()) {
return true;
}
Relationship rel;
if (!ParseRelationship(&rel)) {
PUSH_ERROR_AND_RETURN("Failed to parse `rel` property.");

View File

@ -1686,28 +1686,35 @@ nonstd::expected<bool, std::string> ParseEnumProperty(
__target, __strict_check) { \
if (__prop.first == __name) { \
if (__table.count(__name)) { continue; } \
const Attribute &attr = __prop.second.get_attribute(); \
if (auto tok = attr.get_value<value::token>()) { \
auto e = __enum_handler(tok.value().str()); \
if (e) { \
__target = e.value(); \
/* TODO: attr meta __target.meta = attr.meta; */ \
__table.insert(__name); \
} else if (__strict_check) { \
PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name() \
<< ") " << e.error()); \
} else { \
PUSH_WARN("`" << tok.value().str() << "` is not allowed token for `" << __name << "`. Set to default token value."); \
/* TODO: attr meta __target.meta = attr.meta; */ \
__table.insert(__name); \
} \
} else { \
PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name() \
<< ") Property type mismatch. " << __name \
<< " must be type `token`, but got `" \
<< attr.type_name() << "`."); \
} \
} }
if ((__prop.second.value_type_name() == value::TypeTraits<value::token>::type_name()) && __prop.second.is_attribute() && __prop.second.is_empty()) { \
PUSH_WARN("No value assigned to `" << __name << "` token attribute. Set default token value."); \
/* TODO: attr meta __target.meta = attr.meta; */ \
__table.insert(__name); \
} else { \
const Attribute &attr = __prop.second.get_attribute(); \
if (auto tok = attr.get_value<value::token>()) { \
auto e = __enum_handler(tok.value().str()); \
if (e) { \
__target = e.value(); \
/* TODO: attr meta __target.meta = attr.meta; */ \
__table.insert(__name); \
} else if (__strict_check) { \
PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name() \
<< ") " << e.error()); \
} else { \
PUSH_WARN("`" << tok.value().str() << "` is not allowed token for `" << __name << "`. Set to default token value."); \
/* TODO: attr meta __target.meta = attr.meta; */ \
__table.insert(__name); \
} \
} else { \
PUSH_ERROR_AND_RETURN("(" << value::TypeTraits<__klass>::type_name() \
<< ") Property type mismatch. " << __name \
<< " must be type `token`, but got `" \
<< attr.type_name() << "`."); \
} \
} \
} \
}
// Add custom property(including property with "primvars" prefix)
@ -2152,13 +2159,13 @@ bool ReconstructMaterialBindingProperties(
PUSH_ERROR_AND_RETURN(fmt::format("`{}` must be a Relationship", prop.first));
}
std::string purpose_name = removePrefix(prop.first, kMaterialBindingCollection + std::string(":"));
std::string purpose_name = removePrefix(prop.first, kMaterialBinding + std::string(":"));
if (purpose_name.empty()) {
PUSH_ERROR_AND_RETURN("empty PURPOSE is not allowed for 'mateirial:binding:'");
}
std::vector<std::string> names = split(purpose_name, ":");
if (names.size() > 1) {
PUSH_ERROR_AND_RETURN("PURPOSE must not have nested namespaces for 'mateirial:binding'");
PUSH_ERROR_AND_RETURN(fmt::format("PURPOSE `{}` must not have nested namespaces for 'mateirial:binding'", purpose_name));
}
value::token mat_purpose = value::token(names[0]);

View File

@ -2,6 +2,7 @@
// Copyright 2023 - Present, Light Transport Entertainment, Inc.
#include "str-util.hh"
#include "unicode-xid.hh"
#include "common-macros.inc"
namespace tinyusdz {
@ -470,6 +471,68 @@ inline std::string extract_utf8_char(const std::string &str, uint32_t start_i,
}
}
///
/// Decode the single UTF-8 encoded character starting at `s` into a codepoint.
///
/// Trailing bytes are read one at a time and decoding stops at the first byte
/// that is not a continuation byte (10xx-xxxx). Because a NUL terminator is
/// never a continuation byte, a truncated sequence at the end of a
/// NUL-terminated buffer is rejected without reading past the terminator.
///
/// NOTE: Overlong encodings and surrogate codepoints are not rejected here.
///
/// @param[in]  s        Pointer to the lead byte. May be null.
/// @param[out] char_len Sequence length in bytes as reported by
///                      `detail::utf8_len` for the lead byte. Set to 0 when
///                      `s` is null or the reported length is not in 1..4.
/// @return Decoded codepoint, or `~0u` when the input cannot be decoded.
///
inline uint32_t to_codepoint(const char *s, uint32_t &char_len) {
  constexpr uint32_t kInvalid = ~0u;

  if (!s) {
    char_len = 0;
    return kInvalid;
  }

  char_len = detail::utf8_len(static_cast<unsigned char>(s[0]));
  if ((char_len == 0) || (char_len > 4)) {
    char_len = 0;
    return kInvalid;
  }

  // Indexed by sequence length(1..4). Entry 0 is unused.
  //   1 byte : 0xxx-xxxx
  //   2 bytes: 110y-yyyx 10xx-xxxx                      (11bit)
  //   3 bytes: 1110-yyyy 10yx-xxxx 10xx-xxxx            (16bit)
  //   4 bytes: 1111-0yyy 10yy-xxxx 10xx-xxxx 10xx-xxxx  (21bit)
  static const unsigned char kLeadTagMask[5] = {0x00, 0x80, 0xe0, 0xf0, 0xf8};
  static const unsigned char kLeadTag[5] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};

  const unsigned char lead = static_cast<unsigned char>(s[0]);
  if ((lead & kLeadTagMask[char_len]) != kLeadTag[char_len]) {
    return kInvalid;
  }

  // Payload bits of the lead byte are the bits not covered by the tag mask.
  uint32_t code = uint32_t(lead) & (0xffu & ~uint32_t(kLeadTagMask[char_len]));

  // Validate each trailing byte BEFORE touching the next one so that we never
  // read beyond the first non-continuation byte(e.g. the NUL terminator).
  for (uint32_t i = 1; i < char_len; i++) {
    const unsigned char cont = static_cast<unsigned char>(s[i]);
    if ((cont & 0xc0) != 0x80) {
      return kInvalid;
    }
    code = (code << 6) | uint32_t(cont & 0x3f);
  }

  return code;
}
} // namespace detail
std::vector<std::string> to_utf8_chars(const std::string &str) {
@ -544,6 +607,7 @@ uint32_t to_utf8_code(const std::string &s) {
return code;
}
#if 0
std::string to_utf8_char(const uint32_t code) {
@ -568,4 +632,47 @@ bool is_valid_utf8(const std::string &str) {
return true;
}
///
/// Decode a UTF-8 string into a list of codepoint values.
///
/// Each character is decoded with `detail::to_codepoint`. An empty vector is
/// returned as soon as one character fails to decode (codepoint above
/// `kMaxUTF8Codepoint`, or a reported byte length of zero). An empty input
/// also yields an empty vector.
///
std::vector<uint32_t> to_codepoints(const std::string &str) {
  std::vector<uint32_t> result;

  const char *const base = str.c_str();
  size_t offset = 0;

  while (offset < str.size()) {
    uint32_t nbytes = 0;
    const uint32_t codepoint = detail::to_codepoint(base + offset, nbytes);

    const bool decoded = (codepoint <= kMaxUTF8Codepoint) && (nbytes != 0);
    if (!decoded) {
      return std::vector<uint32_t>();
    }

    result.push_back(codepoint);
    offset += nbytes;
  }

  return result;
}
///
/// Determine whether `str` is a valid UTF-8 identifier:
///
///   (XID_Start | '_') (XID_Continue | '_')*
///
/// Returns false when `str` is empty or cannot be decoded by `to_codepoints`
/// (which yields an empty list in both cases).
///
bool is_valid_utf8_identifier(const std::string &str) {
  // First convert to codepoint values.
  const std::vector<uint32_t> codepoints = to_codepoints(str);

  if (codepoints.empty()) {
    return false;
  }

  const uint32_t kUnderscore = uint32_t('_');

  // A character is rejected only when it is NEITHER an underscore NOR in the
  // required XID class, so the two tests must be combined with `&&`.
  // (With `||` every input was rejected: a non-underscore fails the first
  // test, and an underscore is not XID_Start.)
  if ((codepoints[0] != kUnderscore) &&
      !unicode_xid::is_xid_start(codepoints[0])) {
    return false;
  }

  for (size_t i = 1; i < codepoints.size(); i++) {
    if ((codepoints[i] != kUnderscore) &&
        !unicode_xid::is_xid_continue(codepoints[i])) {
      return false;
    }
  }

  return true;
}
} // namespace tinyusdz

View File

@ -12,6 +12,8 @@
namespace tinyusdz {
constexpr size_t kMaxUTF8Codepoint = 0x10ffff;
enum class CharEncoding
{
None,
@ -240,30 +242,42 @@ std::string unescapeControlSequence(const std::string &str);
std::string buildEscapedAndQuotedStringForUSDA(const std::string &str);
///
/// Determine if input UTF-8 string is Unicode Identifier
/// (UAX31 Default Identifier)
///
bool is_valid_utf8_identifier(const std::string &str);
// TfIsValidIdentifier in pxrUSD equivalent
// TODO: support UTF-8
inline bool isValidIdentifier(const std::string &str) {
// Supports UTF-8 identifiers (UAX31 Default Identifier). pxrUSD supports UTF-8 identifiers from 24.03.
inline bool isValidIdentifier(const std::string &str, bool is_utf8 = true) {
if (str.empty()) {
return false;
}
// first char
// [a-ZA-Z_]
if ((('a' <= str[0]) && (str[0] <= 'z')) || (('A' <= str[0]) && (str[0] <= 'Z')) || (str[0] == '_')) {
// ok
if (is_utf8) {
return is_valid_utf8_identifier(str);
} else {
return false;
}
// remain chars
// [a-ZA-Z0-9_]
for (size_t i = 1; i < str.length(); i++) {
if ((('a' <= str[i]) && (str[i] <= 'z')) || (('A' <= str[i]) && (str[i] <= 'Z')) || (('0' <= str[i]) && (str[i] <= '9')) || (str[i] == '_')) {
// legacy
// first char
// [a-ZA-Z_]
if ((('a' <= str[0]) && (str[0] <= 'z')) || (('A' <= str[0]) && (str[0] <= 'Z')) || (str[0] == '_')) {
// ok
} else {
return false;
}
// remaining chars
// [a-ZA-Z0-9_]
for (size_t i = 1; i < str.length(); i++) {
if ((('a' <= str[i]) && (str[i] <= 'z')) || (('A' <= str[i]) && (str[i] <= 'Z')) || (('0' <= str[i]) && (str[i] <= '9')) || (str[i] == '_')) {
// ok
} else {
return false;
}
}
}
return true;
@ -272,7 +286,9 @@ inline bool isValidIdentifier(const std::string &str) {
// TfMakeValidIdentifier in pxrUSD equivalent
// TODO: support UTF-8
inline std::string makeIdentifierValid(const std::string &str) {
inline std::string makeIdentifierValid(const std::string &str, bool is_utf8 = true) {
(void)is_utf8;
std::string s;
if (str.empty()) {
@ -312,8 +328,12 @@ inline std::string makeIdentifierValid(const std::string &str) {
bool makeUniqueName(std::multiset<std::string> &nameSet, const std::string &name, std::string *unique_name);
///
/// Determine if input string is valid UTF-8 string.
///
bool is_valid_utf8(const std::string &str);
///
/// Convert string buffer to list of UTF-8 chars.
/// Example: 'こんにちは' => ['こ', 'ん', 'に', 'ち', 'は']
@ -326,6 +346,13 @@ std::vector<std::string> to_utf8_chars(const std::string &str);
///
uint32_t to_utf8_code(const std::string &u8char);
///
/// Convert UTF-8 string to codepoint values.
///
/// Return empty array when input is not a valid UTF-8 string.
///
std::vector<uint32_t> to_codepoints(const std::string &str);
///
/// Convert UTF-8 codepoint to UTF-8 string.
///

View File

@ -525,8 +525,9 @@ bool LoadUSDZFromMemory(const uint8_t *addr, const size_t length,
return false;
}
if (asset_size > (options.max_allowed_asset_size_in_mb * 1024 * 1024)) {
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Asset file size too large.");
if (asset_size > (options.max_allowed_asset_size_in_mb * 1024ull * 1024ull)) {
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] file size too large. {} bytes (max_allowed_asset_size {})",
i, asset_size, options.max_allowed_asset_size_in_mb * 1024ull * 1024ull));
}
DCOUT("Image asset size: " << asset_size);
@ -537,26 +538,26 @@ bool LoadUSDZFromMemory(const uint8_t *addr, const size_t length,
if (info) {
if (info->width == 0) {
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero width.");
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Assset no[{}] Image has zero width.", i));
}
if (info->width > options.max_image_width) {
PUSH_ERROR_AND_RETURN_TAG(
kTagUSDZ, fmt::format("Asset no[{}] Image width too large", i));
kTagUSDZ, fmt::format("Asset no[{}] Image width too large. {} (max_image_width {})", i, info->width, options.max_image_width));
}
if (info->height == 0) {
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero height.");
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] Image has zero height.", i));
}
if (info->height > options.max_image_height) {
PUSH_ERROR_AND_RETURN_TAG(
kTagUSDZ,
fmt::format("Asset no[{}] Image height too large", i));
fmt::format("Asset no[{}] Image height too large. {} (max_image_height {})", i, info->height, options.max_image_height));
}
if (info->channels == 0) {
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, "Image has zero channels.");
PUSH_ERROR_AND_RETURN_TAG(kTagUSDZ, fmt::format("Asset no[{}] Image has zero channels.", i));
}
if (info->channels > options.max_image_channels) {

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,73 @@
// SPDX-License-Identifier: MIT
// Copyright 2024 - Present, Light Transport Entertainment Inc.
//
// UTF-8 Unicode identifier XID_Start and XID_Continue validation utility.
//
// Based on UAX31 Default Identifier and Unicode 5.1.0
#pragma once
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>
#include <limits>
namespace unicode_xid {
constexpr uint32_t kMaxCodepoint = 0x10FFFF;
namespace detail {
// Assume table is sorted by the first key(lower)
#include "unicode-xid-table.inc"
}
///
/// Test whether `codepoint` falls inside one of the inclusive
/// [lower, upper] ranges of `detail::kXID_StartTable`.
///
/// The table is assumed to be sorted by the lower bound of each range.
/// NOTE(review): this also assumes the ranges do not overlap, so at most one
/// entry (the last one whose lower bound is <= codepoint) can contain the
/// codepoint -- confirm against the generated table.
///
/// @return true when the codepoint is covered by the table, false otherwise
///         (including codepoints above kMaxCodepoint).
///
inline bool is_xid_start(uint32_t codepoint) {
  if (codepoint > kMaxCodepoint) {
    return false;
  }

  const int cp = int(codepoint);

  // Find the first range whose lower bound is strictly greater than `cp`.
  // The second item of the query is filled with T::max so that an entry whose
  // lower bound equals `cp` always compares less-or-equal to the query.
  auto it = std::upper_bound(
      detail::kXID_StartTable.begin(), detail::kXID_StartTable.end(),
      std::make_pair(cp, (std::numeric_limits<int>::max)()));

  // Every range starts above `cp`(or the table is empty): no possible hit.
  if (it == detail::kXID_StartTable.begin()) {
    return false;
  }

  // Step back to the only candidate. This never dereferences end().
  --it;

  return (cp >= it->first) && (cp <= it->second);  // range end is inclusive.
}
///
/// Test whether `codepoint` falls inside one of the inclusive
/// [lower, upper] ranges of `detail::kXID_ContinueTable`.
///
/// The table is assumed to be sorted by the lower bound of each range.
/// NOTE(review): this also assumes the ranges do not overlap, so at most one
/// entry (the last one whose lower bound is <= codepoint) can contain the
/// codepoint -- confirm against the generated table.
///
/// @return true when the codepoint is covered by the table, false otherwise
///         (including codepoints above kMaxCodepoint).
///
inline bool is_xid_continue(uint32_t codepoint) {
  if (codepoint > kMaxCodepoint) {
    return false;
  }

  const int cp = int(codepoint);

  // Find the first range whose lower bound is strictly greater than `cp`.
  // The second item of the query is filled with T::max so that an entry whose
  // lower bound equals `cp` always compares less-or-equal to the query.
  auto it = std::upper_bound(
      detail::kXID_ContinueTable.begin(), detail::kXID_ContinueTable.end(),
      std::make_pair(cp, (std::numeric_limits<int>::max)()));

  // Every range starts above `cp`(or the table is empty): no possible hit.
  if (it == detail::kXID_ContinueTable.begin()) {
    return false;
  }

  // Step back to the only candidate. This never dereferences end().
  --it;

  return (cp >= it->first) && (cp <= it->second);  // range end is inclusive.
}
} // namespace unicode_xid