-
Notifications
You must be signed in to change notification settings - Fork 71
Env map importance sampling #969
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 114 commits
58c9c13
fef5c4e
9114750
afa2459
d90486a
dbc5852
6ebfbff
a1e86cd
d225413
aadbbb5
5b6e805
9ad89b2
ba64dc2
4f06427
fc59400
2a9bee5
4571ba0
756f79e
950fd87
9af8759
f459321
b50018a
c91d4d1
5194f7f
edc489a
b94019a
cff237a
8a10dee
7b728e4
92202e6
51cbb81
200c51f
695e667
d500e23
11dd229
563343a
99cc8c2
a2c52bf
c2021ab
a4e81d1
a2362ff
d6ed947
5043062
5c159d0
6c9c3b2
e54fc56
5b85c44
108b129
e8aa426
e7e2c70
8b4a6d0
c0b2eae
f54b77f
f306698
0c11ab7
77fb274
6a9cda4
32cebd4
12d4653
0b3954b
65c0e82
ffb8d69
ffddb4a
5990b90
967094d
deb6906
34f5872
4b7a984
a6f1704
e57c999
7a8ba14
9295167
9241d29
8108ca7
326544d
6292560
e2b9676
ec6adf0
97b8439
c23fb61
345065b
7c7c30a
615c328
4988fb2
eb47cee
0ebe456
2676c55
35469eb
b5d4012
cef9494
a26691d
5a8e7f4
0cdbbec
22db1dc
769f5f4
0afcadc
646d165
523b9c0
1d1cb73
3cfa3c6
bd32b33
504b00b
4b1b52c
181a487
cd42179
dafba0a
a5194b4
c074e6a
17a7de7
38b50cf
00e5b4c
99a8c09
4b53ad5
2dc25fa
dbcde57
1d9cda0
86b0345
f3242d9
86c93a6
22a9ab8
19a1e42
2ff04b9
a70d61e
5335497
69a840b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| +1 −1 | CMakeLists.txt | |
| +1 −1 | icd/VkICD_mock_icd.json.in | |
| +456 −542 | icd/generated/function_declarations.h | |
| +682 −761 | icd/generated/function_definitions.h | |
| +816 −1,050 | icd/generated/vk_typemap_helper.h | |
| +2 −2 | scripts/known_good.json | |
| +1 −1 | tests/icd/mock_icd_tests.cpp | |
| +11 −3 | vulkaninfo/CMakeLists.txt | |
| +1 −160 | vulkaninfo/generated/vulkaninfo.hpp | |
| +6 −11 | vulkaninfo/vulkaninfo.cpp |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,237 @@ | ||
| // Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. | ||
| // This file is part of the "Nabla Engine". | ||
| // For conditions of distribution and use, see copyright notice in nabla.h | ||
|
|
||
| #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_ | ||
|
|
||
| #include <nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl> | ||
| #include <nbl/builtin/hlsl/sampling/basic.hlsl> | ||
| #include <nbl/builtin/hlsl/sampling/hierarchical_image/accessors.hlsl> | ||
| #include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl> | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace sampling | ||
| { | ||
|
|
||
| // TODO: Implement corner sampling or centered sampling based on the type of LuminanceAccessor | ||
| template <typename LuminanceAccessorT | ||
| NBL_PRIMARY_REQUIRES( | ||
| hierarchical_image::MipmappedLuminanceReadAccessor<LuminanceAccessorT> | ||
| ) | ||
| struct HierarchicalLuminanceSampler | ||
| { | ||
| using this_type = HierarchicalLuminanceSampler<LuminanceAccessorT>; | ||
| using scalar_type = typename LuminanceAccessorT::value_type; | ||
| using vector2_type = vector<scalar_type, 2>; | ||
| using vector4_type = vector<scalar_type, 4>; | ||
| using domain_type = vector2_type; | ||
| using codomain_type = vector2_type; | ||
| using weight_type = scalar_type; | ||
| using density_type = scalar_type; | ||
| struct cache_type | ||
| { | ||
| scalar_type rcpPmf; | ||
| }; | ||
|
|
||
| LuminanceAccessorT _map; | ||
| uint16_t2 _lastTexel; | ||
| uint16_t _lastMipLevel : 15; | ||
| uint16_t _aspect2x1 : 1; | ||
|
|
||
| static this_type create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap) | |
| { | |
| // Builds a sampler from a luminance accessor, caching the index of the last texel, the coarsest mip level and the aspect flag. | |
| const uint16_t2 resolution = lumaMap.resolution(); | |
| this_type sampler; | |
| sampler._map = lumaMap; | |
| sampler._lastTexel = resolution - uint16_t2(1, 1); | |
| // Only 1x1 and 2x1 aspect ratios are currently supported, so the height alone determines the mip level | |
| sampler._lastMipLevel = _static_cast<uint16_t>(findMSB(_static_cast<uint32_t>(resolution.y))); | |
| sampler._aspect2x1 = resolution.x != resolution.y; | |
| return sampler; | |
| } | |
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
|
|
||
| static bool __choseSecond(scalar_type first, scalar_type second, NBL_REF_ARG(scalar_type) xi, NBL_REF_ARG(scalar_type) rcpPmf) | |
| { | |
| // Binary choice between two weights. `xi` is an in/out argument forwarded to the partition; `rcpPmf` is multiplied by the reciprocal probability the partition reports for the chosen side. | |
| PartitionRandVariable<scalar_type> split; | |
| // numerical resilience against IEEE754 (presumably the reason the probability is written as 1/(1+second/first) - confirm) | |
| split.leftProb = scalar_type(1) / (scalar_type(1) + (second / first)); | |
| scalar_type rcpProbOfChoice = scalar_type(0); | |
| const bool pickedSecond = split(xi, rcpProbOfChoice); | |
| rcpPmf *= rcpProbOfChoice; | |
| return pickedSecond; | |
| } | |
|
|
||
| // Manual 2x2 gather: textureGather cannot be used since we need to pass the mipLevel. | |
| // Result components hold the texels at offsets (0,1), (1,1), (1,0), (0,0) from `coord`, in that order. | |
| vector4_type __texelGather(uint16_t2 coord, uint16_t level) NBL_CONST_MEMBER_FUNC | |
| { | |
| assert(coord.x < _lastTexel.x && coord.y < _lastTexel.y); | |
| scalar_type t01, t11, t10, t00; | |
| _map.get(t01, coord + uint16_t2(0, 1), level); | |
| _map.get(t11, coord + uint16_t2(1, 1), level); | |
| _map.get(t10, coord + uint16_t2(1, 0), level); | |
| _map.get(t00, coord + uint16_t2(0, 0), level); | |
| return vector4_type(t01, t11, t10, t00); | |
| } | |
|
|
||
| codomain_type generate(const domain_type v, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| uint16_t2 p = uint16_t2(0, 0); | ||
|
|
||
| domain_type xi = v; | ||
| scalar_type rcpPmf = 1; | ||
| if (_aspect2x1) { | ||
| scalar_type p0, p1; | ||
| // do one split in the X axis first cause penultimate full mip would have been 2x1 | ||
| _map.get(p0, uint16_t2(0, 0), _lastMipLevel); | ||
| _map.get(p1, uint16_t2(1, 0), _lastMipLevel); | ||
| p.x = __choseSecond(p0, p1, xi.x, rcpPmf) ? 1 : 0; | ||
| } | ||
|
|
||
| for (int i = _lastMipLevel - 1; i >= 0; i--) | ||
| { | ||
| p <<= 1; | ||
| const vector4_type values = __texelGather(p, i); | ||
| scalar_type wx_0, wx_1; | ||
| { | ||
| const scalar_type wy_0 = values[3] + values[2]; | ||
| const scalar_type wy_1 = values[1] + values[0]; | ||
| if (__choseSecond(wy_0, wy_1, xi.y, rcpPmf)) | ||
| { | ||
| p.y |= 1; | ||
| wx_0 = values[0]; | ||
| wx_1 = values[1]; | ||
| } | ||
| else | ||
| { | ||
| wx_0 = values[3]; | ||
| wx_1 = values[2]; | ||
| } | ||
| } | ||
| if (__choseSecond(wx_0, wx_1, xi.x, rcpPmf)) | ||
| p.x |= 1; | ||
| } | ||
|
|
||
|
|
||
| // If we don't add xi, the samples will clump at the lowest corner of each environment map texel. Each call to PartitionRandVariable rescales xi in place, and the rescaled xi decides whether the left or right (top or bottom for the y axis) child partition is chosen next. So if the output xi is 0 for some input xi, that input lies on the boundary between this partition and the previous one; conversely, if the output xi is 1, the input lies on the boundary between this partition and the next one. Hence, adding xi to the lower corner of the texel creates a gradual transition from one texel to the next. Without adding the output xi, computing the jacobian from differences of sample values would not work. | |
| // Since we want corner sampling, edge texels have to be handled as special cases. Remember that corner sampling maps uv [0,1] to [center of first texel, center of last texel]. So when p is an edge texel we have to remap xi: to [0.5, 1] when p == 0, and to [0, 0.5] when p == length - 1. | |
| if (p.x == 0) | ||
| xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); | ||
| if (p.y == 0) | ||
| xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); | ||
| if (p.x == _lastTexel.x) | ||
| xi.x = xi.x * scalar_type(0.5); | ||
| if (p.y == _lastTexel.y) | ||
| xi.y = xi.y * scalar_type(0.5); | ||
|
|
||
| // We subtract 0.5 and divide by _lastTexel instead of the map size to normalize the corner-sampled coordinate | |
| const vector2_type directionUV = (vector2_type(p.x, p.y) + xi - domain_type(0.5, 0.5)) / _lastTexel; | ||
|
|
||
| cache.rcpPmf = rcpPmf; | ||
|
|
||
| return directionUV; | ||
| } | ||
|
|
||
| density_type forwardPdf(const domain_type xi, const cache_type cache) NBL_CONST_MEMBER_FUNC | |
| { | |
| // Forward density of the sample produced by generate(): (_lastTexel.x * _lastTexel.y) divided by the reciprocal pmf stored in the cache. `xi` is unused. | |
| // Each extent is converted to scalar_type BEFORE multiplying: the product of two uint16_t values does not fit in 16 bits for common map sizes (e.g. 511*255 for a 512x256 map) and would wrap if evaluated in 16-bit integer arithmetic. | |
| const scalar_type texelExtentProduct = scalar_type(_lastTexel.x) * scalar_type(_lastTexel.y); | |
| return texelExtentProduct / cache.rcpPmf; | |
| } | |
|
|
||
| weight_type forwardWeight(const domain_type xi, const cache_type cache) NBL_CONST_MEMBER_FUNC | |
| { | |
| // The forward weight of this sampler is its forward pdf. | |
| const density_type pdf = forwardPdf(xi, cache); | |
| return pdf; | |
| } | |
|
|
||
| // Doesn't comply with the sampler concept. Takes only the sampled UV: the value loaded from the accessor at `codomainVal` is multiplied by `_map.getAvgLuma()`, so the average luma is read from the accessor rather than passed in by the caller. | |
| density_type backwardPdf(codomain_type codomainVal) NBL_CONST_MEMBER_FUNC | |
| { | |
| return _map.load(codomainVal) * _map.getAvgLuma(); | |
| } | |
|
|
||
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
| weight_type backwardWeight(const codomain_type codomainVal) NBL_CONST_MEMBER_FUNC | |
| { | |
| // The backward weight of this sampler is its backward pdf. | |
| const density_type pdf = backwardPdf(codomainVal); | |
| return pdf; | |
| } | |
|
|
||
| }; | ||
|
|
||
| // TODO(kevinyu): Add constraint for PostWarpT | ||
| template <typename LuminanceAccessorT, typename PostWarpT | ||
| NBL_PRIMARY_REQUIRES( | ||
| hierarchical_image::MipmappedLuminanceReadAccessor<LuminanceAccessorT> | ||
| ) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when #1001 gets merged, this sampler will be a and your PostWarpT will be required to meet |
||
| struct ComposedHierarchicalSampler | ||
| { | ||
| using this_type = ComposedHierarchicalSampler<LuminanceAccessorT, PostWarpT>; | ||
| using warp_generator_type = HierarchicalLuminanceSampler<LuminanceAccessorT>; | ||
| using scalar_type = typename LuminanceAccessorT::value_type; | ||
| using density_type = scalar_type; | ||
| using weight_type = scalar_type; | ||
| using vector2_type = vector<scalar_type, 2>; | ||
| using vector3_type = vector<scalar_type, 3>; | ||
| using vector4_type = vector<scalar_type, 4>; | ||
| using domain_type = typename warp_generator_type::domain_type; | ||
| using codomain_type = typename PostWarpT::codomain_type; | ||
|
|
||
| static_assert(is_same_v<typename PostWarpT::domain_type, typename warp_generator_type::codomain_type> && is_same_v<typename PostWarpT::density_type, density_type> && is_same_v<typename PostWarpT::weight_type, weight_type>); | ||
|
|
||
| struct cache_type | ||
| { | ||
| typename warp_generator_type::cache_type warpGeneratorCache; | ||
| typename PostWarpT::density_type postWarpPdf; | ||
| }; | ||
|
|
||
| warp_generator_type _warpGenerator; | ||
| PostWarpT _postWarp; | ||
|
|
||
| static this_type create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap) | |
| { | |
| // Builds the composed sampler by creating the inner warp generator from the luminance accessor. | |
| // NOTE(review): `_postWarp` is not assigned here and keeps whatever state a default `this_type` gives it - confirm that is intended. | |
| this_type sampler; | |
| sampler._warpGenerator = warp_generator_type::create(lumaMap); | |
| return sampler; | |
| } | |
|
|
||
| codomain_type generate(const domain_type xi, NBL_REF_ARG(cache_type) cache) NBL_CONST_MEMBER_FUNC | |
| { | |
| // Stage 1: draw a UV from the warp generator (this fills cache.warpGeneratorCache) | |
| const typename warp_generator_type::codomain_type uv = _warpGenerator.generate(xi, cache.warpGeneratorCache); | |
| // Stage 2: push that UV through the post-warp | |
| typename PostWarpT::cache_type postCache; | |
| const codomain_type warped = _postWarp.generate(uv, postCache); | |
| // The post-warp density has to be stored here, so that the warp generator does not have to be run again just to feed its output to PostWarpT, even though for spherical it is unused. | |
| cache.postWarpPdf = _postWarp.forwardPdf(uv, postCache); | |
| return warped; | |
| } | |
|
|
||
| density_type forwardPdf(const domain_type xi, const cache_type cache) NBL_CONST_MEMBER_FUNC | |
| { | |
| // Forward density of the composition: the warp generator's forward pdf times the post-warp pdf cached by generate(). | |
| const density_type warpPdf = _warpGenerator.forwardPdf(xi, cache.warpGeneratorCache); | |
| return warpPdf * cache.postWarpPdf; | |
| } | |
|
|
||
| weight_type forwardWeight(const domain_type xi, const cache_type cache) NBL_CONST_MEMBER_FUNC | |
| { | |
| // The forward weight of the composed sampler is its forward pdf. | |
| const density_type pdf = forwardPdf(xi, cache); | |
| return pdf; | |
| } | |
|
|
||
| density_type backwardPdf(const codomain_type codomainVal) NBL_CONST_MEMBER_FUNC | |
| { | |
| // Backward density of the composition: map the sample back through the post-warp, then multiply the post-warp's backward pdf by the warp generator's backward pdf at that point. | |
| const typename PostWarpT::domain_type postWarpDomain = _postWarp.generateInverse(codomainVal); | |
| // HierarchicalLuminanceSampler::backwardPdf takes only the UV and reads the average luma from its own accessor, so no second argument is passed. | |
| return _postWarp.backwardPdf(codomainVal) * _warpGenerator.backwardPdf(postWarpDomain); | |
| } | |
|
|
||
| weight_type backwardWeight(const codomain_type codomainVal) NBL_CONST_MEMBER_FUNC | |
| { | |
| // The backward weight of the composed sampler is its backward pdf. | |
| const density_type pdf = backwardPdf(codomainVal); | |
| return pdf; | |
| } | |
| }; | ||
|
|
||
|
|
||
|
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| #ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_ACCESSORS_INCLUDED_ | ||
|
|
||
| #include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl" | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace sampling | ||
| { | ||
| namespace hierarchical_image | ||
| { | ||
|
|
||
| // declare concept | ||
| #define NBL_CONCEPT_NAME MipmappedLuminanceReadAccessor | ||
| #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) | ||
| #define NBL_CONCEPT_TPLT_PRM_NAMES (AccessorT) | ||
| // not the greatest syntax but works | ||
| #define NBL_CONCEPT_PARAM_0 (accessor,AccessorT) | ||
| #define NBL_CONCEPT_PARAM_1 (pixelCoord,uint16_t2) | ||
| #define NBL_CONCEPT_PARAM_2 (level,uint16_t) | ||
| #define NBL_CONCEPT_PARAM_3 (outVal,typename AccessorT::value_type) | ||
| // start concept | ||
| NBL_CONCEPT_BEGIN(4) | ||
| // need to be defined AFTER the concept begins | ||
| #define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 | ||
| #define pixelCoord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 | ||
| #define level NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 | ||
| #define outVal NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 | ||
| NBL_CONCEPT_END( | ||
| ((NBL_CONCEPT_REQ_TYPE)(AccessorT::value_type)) | ||
| // Note(kevin): I don't use MipmappedLoadableImage here, since that concept requires the layer as a parameter, which would force the sampler to store the layerIndex. The logic is the same across all layers, so the accessor should be the one that stores the layerIndex. | |
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template get(outVal,pixelCoord,level)) , ::nbl::hlsl::is_same_v, void)) | ||
| // Ask(kevin): Should getAvgLuma follow get, where the outVal is the first parameter instead of the return value? | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.getAvgLuma()), ::nbl::hlsl::is_same_v, typename AccessorT::value_type)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.resolution()), ::nbl::hlsl::is_same_v, uint16_t2)) | ||
| ); | ||
| #undef accessor | ||
| #undef pixelCoord | ||
| #undef level | ||
| #undef outVal | ||
| #include <nbl/builtin/hlsl/concepts/__end.hlsl> | ||
|
|
||
| // declare concept | ||
| #define NBL_CONCEPT_NAME LuminanceReadAccessor | ||
| #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) | ||
| #define NBL_CONCEPT_TPLT_PRM_NAMES (AccessorT) | ||
| // not the greatest syntax but works | ||
| #define NBL_CONCEPT_PARAM_0 (accessor, AccessorT) | ||
| // start concept | ||
| NBL_CONCEPT_BEGIN(1) | ||
| // need to be defined AFTER the concept begins | ||
| #define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 | ||
| NBL_CONCEPT_END( | ||
| ((NBL_CONCEPT_REQ_TYPE)(AccessorT::value_type)) | ||
| ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(concepts::accessors::GenericReadAccessor, AccessorT, typename AccessorT::value_type, float32_t2)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.getAvgLuma()), ::nbl::hlsl::is_same_v, typename AccessorT::value_type)) | ||
| ); | ||
| #undef accessor | ||
| #include <nbl/builtin/hlsl/concepts/__end.hlsl> | ||
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
|
|
||
| // gatherUv returns 4 UVs in a square for manual bilinear interpolation with differentiability | |
| // declare concept | ||
| #define NBL_CONCEPT_NAME WarpAccessor | ||
| #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) | ||
| #define NBL_CONCEPT_TPLT_PRM_NAMES (WarpAccessorT) | ||
| // not the greatest syntax but works | ||
| #define NBL_CONCEPT_PARAM_0 (accessor,WarpAccessorT) | ||
| #define NBL_CONCEPT_PARAM_1 (coord,vector<float32_t, 2>) | ||
| #define NBL_CONCEPT_PARAM_2 (val, matrix<typename WarpAccessorT::scalar_type, 4, 2>) | ||
| #define NBL_CONCEPT_PARAM_3 (interpolant, vector<typename WarpAccessorT::scalar_type, 2>) | ||
| // start concept | ||
| NBL_CONCEPT_BEGIN(4) | ||
| // need to be defined AFTER the concept begins | ||
| #define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 | ||
| #define coord NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 | ||
| #define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 | ||
| #define interpolant NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 | ||
| NBL_CONCEPT_END( | ||
| ((NBL_CONCEPT_REQ_TYPE)(WarpAccessorT::scalar_type)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.gatherUv(coord, val)), ::nbl::hlsl::is_same_v, void)) | ||
| ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.resolution()), ::nbl::hlsl::is_same_v, uint16_t2)) | ||
| ); | ||
| #undef accessor | ||
| #undef coord | ||
| #undef val | ||
| #undef interpolant | ||
| #include <nbl/builtin/hlsl/concepts/__end.hlsl> | ||
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
|
|
||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| #ifndef _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_ | ||
| #define _NBL_HLSL_SAMPLING_HIERARCHICAL_IMAGE_COMMON_INCLUDED_ | ||
|
|
||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace sampling | ||
| { | ||
| namespace hierarchical_image | ||
| { | ||
|
|
||
| NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float32_t3 LumaRgbCoefficients = { 0.2126729f, 0.7151522f, 0.0721750f }; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use numbers from the OETF/EOTF headers |
||
|
|
||
| // Push constants for the luminance-map generation pass. | |
| struct SLumaGenPushConstants | |
| { | |
| uint32_t lumaMapWidth : 16; | |
| uint32_t lumaMapHeight : 16; | |
| // Stored as a 16-bit field of a uint32_t instead of a uint16_t member: 16-bit types cannot be used in push constants on every target (AMD was called out in review). | |
| uint32_t lumaMapLayer : 16; | |
| }; | |
|
|
||
| // Push constants for the warp-map generation pass. | |
| struct SWarpGenPushConstants | |
| { | |
| uint32_t lumaMapWidth : 16; | |
| uint32_t lumaMapHeight : 16; | |
| uint32_t warpMapWidth : 16; | |
| uint32_t warpMapHeight : 16; | |
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
| // Both warpMap and lumaMap should have the same layer count | |
| // Stored as a 16-bit field of a uint32_t instead of a uint16_t member: 16-bit types cannot be used in push constants on every target (AMD). | |
| uint32_t lumaMapLayer : 16; | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you cannot use uint16 in push constants (AMD), have to use uint32_t and use a bitfield instead |
||
| }; | |
|
|
||
| NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GenWarpWorkgroupDim = 16; | ||
| NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t GenLumaWorkgroupDim = 16; | ||
|
|
||
|
devshgraphicsprogramming marked this conversation as resolved.
|
||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not the last, it's the penultimate (one minus last), because you're not after the last one which is 1x1, you're tapping 2x1 or 2x2