UE SSR Implementation

prev-frame reduction

SSRTPrevFrameReduction.usf

用来生成mip map的source texture 都是上一帧的depth 和 color

  • 根据screen space 的 velocity 计算上一帧的uv
      float4 EncodedVelocity = GBufferVelocityTexture.SampleLevel(GBufferVelocityTextureSampler, SceneBufferUV, 0);
      if (EncodedVelocity.x > 0.0)
      {
          PrevScreen = ThisClip.xy - DecodeVelocityFromTexture(EncodedVelocity).xy;
      }
    
  • mip map level = 5
  • 第一个pass生成mip map level 0-1 level 0 就直接是prev frame 的color texture的下一级采样了,等于不再浪费一个level去存原始的color值,这难道就是传说中的SSR在half resolution里做。 这里的操作很骚气,每个pass最多只生成3个level,第一个pass只生成2个level,所以只输出最后2个color_output。
  • 第二个pass生成mip map level 2-4

    判断是否计算天空

    bool bIsSky = WorldDepth > SkyDistance && SkyDistance > 0.0;
    

    depth

    取了最大,这个有点奇怪,按理说不是最小的depth么? 并不是在这个pass生成的hi-z, 但是也是取了四个pixel中的最大depth。 后来看了下,应该是做SSRT的时候用的是minimum depth,这里计算权重的时候还是用了最大的depth.

color and alpha

alpha应该是为半透明物体的渲染时用的。 其实这里看实现真的有点诡异的。

  • 取前一帧的原color值时 这里是根据uv和depth反算出世界坐标,然后计算到上一级的世界坐标的平方,再除以radius平方来作为weight的,讲道理这四个weight加一起也不一定是1,这样直接加权会不会有问题。

      float WorldDepthToPixelWorldRadius = GetTanHalfFieldOfView().x * View.ViewSizeAndInvSize.z * 100; //  这边的100倍是随意弄出来的,还是有啥含义呢
     // 这里的radius计算也是有点迷,不太理解这是什么含义,这里是拿world_depth / near_plane_depth么
      float WorldBluringRadius = WorldDepthToPixelWorldRadius * WorldDepth;
      float InvSquareWorldBluringRadius = rcp(WorldBluringRadius * WorldBluringRadius);
    
      float2 SampleUV = PrevFrameUV; // + View.BufferSizeAndInvSize.zw * (float2(i % 2, i / 2) - 0.5);
                          SampleUV = clamp(SampleUV, PrevBufferBilinearUVMinMax.xy, PrevBufferBilinearUVMinMax.zw);
    
      float PrevDeviceZ = PrevSceneDepth.SampleLevel(PrevSceneDepthSampler, SampleUV, 0).r;
    
      float3 SampleWorldPosition = ComputeTranslatedWorldPosition(PrevScreen.xy, ConvertFromDeviceZ(PrevDeviceZ), /* bIsPrevFrame = */ true);
    
      float SampleDistSquare = length2(RefWorldPosition - SampleWorldPosition);
    
      float SampleWeight = saturate(1 - SampleDistSquare * InvSquareWorldBluringRadius);
    
      PrevColor = float4(PrevSceneColor.SampleLevel(PrevSceneColorSampler, SampleUV, 0).rgb * SampleWeight, SampleWeight);
    
  • mip map level 0-4 这里的权重计算方式跟上面还不一样,是直接用depth相减然后除以radius平方,最后还除以一个4,这是什么骚操作呢,看不懂,难道是因为不好再反计算world position了,总之感觉有点怪。(最后的除以 4 应该就是对下面 2x2 循环里的四个样本取平均。)

      UNROLL
      for (uint x = 0; x < 2; x++)
      {
          UNROLL
          for (uint y = 0; y < 2; y++)
          {
              ....
    
              float SampleWeight = 1.0;
              #if DIM_LEAK_FREE
              {
                  float NeighborDepth = SharedFurthestDepth[LDSIndex];
                  float SampleDist = (FurthestDepth - NeighborDepth);
    
                  SampleWeight = saturate(1 - (SampleDist * SampleDist) * InvSquareWorldBluringRadius);
              }
              #endif
    
              ReducedColor += Color * SampleWeight;
          }
      }
    
      ReducedColor *= rcp(4.0);
    

tile-based classification

搜了下感觉这个代码是不是废弃了,并没有人调用,看逻辑感觉也有点诡异。 SSRTTileClassification.usf & SSRTTileClassificationBuffer.ush

分组

8x8个像素为一个tile,每个tile分成8个方向去计算。取8x8中的(4,4)位置作为代表计算。

// GROUP_TILE_SIZE 8
// SSRT_DIRECTIONALITY_DIVISION 8
[numthreads(GROUP_TILE_SIZE, GROUP_TILE_SIZE, SSRT_DIRECTIONALITY_DIVISION)]

uint2 TileCoord = DispatchThreadId;
    uint DirectionId = GroupThreadId.z;

float2 ViewportUV = (TileCoord * GROUP_TILE_SIZE + GROUP_TILE_SIZE / 2) * View.ViewSizeAndInvSize.zw;

float DirectionAndle = float(DirectionId) * (2 * PI * rcp(float(SSRT_DIRECTIONALITY_DIVISION)));

classification

讲道理这边应该用furthest depth.

  • 计算screen space 下的 start, direction, end

        float2 RayStartScreen = ViewportUVToScreenPos(ViewportUV);
      float2 RayStepScreen = normalize(View.ViewSizeAndInvSize.zw * RayPixelDirection);
      RayStepScreen *= GetStepScreenFactorToClipAtScreenEdge(RayStartScreen, RayStepScreen);
    
      float2 RayEndViewportUV = ScreenPosToViewportUV(RayStartScreen + RayStepScreen);
    
      float2 RayStartPixel = ViewportUV * View.ViewSizeAndInvSize.xy;
      float2 RayEndPixel = RayEndViewportUV * View.ViewSizeAndInvSize.xy;
      float MaxSampleDistance = length(RayStartPixel - RayEndPixel);
    
  • ray marching 每个方向走五步,步子越来越大,每一步计算周围的最小depth,除以delta_width, 计算一个最大的角度,存储这个最大的角度和tile代表pixel的最小depth. 这个有啥参考价值么,思路感觉有点怪呀~

ray discard

根据之前计算的最大角度去剔除一些ray marching direction.

// Decides whether a screen-space ray can be culled before marching: compares
// the ray's elevation angle against the per-tile maximum occluder angle that
// the tile-classification pass stored for this ray's direction bucket.
bool TestRayEarlyReturn(FSSRTTileInfos TileInfos, FSSRTRay Ray)
{
    // Screen-space step converted to pixel units.
    float2 RayStepPixel = Ray.RayStepScreen.xy * View.ViewSizeAndInvSize.xy;

    // DeltaU: lateral travel scaled back toward viewport-UV-like units;
    // DeltaZ: travel along device depth for the same step.
    // NOTE(review): the 0.5 * InvViewSize factor presumably converts pixels to
    // screen-pos half-units so the angle matches the classification pass -- confirm.
    float DeltaU = length(RayStepPixel) * (0.5 * View.ViewSizeAndInvSize.z);
    float DeltaZ = Ray.RayStepScreen.z;
    // Elevation angle of the ray relative to the depth axis (larger = more grazing).
    float RayTheta = atan2(DeltaU, -DeltaZ);

    // Bucket the ray into one of SSRT_DIRECTIONALITY_DIVISION direction bins.
    uint DirectionId =ComputeRayDirectionId(Ray);
    // Cull when the ray rises above the tile's recorded maximum angle for that
    // direction: nothing recorded by classification can be hit above it.
    bool bEarlyReturn = RayTheta > TileInfos.Directionality[DirectionId];

    return bEarlyReturn;
}

deal with gbuffer data

  • roughness 这一步是为了啥??
      // Returns a [0,1] factor that fades SSR out on rough surfaces.
      // For the fade to decrease with roughness, SSRParams.y must be negative
      // (it encodes -2 / MaxRoughness-style scale): at Roughness = 0 the
      // expression is 2, clamped to 1; it reaches 0 at the max-roughness setting.
      float GetRoughnessFade(in float Roughness)
      {
          // mask SSR to reduce noise and for better performance, roughness of 0 should have SSR, at MaxRoughness we fade to 0
          return min(Roughness * SSRParams.y + 2, 1.0);
      }
    

SSR quality

different quality 决定

  • num of steps
  • num of rays
  • b_glossy ?? 只 trace 一条光线的时候 是否用 GGX importance sampling
#if SSR_QUALITY == 1
    uint NumSteps = 8;
    uint NumRays = 1;
    bool bGlossy = false;
#elif SSR_QUALITY == 2
    uint NumSteps = 16;
    uint NumRays = 1;
    #if SSR_OUTPUT_FOR_DENOISER
        bool bGlossy = true;
    #else
        bool bGlossy = false;
    #endif
#elif SSR_QUALITY == 3
    uint NumSteps = 8;
    uint NumRays = 4;
    bool bGlossy = true;
#else // SSR_QUALITY == 4
    uint NumSteps = 12;
    uint NumRays = 12;
    bool bGlossy = true;
#endif

importance sampling

half vector

  • GGX of visible normal
  • GGX(markdown 把 `*` 吃掉了,原代码应为:)

        float3x3 TangentBasis = GetTangentBasis( N );
        float3 TangentV = mul( TangentBasis, V );

        float2 E = Hammersley16( i, NumRays, Random );

        float3 H = mul( ImportanceSampleVisibleGGX( UniformSampleDisk(E), roughness * roughness, TangentV ).xyz, TangentBasis );
        float3 L = 2 * dot( V, H ) * H - V;

```cpp
// [ Heitz 2018, "Sampling the GGX Distribution of Visible Normals" ]
// http://jcgt.org/published/0007/04/01/
// PDF = G_SmithV * VoH * D / NoV / (4 * VoH)
// PDF = G_SmithV * D / (4 * NoV)
// Samples a half vector H from the GGX distribution of normals *visible* from
// the tangent-space view direction V (VNDF sampling).
//   DiskE : 2D sample point on the unit disk (e.g. from UniformSampleDisk).
//   a2    : squared GGX alpha (alpha = roughness^2).
//   V     : tangent-space view vector (+Z is the surface normal).
// Returns float4(H, pdf) with H in tangent space.
float4 ImportanceSampleVisibleGGX( float2 DiskE, float a2, float3 V )
{
    // NOTE: See below for anisotropic version that avoids this sqrt
    float a = sqrt(a2);

    // stretch
    // Warp V into the space where the GGX ellipsoid becomes a hemisphere.
    float3 Vh = normalize( float3( a * V.xy, V.z ) );

    // Orthonormal basis
    // Tangent0 is orthogonal to N.
    #if 1 // Stable tangent basis based on V.
        float3 Tangent0 = (V.z < 0.9999) ? normalize( cross( float3(0, 0, 1), V ) ) : float3(1, 0, 0);
        float3 Tangent1 = normalize(cross( Vh, Tangent0 ));
    #else
        float3 Tangent0 = (Vh.z < 0.9999) ? normalize( cross( float3(0, 0, 1), Vh ) ) : float3(1, 0, 0);
        float3 Tangent1 = cross( Vh, Tangent0 );
    #endif

    // Remap the disk sample: blend from a full disk toward the half disk facing
    // Vh, so all samples land on the hemisphere visible from the view direction
    // (the key step of Heitz's VNDF construction).
    float2 p = DiskE;
    float s = 0.5 + 0.5 * Vh.z;
    p.y = (1 - s) * sqrt( 1 - p.x * p.x ) + s * p.y;

    // Project the disk point up onto the hemisphere around Vh.
    float3 H;
    H  = p.x * Tangent0;
    H += p.y * Tangent1;
    H += sqrt( saturate( 1 - dot( p, p ) ) ) * Vh;

    // unstretch
    // Undo the stretch; max(0, H.z) guards against samples dipping below the surface.
    H = normalize( float3( a * H.xy, max(0.0, H.z) ) );

    return float4(H, VisibleGGXPDF(V, H, a2));
}

// 16-bit Hammersley point Index of NumSamples, decorrelated per pixel/frame by
// the two 16-bit offsets in Random (a Cranley-Patterson-style rotation/scramble).
float2 Hammersley16( uint Index, uint NumSamples, uint2 Random )
{
    // E1: stratified offset Index/NumSamples, toroidally shifted by Random.x / 65536.
    float E1 = frac( (float)Index / NumSamples + float( Random.x ) * (1.0 / 65536.0) );
    // E2: van der Corput radical inverse of Index kept to 16 bits, XOR-scrambled by Random.y.
    float E2 = float( ( reversebits(Index) >> 16 ) ^ Random.y ) * (1.0 / 65536.0);
    return float2( E1, E2 );
}

// Maps a uniform sample E in [0,1)^2 to a uniformly distributed point on the
// unit disk; sqrt on the radius compensates for disk area growing with r.
float2 UniformSampleDisk( float2 E )
{
    float Theta = 2 * PI * E.x;
    float Radius = sqrt( E.y );
    return Radius * float2( cos( Theta ), sin( Theta ) );
}

// [ Duff et al. 2017, "Building an Orthonormal Basis, Revisited" ]
// Discontinuity at TangentZ.z == 0

// Builds an orthonormal basis around the (assumed normalized) axis TangentZ
// without any normalize/sqrt; the returned rows are (TangentX, TangentY, TangentZ).
float3x3 GetTangentBasis( float3 TangentZ )
{
    // Sign trick from the paper avoids catastrophic cancellation as TangentZ.z -> -1.
    const float Sign = TangentZ.z >= 0 ? 1 : -1;
    const float a = -rcp( Sign + TangentZ.z );
    const float b = TangentZ.x * TangentZ.y * a;

    float3 TangentX = { 1 + Sign * a * Pow2( TangentZ.x ), Sign * b, -Sign * TangentZ.x };
    float3 TangentY = { b,  Sign + a * Pow2( TangentZ.y ), -TangentZ.y };

    return float3x3( TangentX, TangentY, TangentZ );
}

incident light

  • cos hemisphere
  • uniform concentric disk

jitter

InterleavedGradientNoise 作为step_offset

// high frequency dither pattern appearing almost random without banding steps
//note: from "NEXT GENERATION POST PROCESSING IN CALL OF DUTY: ADVANCED WARFARE"
//      http://advances.realtimerendering.com/s2014/index.html
// Epic extended by FrameId
// ~7 ALU operations (2 frac, 3 mad, 2 *)
// @return 0..1
// SSR uses this as the per-pixel ray-march step offset (jitter), turning the
// banding of the coarse march into stable high-frequency noise.
float InterleavedGradientNoise( float2 uv, float FrameId )
{
    // magic values are found by experimentation
    // Per-frame shift decorrelates the pattern across frames so TAA can integrate it away.
    uv += FrameId * (float2(47, 17) * 0.695f);

    const float3 magic = float3( 0.06711056f, 0.00583715f, 52.9829189f );
    return frac(magic.z * frac(dot(uv, magic.xy)));
}

ray marching

transform a ray for screen space ray casting

  • 计算一条ray的world space的 start 和 end point
  • 将world space 下的 start 和 end 转换到 screen space --> RayStartScreen
  • 计算screen space下的trace direction --> RayStepScreen
  • 计算 CompareTolerance 是做啥的 ???

    // Converts a world-space reflection ray into a screen-space ray for the SSR
    // march: computes the start position, the screen-space step, and the depth
    // CompareTolerance used when testing samples for a hit.
    //   RayOriginTranslatedWorld : ray start, translated-world space.
    //   WorldRayDirection        : normalized world-space direction.
    //   WorldTMax                : requested max march distance.
    //   SceneDepth               : view-space depth at the origin pixel.
    //   SlopeCompareToleranceScale : scales the depth-slope term of the tolerance.
    //   bExtendRayToScreenBorder : if true, always stretch the ray to the screen edge.
    //   bRayWasClipped (out)     : true when the ray was shortened (or extended).
    FSSRTRay InitScreenSpaceRayFromWorldSpace(
      float3 RayOriginTranslatedWorld,
      float3 WorldRayDirection,
      float WorldTMax,
      float SceneDepth,
      float SlopeCompareToleranceScale,
      const bool bExtendRayToScreenBorder,
      out bool bRayWasClipped)
    {
      // Keep the march distance finite.
      WorldTMax = min(WorldTMax, 1000000);
    
      float3 ViewRayDirection = mul(float4(WorldRayDirection, 0.0), View.TranslatedWorldToView).xyz;
    
      // If the ray heads toward the camera (view-space z < 0), stop it at 95% of
      // the distance to z = 0 so the end point never crosses the camera plane.
      float RayEndWorldDistance = ViewRayDirection.z < 0.0 ? min(-0.95 * SceneDepth / ViewRayDirection.z, WorldTMax) : WorldTMax;
    
      float3 RayEndWorld = RayOriginTranslatedWorld + WorldRayDirection * RayEndWorldDistance;
    
      // Project both endpoints to clip space, then divide to [-1,1] screen space.
      float4 RayStartClip = mul(float4(RayOriginTranslatedWorld, 1.0), View.TranslatedWorldToClip);
      float4 RayEndClip = mul(float4(RayEndWorld, 1.0), View.TranslatedWorldToClip);
    
      float3 RayStartScreen = RayStartClip.xyz * rcp(RayStartClip.w);
      float3 RayEndScreen = RayEndClip.xyz * rcp(RayEndClip.w);
    
      // Auxiliary point: the start pushed RayEndWorldDistance straight along view
      // z. Its device-depth difference from the start measures how fast depth
      // changes over the ray's length -- used below as a slope for the tolerance.
      float4 RayDepthClip = RayStartClip + mul(float4(0, 0, RayEndWorldDistance, 0), View.ViewToClip);
      float3 RayDepthScreen = RayDepthClip.xyz * rcp(RayDepthClip.w);
    
      FSSRTRay Ray;
      Ray.RayStartScreen = RayStartScreen;
      Ray.RayStepScreen = RayEndScreen - RayStartScreen;
    
      // Factor that makes the step end exactly at the screen edge (can be > 1
      // when the ray would end inside the screen and extension is requested).
      float ClipToScreenFactor = GetStepScreenFactorToClipAtScreenEdge(RayStartScreen.xy, Ray.RayStepScreen.xy);
      if (!bExtendRayToScreenBorder)
      {
          // Clipped if either the screen edge or the toward-camera clamp above
          // shortened the requested WorldTMax.
          bRayWasClipped = ClipToScreenFactor < 1.0 || RayEndWorldDistance != WorldTMax;
          ClipToScreenFactor = min(ClipToScreenFactor, 1.0);
      }
      else
      {
          bRayWasClipped = true;
      }
    
      Ray.RayStepScreen *= ClipToScreenFactor;
      // CompareTolerance: the depth "thickness" within which a sample counts as a
      // hit during marching -- at least the ray's total device-z travel, widened
      // for grazing rays via the depth slope computed from RayDepthScreen.
      Ray.CompareTolerance = max(abs(Ray.RayStepScreen.z), (RayStartScreen.z - RayDepthScreen.z) * SlopeCompareToleranceScale);
    
      return Ray;
    }
    

    ray marching

  • mip map level
    Level += (8.0 / NumSteps) * Roughness;
    
  • 四个step作为一把batch一起算
  • 计算hit的时候用二分法,还用了前后两步插值

hit result

  • 所有rays的结果累加求平均
  • 剩下的是在做啥呦
    OutColor.rgb *= rcp( 1 - Luminance(OutColor.rgb) )
    OutColor *= RoughnessFade;
    OutColor *= SSRParams.r;
    
Copyright © tingxia.top 2021 all right reserved,powered by Gitbook该文件修订时间: 2022-10-05 11:24:53

results matching ""

    No results matching ""