diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs index 5660df68a..3049ea132 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs @@ -73,7 +73,7 @@ namespace Ryujinx.Graphics.Gpu.Engine } else { - unsafe int Convert(Span dstSpan, ReadOnlySpan srcSpan) where T : unmanaged + unsafe bool Convert(Span dstSpan, ReadOnlySpan srcSpan) where T : unmanaged { fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) { @@ -91,10 +91,10 @@ namespace Ryujinx.Graphics.Gpu.Engine } } } - return 1; + return true; } - int _ = srcBpp switch + bool _ = srcBpp switch { 1 => Convert(dstSpan, srcSpan), 2 => Convert(dstSpan, srcSpan), diff --git a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs index c393cc474..2227e5ffc 100644 --- a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs +++ b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs @@ -35,6 +35,7 @@ namespace Ryujinx.Graphics.Texture // Variables for built in iteration. private int _yPart; + private int _yzPart; private int _zPart; public BlockLinearLayout( @@ -105,13 +106,14 @@ namespace Ryujinx.Graphics.Texture public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) { // Justification: - // The offset is a combination of separate x and y parts. + // The 2D offset is a combination of separate x and y parts. // Both components increase with input and never overlap bits. - // Therefore for each component, the minimum input value is the lowest that component can go. Opposite goes for maximum. + // Therefore for each component, the minimum input value is the lowest that component can go. + // Minimum total value is minimum X component + minimum Y component. Similar goes for maximum. 
int start = GetOffset(x, y, 0); - int end = GetOffset(x + width, y + height, 0); - return (start, (end - start) + _texBpp); + int end = GetOffset(x + width - 1, y + height - 1, 0) + _texBpp; // Cover the last pixel. + return (start, end - start); } public bool LayoutMatches(BlockLinearLayout other) @@ -134,6 +136,7 @@ namespace Ryujinx.Graphics.Texture offset += ((y & 0x01) >> 0) << 4; _yPart = offset; + _yzPart = offset + _zPart; } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -144,24 +147,45 @@ namespace Ryujinx.Graphics.Texture offset += ((z & _bdMask) * GobSize) << _bhShift; _zPart = offset; + _yzPart = offset + _yPart; + } + + /// + /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 16. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffsetWithLineOffset16(int x) + { + int offset = (x / GobStride) << _xShift; + + offset += ((x & 0x3f) >> 5) << 8; + offset += ((x & 0x1f) >> 4) << 5; + + return offset + _yzPart; + } + + /// + /// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 64. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffsetWithLineOffset64(int x) + { + int offset = (x / GobStride) << _xShift; + + return offset + _yzPart; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetOffsetWithLineOffset(int x) + public int GetOffset(int x) { + x <<= _bppShift; int offset = (x / GobStride) << _xShift; offset += ((x & 0x3f) >> 5) << 8; offset += ((x & 0x1f) >> 4) << 5; offset += (x & 0x0f); - return offset + _yPart + _zPart; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetOffset(int x) - { - return GetOffsetWithLineOffset(x << _bppShift); + return offset + _yzPart; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs index 516b325c2..95768aede 100644 --- a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -65,10 +65,14 @@ namespace Ryujinx.Graphics.Texture int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); + int xStart = strideTrunc / bytesPerPixel; int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int outStrideGap = stride - w * bytesPerPixel; + int alignment = gobWidth; if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) @@ -86,13 +90,14 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, bytesPerPixel); - unsafe void Convert(Span output, ReadOnlySpan data) where T : unmanaged + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged { - fixed (byte* outputBPtr = output, dataBPtr = data) + fixed (byte* outputPtr = output, dataPtr = data) { + byte* outPtr = outputPtr + outOffs; for (int layer = 0; layer < layers; layer++) { - int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); + byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); for (int z = 0; z < d; z++) { 
@@ -100,51 +105,58 @@ namespace Ryujinx.Graphics.Texture for (int y = 0; y < h; y++) { layoutConverter.SetY(y); - for (int x = 0; x < strideTrunc; x += 16) - { - int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x); - *(Vector128*)(outputBPtr + outOffs + x) = *(Vector128*)(dataBPtr + offset); + for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) + { + byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)offset; + Vector128 value2 = *(Vector128*)offset2; + Vector128 value3 = *(Vector128*)offset3; + Vector128 value4 = *(Vector128*)offset4; + + *(Vector128*)outPtr = value; + *(Vector128*)(outPtr + 16) = value2; + *(Vector128*)(outPtr + 32) = value3; + *(Vector128*)(outPtr + 48) = value4; } - for (int x = xStart; x < w; x++) + for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) { - int offset = inBaseOffset + layoutConverter.GetOffset(x); + byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); - ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); + *(Vector128*)outPtr = *(Vector128*)offset; } - outOffs += stride; + for (int x = xStart; x < w; x++, outPtr += bytesPerPixel) + { + byte* offset = inBaseOffset + layoutConverter.GetOffset(x); + + *(T*)outPtr = *(T*)offset; + } + + outPtr += outStrideGap; } } } + outOffs += stride * h * d * layers; } + return true; } - switch (bytesPerPixel) + bool _ = bytesPerPixel switch { - case 1: - Convert(output, data); - break; - case 2: - Convert(output, data); - break; - case 4: - Convert(output, data); - break; - case 8: - Convert(output, data); - break; - case 12: - Convert(output, data); - break; - case 16: - Convert>(output, data); - break; - - default: - throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); - } + 1 => Convert(output, data), + 2 => Convert(output, 
data), + 4 => Convert(output, data), + 8 => Convert(output, data), + 12 => Convert(output, data), + 16 => Convert>(output, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; } return output; } @@ -162,52 +174,19 @@ namespace Ryujinx.Graphics.Texture int h = BitUtils.DivRoundUp(height, blockHeight); int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int lineSize = w * bytesPerPixel; Span output = new byte[h * outStride]; int outOffs = 0; + int inOffs = 0; - unsafe void Convert(Span output, ReadOnlySpan data) where T : unmanaged + for (int y = 0; y < h; y++) { - fixed (byte* outputBPtr = output, dataBPtr = data) - { - for (int y = 0; y < h; y++) - { - for (int x = 0; x < w; x++) - { - int offset = y * stride + x * bytesPerPixel; + data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); - ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); - } - - outOffs += outStride; - } - } - } - - switch (bytesPerPixel) - { - case 1: - Convert(output, data); - break; - case 2: - Convert(output, data); - break; - case 4: - Convert(output, data); - break; - case 8: - Convert(output, data); - break; - case 12: - Convert(output, data); - break; - case 16: - Convert>(output, data); - break; - - default: - throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); + inOffs += stride; + outOffs += outStride; } return output; @@ -257,8 +236,16 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ >>= 1; } + int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); + + int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; + int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int inStrideGap = stride - w * bytesPerPixel; + int alignment = gobWidth; if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) @@ -276,13 +263,14 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, 
bytesPerPixel); - unsafe void Convert(Span output, ReadOnlySpan data) where T : unmanaged + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged { - fixed (byte* outputBPtr = output, dataBPtr = data) + fixed (byte* outputPtr = output, dataPtr = data) { + byte* inPtr = dataPtr + inOffs; for (int layer = 0; layer < layers; layer++) { - int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); + byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); for (int z = 0; z < d; z++) { @@ -290,44 +278,58 @@ namespace Ryujinx.Graphics.Texture for (int y = 0; y < h; y++) { layoutConverter.SetY(y); - for (int x = 0; x < w; x++) - { - int offset = outBaseOffset + layoutConverter.GetOffset(x); - *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; + for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) + { + byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)inPtr; + Vector128 value2 = *(Vector128*)(inPtr + 16); + Vector128 value3 = *(Vector128*)(inPtr + 32); + Vector128 value4 = *(Vector128*)(inPtr + 48); + + *(Vector128*)offset = value; + *(Vector128*)offset2 = value2; + *(Vector128*)offset3 = value3; + *(Vector128*)offset4 = value4; } - inOffs += stride; + for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16) + { + byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128*)offset = *(Vector128*)inPtr; + } + + for (int x = xStart; x < w; x++, inPtr += bytesPerPixel) + { + byte* offset = outBaseOffset + layoutConverter.GetOffset(x); + + *(T*)offset = *(T*)inPtr; + } + + inPtr += inStrideGap; } } } + inOffs += stride * h * d * layers; } + return true; } - switch (bytesPerPixel) + bool _ = bytesPerPixel switch { - case 1: - Convert(output, data); - break; - case 2: - Convert(output, data); - break;
- case 4: - Convert(output, data); - break; - case 8: - Convert(output, data); - break; - case 12: - Convert(output, data); - break; - case 16: - Convert>(output, data); - break; - - default: - throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); - } + 1 => Convert(output, data), + 2 => Convert(output, data), + 4 => Convert(output, data), + 8 => Convert(output, data), + 12 => Convert(output, data), + 16 => Convert>(output, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; } return output; @@ -342,56 +344,23 @@ namespace Ryujinx.Graphics.Texture int bytesPerPixel, ReadOnlySpan data) { - int w = BitUtils.DivRoundUp(width, blockWidth); + int w = BitUtils.DivRoundUp(width, blockWidth); int h = BitUtils.DivRoundUp(height, blockHeight); int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int lineSize = w * bytesPerPixel; Span output = new byte[h * stride]; int inOffs = 0; + int outOffs = 0; - unsafe void Convert(Span output, ReadOnlySpan data) where T : unmanaged + for (int y = 0; y < h; y++) { - fixed (byte* outputBPtr = output, dataBPtr = data) - { - for (int y = 0; y < h; y++) - { - for (int x = 0; x < w; x++) - { - int offset = y * stride + x * bytesPerPixel; + data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); - *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; - } - - inOffs += inStride; - } - } - } - - switch (bytesPerPixel) - { - case 1: - Convert(output, data); - break; - case 2: - Convert(output, data); - break; - case 4: - Convert(output, data); - break; - case 8: - Convert(output, data); - break; - case 12: - Convert(output, data); - break; - case 16: - Convert>(output, data); - break; - - default: - throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format."); + inOffs += inStride; + outOffs += stride; } return output; diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs
b/Ryujinx.Graphics.Texture/OffsetCalculator.cs index 6cf646d9c..4817e50ad 100644 --- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs +++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs @@ -84,24 +84,11 @@ namespace Ryujinx.Graphics.Texture } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public int GetOffsetWithLineOffset(int x) - { - if (_isLinear) - { - return x + _yPart; - } - else - { - return _layoutConverter.GetOffsetWithLineOffset(x); - } - } - public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) { if (_isLinear) { - return (y * _stride + x, height * _stride); + return (y * _stride + x * _bytesPerPixel, height * _stride); } else {