Fix a few things, 64 byte block fast copy.
This commit is contained in:
parent
8a7e25de71
commit
36211f2caa
4 changed files with 153 additions and 173 deletions
|
@ -73,7 +73,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
|||
}
|
||||
else
|
||||
{
|
||||
unsafe int Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
|
||||
unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
|
||||
{
|
||||
fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
|
||||
{
|
||||
|
@ -91,10 +91,10 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
|||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
int _ = srcBpp switch
|
||||
bool _ = srcBpp switch
|
||||
{
|
||||
1 => Convert<byte>(dstSpan, srcSpan),
|
||||
2 => Convert<ushort>(dstSpan, srcSpan),
|
||||
|
|
|
@ -35,6 +35,7 @@ namespace Ryujinx.Graphics.Texture
|
|||
|
||||
// Variables for built in iteration.
|
||||
private int _yPart;
|
||||
private int _yzPart;
|
||||
private int _zPart;
|
||||
|
||||
public BlockLinearLayout(
|
||||
|
@ -105,13 +106,14 @@ namespace Ryujinx.Graphics.Texture
|
|||
public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
|
||||
{
|
||||
// Justification:
|
||||
// The offset is a combination of separate x and y parts.
|
||||
// The 2D offset is a combination of separate x and y parts.
|
||||
// Both components increase with input and never overlap bits.
|
||||
// Therefore for each component, the minimum input value is the lowest that component can go. Opposite goes for maximum.
|
||||
// Therefore for each component, the minimum input value is the lowest that component can go.
|
||||
// Minimum total value is minimum X component + minimum Y component. Similar goes for maximum.
|
||||
|
||||
int start = GetOffset(x, y, 0);
|
||||
int end = GetOffset(x + width, y + height, 0);
|
||||
return (start, (end - start) + _texBpp);
|
||||
int end = GetOffset(x + width - 1, y + height - 1, 0) + _texBpp; // Cover the last pixel.
|
||||
return (start, end - start);
|
||||
}
|
||||
|
||||
public bool LayoutMatches(BlockLinearLayout other)
|
||||
|
@ -134,6 +136,7 @@ namespace Ryujinx.Graphics.Texture
|
|||
offset += ((y & 0x01) >> 0) << 4;
|
||||
|
||||
_yPart = offset;
|
||||
_yzPart = offset + _zPart;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
|
@ -144,24 +147,45 @@ namespace Ryujinx.Graphics.Texture
|
|||
offset += ((z & _bdMask) * GobSize) << _bhShift;
|
||||
|
||||
_zPart = offset;
|
||||
_yzPart = offset + _yPart;
|
||||
}
|
||||
|
||||
/// <summary>
/// Converts a byte offset within the current line into an absolute offset, using only the
/// 16-byte-granular bits of the input. Input x must be divisible by 16.
/// </summary>
/// <param name="x">Line offset in bytes; its low four bits do not take part in the mask terms below</param>
/// <returns>The converted x contribution plus the cached <c>_yzPart</c> value</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset16(int x)
{
    // Contribution of the GobStride-sized column index, scaled by _xShift.
    int gobPart = (x / GobStride) << _xShift;

    // Bit 5 of x is moved to bit 8, bit 4 of x is moved to bit 5.
    int bit5Part = ((x >> 5) & 1) << 8;
    int bit4Part = ((x >> 4) & 1) << 5;

    // _yzPart holds the pre-summed _yPart + _zPart.
    return gobPart + bit5Part + bit4Part + _yzPart;
}
|
||||
|
||||
/// <summary>
/// Converts a byte offset within the current line into an absolute offset, using only the
/// GobStride-granular part of the input. Input x must be divisible by 64.
/// </summary>
/// <param name="x">Line offset in bytes</param>
/// <returns>The scaled column index plus the cached <c>_yzPart</c> value</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset64(int x)
{
    // The bit-4 and bit-5 terms of the 16-byte variant are omitted here.
    int gobPart = (x / GobStride) << _xShift;

    return gobPart + _yzPart;
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public int GetOffsetWithLineOffset(int x)
|
||||
public int GetOffset(int x)
|
||||
{
|
||||
x <<= _bppShift;
|
||||
int offset = (x / GobStride) << _xShift;
|
||||
|
||||
offset += ((x & 0x3f) >> 5) << 8;
|
||||
offset += ((x & 0x1f) >> 4) << 5;
|
||||
offset += (x & 0x0f);
|
||||
|
||||
return offset + _yPart + _zPart;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public int GetOffset(int x)
|
||||
{
|
||||
return GetOffsetWithLineOffset(x << _bppShift);
|
||||
return offset + _yzPart;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -65,10 +65,14 @@ namespace Ryujinx.Graphics.Texture
|
|||
|
||||
int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
|
||||
|
||||
int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
|
||||
|
||||
int xStart = strideTrunc / bytesPerPixel;
|
||||
|
||||
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
||||
|
||||
int outStrideGap = stride - w * bytesPerPixel;
|
||||
|
||||
int alignment = gobWidth;
|
||||
|
||||
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
|
||||
|
@ -86,13 +90,14 @@ namespace Ryujinx.Graphics.Texture
|
|||
mipGobBlocksInZ,
|
||||
bytesPerPixel);
|
||||
|
||||
unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
{
|
||||
fixed (byte* outputBPtr = output, dataBPtr = data)
|
||||
fixed (byte* outputPtr = output, dataPtr = data)
|
||||
{
|
||||
byte* outPtr = outputPtr + outOffs;
|
||||
for (int layer = 0; layer < layers; layer++)
|
||||
{
|
||||
int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
|
||||
byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
|
||||
|
||||
for (int z = 0; z < d; z++)
|
||||
{
|
||||
|
@ -100,51 +105,58 @@ namespace Ryujinx.Graphics.Texture
|
|||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
layoutConverter.SetY(y);
|
||||
for (int x = 0; x < strideTrunc; x += 16)
|
||||
{
|
||||
int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x);
|
||||
|
||||
*(Vector128<byte>*)(outputBPtr + outOffs + x) = *(Vector128<byte>*)(dataBPtr + offset);
|
||||
for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
|
||||
{
|
||||
byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
|
||||
byte* offset2 = offset + 0x20;
|
||||
byte* offset3 = offset + 0x100;
|
||||
byte* offset4 = offset + 0x120;
|
||||
|
||||
Vector128<byte> value = *(Vector128<byte>*)offset;
|
||||
Vector128<byte> value2 = *(Vector128<byte>*)offset2;
|
||||
Vector128<byte> value3 = *(Vector128<byte>*)offset3;
|
||||
Vector128<byte> value4 = *(Vector128<byte>*)offset4;
|
||||
|
||||
*(Vector128<byte>*)outPtr = value;
|
||||
*(Vector128<byte>*)(outPtr + 16) = value2;
|
||||
*(Vector128<byte>*)(outPtr + 32) = value3;
|
||||
*(Vector128<byte>*)(outPtr + 48) = value4;
|
||||
}
|
||||
|
||||
for (int x = xStart; x < w; x++)
|
||||
for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
|
||||
{
|
||||
int offset = inBaseOffset + layoutConverter.GetOffset(x);
|
||||
byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
|
||||
|
||||
((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset);
|
||||
*(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
|
||||
}
|
||||
|
||||
outOffs += stride;
|
||||
for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
|
||||
{
|
||||
byte* offset = inBaseOffset + layoutConverter.GetOffset(x);
|
||||
|
||||
*(T*)outPtr = *(T*)offset;
|
||||
}
|
||||
|
||||
outPtr += outStrideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
outOffs += stride * h * d * layers;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (bytesPerPixel)
|
||||
bool _ = bytesPerPixel switch
|
||||
{
|
||||
case 1:
|
||||
Convert<byte>(output, data);
|
||||
break;
|
||||
case 2:
|
||||
Convert<ushort>(output, data);
|
||||
break;
|
||||
case 4:
|
||||
Convert<uint>(output, data);
|
||||
break;
|
||||
case 8:
|
||||
Convert<ulong>(output, data);
|
||||
break;
|
||||
case 12:
|
||||
Convert<Bpp12Pixel>(output, data);
|
||||
break;
|
||||
case 16:
|
||||
Convert<Vector128<byte>>(output, data);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.");
|
||||
}
|
||||
1 => Convert<byte>(output, data),
|
||||
2 => Convert<ushort>(output, data),
|
||||
4 => Convert<uint>(output, data),
|
||||
8 => Convert<ulong>(output, data),
|
||||
12 => Convert<Bpp12Pixel>(output, data),
|
||||
16 => Convert<Vector128<byte>>(output, data),
|
||||
_ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
|
||||
};
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
@ -162,52 +174,19 @@ namespace Ryujinx.Graphics.Texture
|
|||
int h = BitUtils.DivRoundUp(height, blockHeight);
|
||||
|
||||
int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
||||
int lineSize = w * bytesPerPixel;
|
||||
|
||||
Span<byte> output = new byte[h * outStride];
|
||||
|
||||
int outOffs = 0;
|
||||
int inOffs = 0;
|
||||
|
||||
unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
fixed (byte* outputBPtr = output, dataBPtr = data)
|
||||
{
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int offset = y * stride + x * bytesPerPixel;
|
||||
data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
|
||||
|
||||
((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset);
|
||||
}
|
||||
|
||||
outOffs += outStride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (bytesPerPixel)
|
||||
{
|
||||
case 1:
|
||||
Convert<byte>(output, data);
|
||||
break;
|
||||
case 2:
|
||||
Convert<ushort>(output, data);
|
||||
break;
|
||||
case 4:
|
||||
Convert<uint>(output, data);
|
||||
break;
|
||||
case 8:
|
||||
Convert<ulong>(output, data);
|
||||
break;
|
||||
case 12:
|
||||
Convert<Bpp12Pixel>(output, data);
|
||||
break;
|
||||
case 16:
|
||||
Convert<Vector128<byte>>(output, data);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.");
|
||||
inOffs += stride;
|
||||
outOffs += outStride;
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -257,8 +236,16 @@ namespace Ryujinx.Graphics.Texture
|
|||
mipGobBlocksInZ >>= 1;
|
||||
}
|
||||
|
||||
int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
|
||||
|
||||
int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
|
||||
|
||||
int xStart = strideTrunc / bytesPerPixel;
|
||||
|
||||
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
||||
|
||||
int inStrideGap = stride - w * bytesPerPixel;
|
||||
|
||||
int alignment = gobWidth;
|
||||
|
||||
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
|
||||
|
@ -276,13 +263,14 @@ namespace Ryujinx.Graphics.Texture
|
|||
mipGobBlocksInZ,
|
||||
bytesPerPixel);
|
||||
|
||||
unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
{
|
||||
fixed (byte* outputBPtr = output, dataBPtr = data)
|
||||
fixed (byte* outputPtr = output, dataPtr = data)
|
||||
{
|
||||
byte* inPtr = dataPtr + inOffs;
|
||||
for (int layer = 0; layer < layers; layer++)
|
||||
{
|
||||
int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
|
||||
byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
|
||||
|
||||
for (int z = 0; z < d; z++)
|
||||
{
|
||||
|
@ -290,44 +278,58 @@ namespace Ryujinx.Graphics.Texture
|
|||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
layoutConverter.SetY(y);
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int offset = outBaseOffset + layoutConverter.GetOffset(x);
|
||||
|
||||
*(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x];
|
||||
for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
|
||||
{
|
||||
byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
|
||||
byte* offset2 = offset + 0x20;
|
||||
byte* offset3 = offset + 0x100;
|
||||
byte* offset4 = offset + 0x120;
|
||||
|
||||
Vector128<byte> value = *(Vector128<byte>*)inPtr;
|
||||
Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
|
||||
Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
|
||||
Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);
|
||||
|
||||
*(Vector128<byte>*)offset = value;
|
||||
*(Vector128<byte>*)offset2 = value2;
|
||||
*(Vector128<byte>*)offset3 = value3;
|
||||
*(Vector128<byte>*)offset4 = value4;
|
||||
}
|
||||
|
||||
inOffs += stride;
|
||||
// Copy the 16-byte chunks left over after the 64-byte loop above.
// The loop has to resume at strideTrunc64: inPtr has already advanced past the
// bytes handled by the 64-byte loop, so restarting at 0 would overwrite the start
// of the row with later source bytes and push inPtr past the end of the input line.
for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
{
    byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);

    *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
}
|
||||
|
||||
for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
|
||||
{
|
||||
byte* offset = outBaseOffset + layoutConverter.GetOffset(x);
|
||||
|
||||
*(T*)offset = *(T*)inPtr;
|
||||
}
|
||||
|
||||
inPtr += inStrideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
inOffs += stride * h * d * layers;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (bytesPerPixel)
|
||||
bool _ = bytesPerPixel switch
|
||||
{
|
||||
case 1:
|
||||
Convert<byte>(output, data);
|
||||
break;
|
||||
case 2:
|
||||
Convert<ushort>(output, data);
|
||||
break;
|
||||
case 4:
|
||||
Convert<uint>(output, data);
|
||||
break;
|
||||
case 8:
|
||||
Convert<ulong>(output, data);
|
||||
break;
|
||||
case 12:
|
||||
Convert<Bpp12Pixel>(output, data);
|
||||
break;
|
||||
case 16:
|
||||
Convert<Vector128<byte>>(output, data);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.");
|
||||
}
|
||||
1 => Convert<byte>(output, data),
|
||||
2 => Convert<ushort>(output, data),
|
||||
4 => Convert<uint>(output, data),
|
||||
8 => Convert<ulong>(output, data),
|
||||
12 => Convert<Bpp12Pixel>(output, data),
|
||||
16 => Convert<Vector128<byte>>(output, data),
|
||||
_ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
|
||||
};
|
||||
}
|
||||
|
||||
return output;
|
||||
|
@ -342,56 +344,23 @@ namespace Ryujinx.Graphics.Texture
|
|||
int bytesPerPixel,
|
||||
ReadOnlySpan<byte> data)
|
||||
{
|
||||
int w = BitUtils.DivRoundUp(width, blockWidth);
|
||||
int w = BitUtils.DivRoundUp(width, blockWidth);
|
||||
int h = BitUtils.DivRoundUp(height, blockHeight);
|
||||
|
||||
int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
||||
int lineSize = width * bytesPerPixel;
|
||||
|
||||
Span<byte> output = new byte[h * stride];
|
||||
|
||||
int inOffs = 0;
|
||||
int outOffs = 0;
|
||||
|
||||
unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
fixed (byte* outputBPtr = output, dataBPtr = data)
|
||||
{
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int offset = y * stride + x * bytesPerPixel;
|
||||
data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
|
||||
|
||||
*(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x];
|
||||
}
|
||||
|
||||
inOffs += inStride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (bytesPerPixel)
|
||||
{
|
||||
case 1:
|
||||
Convert<byte>(output, data);
|
||||
break;
|
||||
case 2:
|
||||
Convert<ushort>(output, data);
|
||||
break;
|
||||
case 4:
|
||||
Convert<uint>(output, data);
|
||||
break;
|
||||
case 8:
|
||||
Convert<ulong>(output, data);
|
||||
break;
|
||||
case 12:
|
||||
Convert<Bpp12Pixel>(output, data);
|
||||
break;
|
||||
case 16:
|
||||
Convert<Vector128<byte>>(output, data);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.");
|
||||
inOffs += inStride;
|
||||
outOffs += stride;
|
||||
}
|
||||
|
||||
return output;
|
||||
|
|
|
@ -84,24 +84,11 @@ namespace Ryujinx.Graphics.Texture
|
|||
}
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset(int x)
{
    // Linear case: the line offset is added to the cached _yPart.
    // Otherwise the call is forwarded to the layout converter.
    return _isLinear
        ? x + _yPart
        : _layoutConverter.GetOffsetWithLineOffset(x);
}
|
||||
|
||||
public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
|
||||
{
|
||||
if (_isLinear)
|
||||
{
|
||||
return (y * _stride + x, height * _stride);
|
||||
return (y * _stride + x * _bytesPerPixel, height * _stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue