mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2025-01-01 06:20:00 +01:00
5e724cf24e
* Delete DelegateTypes.cs * Delete DelegateCache.cs * Add files via upload * Update Horizon.cs * Update Program.cs * Update MainWindow.cs * Update Aot.cs * Update RelocEntry.cs * Update Translator.cs * Update MemoryManager.cs * Update InstEmitMemoryHelper.cs * Update Delegates.cs * Nit. * Nit. * Nit. * 10 fewer MSIL bytes for us * Add comment. Nits. * Update Translator.cs * Update Aot.cs * Nits. * Opt.. * Opt.. * Opt.. * Opt.. * Allow to change compression level. * Update MemoryManager.cs * Update Translator.cs * Manage corner cases during the save phase. Nits. * Update Aot.cs * Translator response tweak for Aot disabled. Nit. * Nit. * Nits. * Create DelegateHelpers.cs * Update Delegates.cs * Nit. * Nit. * Nits. * Fix due to #784. * Fixes due to #757 & #841. * Fix due to #846. * Fix due to #847. * Use MethodInfo for managed method calls. Use IR methods instead of managed methods about Max/Min (S/U). Follow-ups & Nits. * Add missing exception messages. Reintroduce slow path for Fmov_Vi. Implement slow path for Fmov_Si. * Switch to the new folder structure. Nits. * Impl. index-based relocation information. Impl. cache file version field. * Nit. * Address gdkchan comments. Mainly: - fixed cache file corruption issue on exit; - exposed a way to disable AOT on the GUI. * Address AcK77 comment. * Address Thealexbarney, jduncanator & emmauss comments. Header magic, CpuId (FI) & Aot -> Ptc. * Adaptation to the new application reloading system. Improvements to the call system of managed methods. Follow-ups. Nits. * Get the same boot times as on master when PTC is disabled. * Profiled Aot. * A32 support (#897). * #975 support (1 of 2). * #975 support (2 of 2). * Rebase fix & nits. * Some fixes and nits (still one bug left). * One fix & nits. * Tests fix (by gdk) & nits. * Support translations not only in high quality and rejit. Nits. * Added possibility to skip translations and continue execution, using `ESC` key. * Update SettingsWindow.cs * Update GLRenderer.cs * Update Ptc.cs * Disabled Profiled PTC by default as requested in the past by gdk. * Fix rejit bug. Increased number of parallel translations. Add stack unwinding stuffs support (1 of 2). Nits. * Add stack unwinding stuffs support (2 of 2). Tuned number of parallel translations. * Restored the ability to assemble jumps with 8-bit offset when Profiled PTC is disabled or during profiling. Modifications due to rebase. Nits. * Limited profiling of the functions to be translated to the addresses belonging to the range of static objects only. * Nits. * Nits. * Update Delegates.cs * Nit. * Update InstEmitSimdArithmetic.cs * Address riperiperi comments. * Fixed the issue of unjustifiably longer boot times at the second boot than at the first boot, measured at the same time or reference point and with the same number of translated functions. * Implemented a simple redundant load/save mechanism. Halved the value of Decoder.MaxInstsPerFunction more appropriate for the current performance of the Translator. Replaced by Logger.PrintError to Logger.PrintDebug in TexturePool.cs about the supposed invalid texture format to avoid the spawn of the log. Nits. * Nit. Improved Logger.PrintError in TexturePool.cs to avoid log spawn. Added missing code for FZ handling (in output) for fp max/min instructions (slow paths). * Add configuration migration for PTC Co-authored-by: Thog <me@thog.eu>
1496 lines
54 KiB
C#
1496 lines
54 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.State;
|
|
using ARMeilleure.Translation;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Reflection;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
|
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
using Func1I = Func<Operand, Operand>;
|
|
|
|
static partial class InstEmit
|
|
{
|
|
public static void Fcvt_S(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
if (op.Size == 0 && op.Opc == 1) // Single -> Double.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
|
|
res = context.ZeroExtend16(OperandType.I64, res);
|
|
|
|
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
|
|
|
|
Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
|
|
{
|
|
throw new NotImplementedException("Double-precision to half-precision.");
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 1) // Double -> Half.
|
|
{
|
|
throw new NotImplementedException("Half-precision to double-precision.");
|
|
}
|
|
else // Invalid encoding.
|
|
{
|
|
Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
|
|
}
|
|
}
|
|
|
|
public static void Fcvtas_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtas_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtas_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtau_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtau_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtau_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtl_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
Operand res;
|
|
|
|
if (op.RegisterSize == RegisterSize.Simd128)
|
|
{
|
|
res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n);
|
|
}
|
|
else
|
|
{
|
|
res = n;
|
|
}
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
if (sizeF == 0)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
|
|
|
|
Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
|
|
|
|
Operand e = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtms_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtmu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtn_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Operand res = context.VectorZeroUpper64(d);
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
|
|
|
|
nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
|
|
|
|
Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
|
|
? Intrinsic.X86Movlhps
|
|
: Intrinsic.X86Movhlps;
|
|
|
|
res = context.AddIntrinsic(movInst, res, nInt);
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
else
|
|
{
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
|
|
e = context.ZeroExtend16(OperandType.I64, e);
|
|
|
|
res = EmitVectorInsert(context, res, e, part + index, 1);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand e = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
res = context.VectorInsert(res, e, part + index);
|
|
}
|
|
}
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtps_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtpu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzs_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzu_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
Operand res = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorCvtf(context, signed: true);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorCvtf(context, signed: true);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
|
|
|
|
Operand res = EmitFPConvert(context, ne, sizeF, signed: false);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorCvtf(context, signed: false);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitVectorCvtf(context, signed: false);
|
|
}
|
|
}
|
|
|
|
private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = emit(ne);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = EmitF2iFBitsMul(context, ne, fBits);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, emit(ne), 0)
|
|
: EmitScalarFcvtu(context, emit(ne), 0);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, ne, op.FBits)
|
|
: EmitScalarFcvtu(context, ne, op.FBits);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitVectorCvtf(ArmEmitterContext context, bool signed)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = op.GetBytesCount() >> sizeI;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
|
|
|
|
Operand e = EmitFPConvert(context, ne, sizeF, signed);
|
|
|
|
e = EmitI2fFBitsMul(context, e, fBits);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static int GetFBits(ArmEmitterContext context)
|
|
{
|
|
if (context.CurrOp is OpCodeSimdShImm op)
|
|
{
|
|
return GetImmShr(op);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
|
Debug.Assert((uint)size < 2);
|
|
|
|
OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
if (signed)
|
|
{
|
|
return context.ConvertToFP(type, value);
|
|
}
|
|
else
|
|
{
|
|
return context.ConvertToFPUI(type, value);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
|
|
{
|
|
Debug.Assert(opF.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL
|
|
Operand res = context.VectorCreateScalar(longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH);
|
|
Operand resH = context.VectorCreateScalar(longH);
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
|
|
{
|
|
Debug.Assert(op.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
|
|
Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand mask = scalar // 65536.000f (1 << 16)
|
|
? X86GetScalar (context, 0x47800000)
|
|
: X86GetAllElements(context, 0x47800000);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand mask = scalar // 4294967296.0000000d (1L << 32)
|
|
? X86GetScalar (context, 0x41F0000000000000L)
|
|
: X86GetAllElements(context, 0x41F0000000000000L);
|
|
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32));
|
|
res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
|
|
res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32));
|
|
res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32));
|
|
res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
|
|
{
|
|
OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
|
|
|
|
return context.VectorExtract(type, GetVec(reg), index);
|
|
}
|
|
}
|
|
}
|