Untitled diff
276 lines
// NOTE(review): this span is a two-column comparison flattened line by line; it is not
// compilable as written. In every pair of near-identical lines the first line belongs to
// Compiler::gtNewSimdMaxNode and the second to Compiler::gtNewSimdMinNode. A line without a
// partner exists on one side only (called out below).
//
// Both functions build a tree for the element-wise maximum / minimum of two SIMD operands.
//
// Arguments:
//    type            - SIMD type of the result; asserted to equal getSIMDTypeForSize(simdSize)
//    op1, op2        - the operands; asserted non-null and of type `type`
//    simdBaseJitType - element type; asserted to be arithmetic after JitType2PreciseVarType
//    simdSize        - vector size used to select the SIMD type and the intrinsics
//
// Return Value:
//    The root of the newly created tree.
//
// On TARGET_XARCH with a floating-point element type, one of three strategies is used, in
// order of preference: a single AVX10v2 MinMax node, an AVX512 Range + Fixup sequence, or a
// mask-and-select sequence. Every other case is forwarded to gtNewSimdMaxNativeNode /
// gtNewSimdMinNativeNode.
GenTree* Compiler::gtNewSimdMaxNode(
GenTree* Compiler::gtNewSimdMinNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(varTypeIsSIMD(type));
assert(varTypeIsSIMD(type));
assert(getSIMDTypeForSize(simdSize) == type);
assert(getSIMDTypeForSize(simdSize) == type);
assert(op1 != nullptr);
assert(op1 != nullptr);
assert(op1->TypeIs(type));
assert(op1->TypeIs(type));
assert(op2 != nullptr);
assert(op2 != nullptr);
assert(op2->TypeIs(type));
assert(op2->TypeIs(type));
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(varTypeIsArithmetic(simdBaseType));
assert(varTypeIsArithmetic(simdBaseType));
#if defined(TARGET_XARCH)
#if defined(TARGET_XARCH)
if (varTypeIsFloating(simdBaseType))
if (varTypeIsFloating(simdBaseType))
{
{
// Strategy 1: a single NI_AVX10v2_MinMax node. The two sides differ only in the immediate
// operand: 0x05 on the Max side, 0x04 on the Min side.
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
{
NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax;
NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax;
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType,
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType,
simdSize);
simdSize);
}
}
// Strategy 2: NI_AVX512_Range followed by two NI_AVX512_Fixup nodes.
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
// If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Max(-0.0, 0.0) = 0.0
// If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Min(-0.0, 0.0) = -0.0
// case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754
// case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754
// 2019.
// 2019.
//
//
// This can be represented as the following managed code:
// This can be represented as the following managed code:
// Vector128<float> range = Avx512DQ.VL.Range(op1, op2, 0x5);
// Vector128<float> range = Avx512DQ.VL.Range(op1, op2, 0x4);
// Vector128<float> fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128<int>.One, 0);
// Vector128<float> fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128<int>.One, 0);
// Vector128<float> fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128<int>.One, 0);
// Vector128<float> fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128<int>.One, 0);
// return fixup2;
// return fixup2;
//
//
// 0x5 is the control byte for vrangeps/vrangepd:
// 0x4 is the control byte for vrangeps/vrangepd:
// Imm8[1:0] = 01b : Select Max value
// Imm8[1:0] = 00b : Select Min value
// Imm8[3:2] = 01b : Select sign(Compare_Result)
// Imm8[3:2] = 01b : Select sign(Compare_Result)
// op1 and op2 are each consumed twice (by the Range node and by the first Fixup node), so a
// second use of each is created up front.
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x5), NI_AVX512_Range,
GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x4), NI_AVX512_Range,
simdBaseJitType, simdSize);
simdBaseJitType, simdSize);
// One table constant per Fixup node; both hold the value 1 broadcast over TYP_INT lanes for
// TYP_FLOAT elements and over TYP_LONG lanes otherwise.
GenTreeVecCon* tblVecCon1 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon1 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon2 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon2 = gtNewVconNode(type);
const int64_t tblValue = 0x1;
const int64_t tblValue = 0x1;
tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup;
NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup;
GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0),
GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0),
fixupIntrinsic, simdBaseJitType, simdSize);
fixupIntrinsic, simdBaseJitType, simdSize);
GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0),
GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0),
fixupIntrinsic, simdBaseJitType, simdSize);
fixupIntrinsic, simdBaseJitType, simdSize);
return fixup2;
return fixup2;
}
}
// Strategy 3: neither AVX10v2 nor AVX512 is available, so a mask is computed and used to
// select between the operands. The operands are read several times, hence the copies. The
// two sides need different numbers of copies: op1Dup4 exists only on the Min side and
// op2Dup3 only on the Max side, which is why those two lines have no partner.
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
GenTree* op1Dup4 = gtCloneExpr(op1Dup3);
GenTree* op2Dup1 = fgMakeMultiUse(&op2);
GenTree* op2Dup1 = fgMakeMultiUse(&op2);
GenTree* op2Dup2 = gtCloneExpr(op2Dup1);
GenTree* op2Dup2 = gtCloneExpr(op2Dup1);
GenTree* op2Dup3 = gtCloneExpr(op2Dup2);
// Mask construction, per side:
//   Max: mask = ((op1 == op2) & IsNegative(op2)) | IsNaN(op1) | (op2 < op1)
//   Min: mask = ((op1 == op2) & IsNegative(op1)) | IsNaN(op1) | (op1 < op2)
// The final node is gtNewSimdCndSelNode(mask, <op1 copy>, <op2 copy>); presumably this yields
// op1 where the mask is set and op2 elsewhere (TODO confirm against gtNewSimdCndSelNode).
GenTree* equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize);
GenTree* equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize);
GenTree* isNegativeMask = gtNewSimdIsNegativeNode(type, op2Dup1, simdBaseJitType, simdSize);
GenTree* isNegativeMask = gtNewSimdIsNegativeNode(type, op1Dup1, simdBaseJitType, simdSize);
GenTree* isNaNMask = gtNewSimdIsNaNNode(type, op1Dup1, simdBaseJitType, simdSize);
GenTree* isNaNMask = gtNewSimdIsNaNNode(type, op1Dup2, simdBaseJitType, simdSize);
GenTree* lessThanMask = gtNewSimdCmpOpNode(GT_LT, type, op2Dup2, op1Dup2, simdBaseJitType, simdSize);
GenTree* lessThanMask = gtNewSimdCmpOpNode(GT_LT, type, op1Dup3, op2Dup1, simdBaseJitType, simdSize);
GenTree* mask = gtNewSimdBinOpNode(GT_AND, type, equalsMask, isNegativeMask, simdBaseJitType, simdSize);
GenTree* mask = gtNewSimdBinOpNode(GT_AND, type, equalsMask, isNegativeMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, isNaNMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, isNaNMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, lessThanMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, lessThanMask, simdBaseJitType, simdSize);
return gtNewSimdCndSelNode(type, mask, op1Dup3, op2Dup3, simdBaseJitType, simdSize);
return gtNewSimdCndSelNode(type, mask, op1Dup4, op2Dup2, simdBaseJitType, simdSize);
}
}
#endif // TARGET_XARCH
#endif // TARGET_XARCH
// Non-floating element types, and every non-XARCH target, are handled by the native variant.
return gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize);
return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);
}
}
// NOTE(review): this span is a two-column comparison flattened line by line; it is not
// compilable as written. In every pair of near-identical lines the first line belongs to
// Compiler::gtNewSimdMaxNativeNode and the second to Compiler::gtNewSimdMinNativeNode. A line
// without a partner exists on one side only (called out below).
//
// Both functions build a tree for the element-wise maximum / minimum of two SIMD operands by
// picking a hardware intrinsic for the target, vector size and element type. When no
// intrinsic is picked (`intrinsic` is still NI_Illegal), the result is synthesized from a
// compare node and a conditional select.
//
// Arguments:
//    type            - SIMD type of the result; asserted to equal getSIMDTypeForSize(simdSize)
//    op1, op2        - the operands; asserted non-null and of type `type`
//    simdBaseJitType - element type; asserted to be arithmetic after JitType2PreciseVarType
//    simdSize        - vector size; the `simdSize / 8` count of u64 lanes below suggests bytes
//
// Return Value:
//    The root of the newly created tree.
GenTree* Compiler::gtNewSimdMaxNativeNode(
GenTree* Compiler::gtNewSimdMinNativeNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(varTypeIsSIMD(type));
assert(varTypeIsSIMD(type));
assert(getSIMDTypeForSize(simdSize) == type);
assert(getSIMDTypeForSize(simdSize) == type);
assert(op1 != nullptr);
assert(op1 != nullptr);
assert(op1->TypeIs(type));
assert(op1->TypeIs(type));
assert(op2 != nullptr);
assert(op2 != nullptr);
assert(op2->TypeIs(type));
assert(op2->TypeIs(type));
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(varTypeIsArithmetic(simdBaseType));
assert(varTypeIsArithmetic(simdBaseType));
// Stays NI_Illegal when no single intrinsic applies; that value triggers the generic
// compare + select sequence at the end of the function.
NamedIntrinsic intrinsic = NI_Illegal;
NamedIntrinsic intrinsic = NI_Illegal;
#if defined(TARGET_XARCH)
#if defined(TARGET_XARCH)
// simdSize == 32: AVX for floating-point, AVX2 for non-long integers, AVX512 (if available)
// for long integers. Long integers without AVX512 leave `intrinsic` as NI_Illegal.
if (simdSize == 32)
if (simdSize == 32)
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
if (varTypeIsFloating(simdBaseType))
if (varTypeIsFloating(simdBaseType))
{
{
intrinsic = NI_AVX_Max;
intrinsic = NI_AVX_Min;
}
}
else
else
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
if (!varTypeIsLong(simdBaseType))
if (!varTypeIsLong(simdBaseType))
{
{
intrinsic = NI_AVX2_Max;
intrinsic = NI_AVX2_Min;
}
}
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
}
}
}
}
// simdSize == 64: always the AVX512 intrinsic, regardless of element type.
else if (simdSize == 64)
else if (simdSize == 64)
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
// Any other simdSize: the choice depends on the element type.
else
else
{
{
switch (simdBaseType)
switch (simdBaseType)
{
{
// TYP_BYTE / TYP_USHORT: use the SSE41 intrinsic when available. Otherwise the operation is
// carried out on the element type of the opposite signedness (TYP_UBYTE / TYP_SHORT):
// both operands are adjusted by a per-lane constant with fixupOp1, the max/min is taken via
// a recursive call with the switched element type, and the result is adjusted back with
// fixupOp2.
case TYP_BYTE:
case TYP_BYTE:
case TYP_USHORT:
case TYP_USHORT:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
{
intrinsic = NI_SSE41_Max;
intrinsic = NI_SSE41_Min;
break;
break;
}
}
// The two sides represent the constant differently: the Max side keeps a raw 64-bit
// pattern that is later written into a vector constant, while the Min side keeps a
// GenTree* integer constant node that is later broadcast.
uint64_t constVal = 0;
GenTree* constVal = nullptr;
// opJitType / opType remember the caller's element type; simdBaseJitType / simdBaseType are
// overwritten below with the type the recursive max/min runs on.
CorInfoType opJitType = simdBaseJitType;
CorInfoType opJitType = simdBaseJitType;
var_types opType = simdBaseType;
var_types opType = simdBaseType;
genTreeOps fixupOp1 = GT_NONE;
genTreeOps fixupOp1 = GT_NONE;
genTreeOps fixupOp2 = GT_NONE;
genTreeOps fixupOp2 = GT_NONE;
switch (simdBaseType)
switch (simdBaseType)
{
{
case TYP_BYTE:
case TYP_BYTE:
{
{
// 0x80 in every byte lane; subtract before the operation, add back afterwards.
constVal = 0x8080808080808080;
constVal = gtNewIconNode(0x80808080);
fixupOp1 = GT_SUB;
fixupOp1 = GT_SUB;
fixupOp2 = GT_ADD;
fixupOp2 = GT_ADD;
simdBaseJitType = CORINFO_TYPE_UBYTE;
simdBaseJitType = CORINFO_TYPE_UBYTE;
simdBaseType = TYP_UBYTE;
simdBaseType = TYP_UBYTE;
break;
break;
}
}
case TYP_USHORT:
case TYP_USHORT:
{
{
// 0x8000 in every 16-bit lane; add before the operation, subtract afterwards.
constVal = 0x8000800080008000;
constVal = gtNewIconNode(0x80008000);
fixupOp1 = GT_ADD;
fixupOp1 = GT_ADD;
fixupOp2 = GT_SUB;
fixupOp2 = GT_SUB;
simdBaseJitType = CORINFO_TYPE_SHORT;
simdBaseJitType = CORINFO_TYPE_SHORT;
simdBaseType = TYP_SHORT;
simdBaseType = TYP_SHORT;
break;
break;
}
}
default:
default:
{
{
unreached();
unreached();
}
}
}
}
// The next assert is present on the Min side only (its constVal is a pointer).
assert(constVal != nullptr);
assert(fixupOp1 != GT_NONE);
assert(fixupOp1 != GT_NONE);
assert(fixupOp2 != GT_NONE);
assert(fixupOp2 != GT_NONE);
assert(opJitType != simdBaseJitType);
assert(opJitType != simdBaseJitType);
assert(opType != simdBaseType);
assert(opType != simdBaseType);
// Three uses of the constant vector are needed (op1 fixup, op2 fixup, result fixup).
//   Max side: fills a vector constant by storing constVal into each of its simdSize / 8 u64
//             lanes (the loop below is Max-only), then clones it twice.
//   Min side: broadcasts the integer constant as CORINFO_TYPE_INT, then obtains the extra
//             uses through fgMakeMultiUse and gtCloneExpr.
GenTreeVecCon* vecCon1 = gtNewVconNode(type);
GenTree* constVector = gtNewSimdCreateBroadcastNode(type, constVal, CORINFO_TYPE_INT, simdSize);
for (unsigned i = 0; i < (simdSize / 8); i++)
{
vecCon1->gtSimdVal.u64[i] = constVal;
}
GenTree* vecCon2 = gtCloneExpr(vecCon1);
GenTree* constVectorDup1 = fgMakeMultiUse(&constVector);
GenTree* vecCon3 = gtCloneExpr(vecCon2);
GenTree* constVectorDup2 = gtCloneExpr(constVectorDup1);
// op1 = op1 - constVector
// op1 = op1 - constVector
// -or-
// -or-
// op1 = op1 + constVector
// op1 = op1 + constVector
op1 = gtNewSimdBinOpNode(fixupOp1, type, op1, vecCon1, opJitType, simdSize);
op1 = gtNewSimdBinOpNode(fixupOp1, type, op1, constVector, opJitType, simdSize);
// op2 = op2 - constVector
// op2 = op2 - constVectorDup1
// -or-
// -or-
// op2 = op2 + constVector
// op2 = op2 + constVectorDup1
op2 = gtNewSimdBinOpNode(fixupOp1, type, op2, vecCon2, opJitType, simdSize);
op2 = gtNewSimdBinOpNode(fixupOp1, type, op2, constVectorDup1, opJitType, simdSize);
// op1 = Max(op1, op2)
// op1 = Min(op1, op2)
op1 = gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize);
op1 = gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);
// result = op1 + constVector
// result = op1 + constVectorDup2
// -or-
// -or-
// result = op1 - constVector
// result = op1 - constVectorDup2
return gtNewSimdBinOpNode(fixupOp2, type, op1, vecCon3, opJitType, simdSize);
return gtNewSimdBinOpNode(fixupOp2, type, op1, constVectorDup2, opJitType, simdSize);
}
}
// TYP_INT / TYP_UINT: SSE41 intrinsic when available; otherwise `intrinsic` stays NI_Illegal.
case TYP_INT:
case TYP_INT:
case TYP_UINT:
case TYP_UINT:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
{
intrinsic = NI_SSE41_Max;
intrinsic = NI_SSE41_Min;
}
}
break;
break;
}
}
// TYP_LONG / TYP_ULONG: AVX512 intrinsic when available; otherwise `intrinsic` stays
// NI_Illegal.
case TYP_LONG:
case TYP_LONG:
case TYP_ULONG:
case TYP_ULONG:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
break;
break;
}
}
// These element types always map to the X86Base intrinsic, with no ISA check.
case TYP_FLOAT:
case TYP_FLOAT:
case TYP_UBYTE:
case TYP_UBYTE:
case TYP_SHORT:
case TYP_SHORT:
case TYP_DOUBLE:
case TYP_DOUBLE:
{
{
intrinsic = NI_X86Base_Max;
intrinsic = NI_X86Base_Min;
break;
break;
}
}
default:
default:
{
{
unreached();
unreached();
}
}
}
}
}
}
#elif defined(TARGET_ARM64)
#elif defined(TARGET_ARM64)
// Arm64: long element types get no intrinsic here and use the generic sequence below.
// TYP_DOUBLE uses the Arm64-specific intrinsic (the scalar form when simdSize == 8); all
// remaining element types use the AdvSimd intrinsic.
if (!varTypeIsLong(simdBaseType))
if (!varTypeIsLong(simdBaseType))
{
{
if (simdBaseType == TYP_DOUBLE)
if (simdBaseType == TYP_DOUBLE)
{
{
intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_Max;
intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_MinScalar : NI_AdvSimd_Arm64_Min;
}
}
else
else
{
{
intrinsic = NI_AdvSimd_Max;
intrinsic = NI_AdvSimd_Min;
}
}
}
}
#else
#else
#error Unsupported platform
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64
#endif // !TARGET_XARCH && !TARGET_ARM64
if (intrinsic != NI_Illegal)
if (intrinsic != NI_Illegal)
{
{
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
}
}
// Generic sequence: compare the operands (GT_GT on the Max side, GT_LT on the Min side) and
// use the comparison result to select between a second use of each operand.
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op2Dup = fgMakeMultiUse(&op2);
// op1 = op1 > op2
// op1 = op1 < op2
op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize);
op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize);
// result = ConditionalSelect(op1, op1Dup, op2Dup)
// result = ConditionalSelect(op1, op1Dup, op2Dup)
return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
}
}