Untitled diff

Created Diff never expires
53 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
276 lines
48 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
272 lines
// NOTE(review): this text is a side-by-side diff export, not a single compilable function.
// Almost every statement appears twice in a row: the first copy belongs to the left pane
// (Compiler::gtNewSimdMaxNode) and the second copy to the right pane (Compiler::gtNewSimdMinNode).
// Statements that exist in only one pane (for example op1Dup4 and op2Dup3) appear once.
//
// Both variants build a GenTree that computes the SIMD Max (left) / Min (right) of op1 and op2.
// For floating-point base types on TARGET_XARCH three strategies are tried in order:
//   1. AVX10v2: a single NI_AVX10v2_MinMax node (immediate 0x05 on the left, 0x04 on the right).
//   2. AVX512:  an NI_AVX512_Range node followed by two NI_AVX512_Fixup nodes.
//   3. Otherwise: a mask built from compare / IsNegative / IsNaN nodes, fed to gtNewSimdCndSelNode.
// Every other case is forwarded to gtNewSimdMaxNativeNode / gtNewSimdMinNativeNode.
//
// Parameters (as used below):
//   type            - SIMD type of the result; asserted to match simdSize and the type of both operands.
//   op1, op2        - the operands; asserted to be non-null and of 'type'.
//   simdBaseJitType - element type; converted to simdBaseType through JitType2PreciseVarType.
//   simdSize        - vector size; checked against 'type' and passed to every node constructor.
//
// Returns the root node of the tree that was built.
GenTree* Compiler::gtNewSimdMaxNode(
GenTree* Compiler::gtNewSimdMinNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(IsBaselineSimdIsaSupportedDebugOnly());


assert(varTypeIsSIMD(type));
assert(varTypeIsSIMD(type));
assert(getSIMDTypeForSize(simdSize) == type);
assert(getSIMDTypeForSize(simdSize) == type);


assert(op1 != nullptr);
assert(op1 != nullptr);
assert(op1->TypeIs(type));
assert(op1->TypeIs(type));


assert(op2 != nullptr);
assert(op2 != nullptr);
assert(op2->TypeIs(type));
assert(op2->TypeIs(type));


var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(varTypeIsArithmetic(simdBaseType));
assert(varTypeIsArithmetic(simdBaseType));


#if defined(TARGET_XARCH)
#if defined(TARGET_XARCH)
// Only floating-point element types get special handling here; everything else
// falls through to the native helper at the bottom.
if (varTypeIsFloating(simdBaseType))
if (varTypeIsFloating(simdBaseType))
{
{
// Strategy 1: one MinMax node; the only difference between the panes is the immediate.
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
{
NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax;
NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax;
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType,
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType,
simdSize);
simdSize);
}
}
// Strategy 2: Range + two Fixup nodes.
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
// If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Max(-0.0, 0.0) = 0.0
// If AVX512 is supported, we can use vrangeps/vrangepd to correctly handle the Vector.Min(-0.0, 0.0) = -0.0
// case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754
// case. We still need to check for NaN as vrangeps/vrangepd does not handle NaN as specified in IEEE 754
// 2019.
// 2019.
//
//
// This can be represented as the following managed code:
// This can be represented as the following managed code:
// Vector128<float> range = Avx512DQ.VL.Range(op1, op2, 0x5);
// Vector128<float> range = Avx512DQ.VL.Range(op1, op2, 0x4);
// Vector128<float> fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128<int>.One, 0);
// Vector128<float> fixup1 = Avx512F.VL.Fixup(op1, op2, Vector128<int>.One, 0);
// Vector128<float> fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128<int>.One, 0);
// Vector128<float> fixup2 = Avx512F.VL.Fixup(range, fixup1, Vector128<int>.One, 0);
// return fixup2;
// return fixup2;
//
//
// 0x5 is the control byte for vrangeps/vrangepd:
// 0x4 is the control byte for vrangeps/vrangepd:
// Imm8[1:0] = 01b : Select Max value
// Imm8[1:0] = 00b : Select Min value
// Imm8[3:2] = 01b : Select sign(Compare_Result)
// Imm8[3:2] = 01b : Select sign(Compare_Result)


// op1 and op2 are each consumed twice (by the Range node and by the first Fixup node),
// so a second use of each is created up front.
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x5), NI_AVX512_Range,
GenTree* rangeOp = gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x4), NI_AVX512_Range,
simdBaseJitType, simdSize);
simdBaseJitType, simdSize);
// Two separate constant vectors, one per Fixup node, each holding the value 1 broadcast as
// TYP_INT (float elements) or TYP_LONG (double elements) - the "Vector128<int>.One" table above.
GenTreeVecCon* tblVecCon1 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon1 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon2 = gtNewVconNode(type);
GenTreeVecCon* tblVecCon2 = gtNewVconNode(type);
const int64_t tblValue = 0x1;
const int64_t tblValue = 0x1;
tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon1->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
tblVecCon2->EvaluateBroadcastInPlace((simdBaseType == TYP_FLOAT) ? TYP_INT : TYP_LONG, tblValue);
NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup;
NamedIntrinsic fixupIntrinsic = NI_AVX512_Fixup;
// fixup1 = Fixup(op1, op2, table, 0); fixup2 = Fixup(range, fixup1, table, 0).
GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0),
GenTree* fixup1 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, tblVecCon1, gtNewIconNode(0),
fixupIntrinsic, simdBaseJitType, simdSize);
fixupIntrinsic, simdBaseJitType, simdSize);
GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0),
GenTree* fixup2 = gtNewSimdHWIntrinsicNode(type, rangeOp, fixup1, tblVecCon2, gtNewIconNode(0),
fixupIntrinsic, simdBaseJitType, simdSize);
fixupIntrinsic, simdBaseJitType, simdSize);
return fixup2;
return fixup2;
}
}


// Strategy 3: neither AVX10v2 nor AVX512 was selected. One copy of each operand is made
// per use below. The left pane uses op1Dup1..3 and op2Dup1..3; the right pane uses
// op1Dup1..4 and op2Dup1..2, which is why op1Dup4 and op2Dup3 each appear only once.
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
GenTree* op1Dup4 = gtCloneExpr(op1Dup3);


GenTree* op2Dup1 = fgMakeMultiUse(&op2);
GenTree* op2Dup1 = fgMakeMultiUse(&op2);
GenTree* op2Dup2 = gtCloneExpr(op2Dup1);
GenTree* op2Dup2 = gtCloneExpr(op2Dup1);
GenTree* op2Dup3 = gtCloneExpr(op2Dup2);


// Mask components.
//   Left  (Max): mask = ((op1 == op2) & IsNegative(op2)) | IsNaN(op1) | (op2 < op1)
//   Right (Min): mask = ((op1 == op2) & IsNegative(op1)) | IsNaN(op1) | (op1 < op2)
GenTree* equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize);
GenTree* equalsMask = gtNewSimdCmpOpNode(GT_EQ, type, op1, op2, simdBaseJitType, simdSize);
GenTree* isNegativeMask = gtNewSimdIsNegativeNode(type, op2Dup1, simdBaseJitType, simdSize);
GenTree* isNegativeMask = gtNewSimdIsNegativeNode(type, op1Dup1, simdBaseJitType, simdSize);
GenTree* isNaNMask = gtNewSimdIsNaNNode(type, op1Dup1, simdBaseJitType, simdSize);
GenTree* isNaNMask = gtNewSimdIsNaNNode(type, op1Dup2, simdBaseJitType, simdSize);
GenTree* lessThanMask = gtNewSimdCmpOpNode(GT_LT, type, op2Dup2, op1Dup2, simdBaseJitType, simdSize);
GenTree* lessThanMask = gtNewSimdCmpOpNode(GT_LT, type, op1Dup3, op2Dup1, simdBaseJitType, simdSize);


// Combine the components exactly as written in the formula above.
GenTree* mask = gtNewSimdBinOpNode(GT_AND, type, equalsMask, isNegativeMask, simdBaseJitType, simdSize);
GenTree* mask = gtNewSimdBinOpNode(GT_AND, type, equalsMask, isNegativeMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, isNaNMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, isNaNMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, lessThanMask, simdBaseJitType, simdSize);
mask = gtNewSimdBinOpNode(GT_OR, type, mask, lessThanMask, simdBaseJitType, simdSize);


// Conditional select driven by the mask, choosing between the remaining op1 copy and op2 copy.
return gtNewSimdCndSelNode(type, mask, op1Dup3, op2Dup3, simdBaseJitType, simdSize);
return gtNewSimdCndSelNode(type, mask, op1Dup4, op2Dup2, simdBaseJitType, simdSize);
}
}
#endif // TARGET_XARCH
#endif // TARGET_XARCH


// Non-floating base types on xarch, and every base type on other targets, use the native helper.
return gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize);
return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);
}
}


// NOTE(review): this text is a side-by-side diff export, not a single compilable function.
// Almost every statement appears twice in a row: the first copy belongs to the left pane
// (Compiler::gtNewSimdMaxNativeNode) and the second copy to the right pane
// (Compiler::gtNewSimdMinNativeNode). Statements that exist in only one pane appear once.
//
// Both variants pick a hardware Max/Min intrinsic based on the target, simdSize and simdBaseType.
// When one is chosen, a single HW intrinsic node over op1/op2 is returned. When none is chosen
// ('intrinsic' is still NI_Illegal), the result is built from a compare node plus
// gtNewSimdCndSelNode.
//
// Special case on TARGET_XARCH for TYP_BYTE / TYP_USHORT when SSE41 is not selected: both operands
// are adjusted by a per-element constant, the function recurses with the other signedness
// (TYP_UBYTE / TYP_SHORT), and the adjustment is undone on the result. The panes differ in how the
// constant is materialised: the left pane writes a 64-bit pattern straight into a GenTreeVecCon and
// clones it, the right pane broadcasts an int icon node with gtNewSimdCreateBroadcastNode and
// duplicates it with fgMakeMultiUse / gtCloneExpr.
//
// Parameters (as used below):
//   type            - SIMD type of the result; asserted to match simdSize and the type of both operands.
//   op1, op2        - the operands; asserted to be non-null and of 'type'.
//   simdBaseJitType - element type; converted to simdBaseType through JitType2PreciseVarType.
//   simdSize        - vector size; selects the 32 / 64 / other branch on xarch.
//
// Returns the root node of the tree that was built.
GenTree* Compiler::gtNewSimdMaxNativeNode(
GenTree* Compiler::gtNewSimdMinNativeNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(IsBaselineSimdIsaSupportedDebugOnly());


assert(varTypeIsSIMD(type));
assert(varTypeIsSIMD(type));
assert(getSIMDTypeForSize(simdSize) == type);
assert(getSIMDTypeForSize(simdSize) == type);


assert(op1 != nullptr);
assert(op1 != nullptr);
assert(op1->TypeIs(type));
assert(op1->TypeIs(type));


assert(op2 != nullptr);
assert(op2 != nullptr);
assert(op2->TypeIs(type));
assert(op2->TypeIs(type));


var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(varTypeIsArithmetic(simdBaseType));
assert(varTypeIsArithmetic(simdBaseType));


// Stays NI_Illegal when no direct intrinsic applies; that selects the compare + select fallback
// at the end of the function.
NamedIntrinsic intrinsic = NI_Illegal;
NamedIntrinsic intrinsic = NI_Illegal;


#if defined(TARGET_XARCH)
#if defined(TARGET_XARCH)
// simdSize == 32: AVX for floating-point, AVX2 for non-long integers, AVX512 (if available) for long.
if (simdSize == 32)
if (simdSize == 32)
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));


if (varTypeIsFloating(simdBaseType))
if (varTypeIsFloating(simdBaseType))
{
{
intrinsic = NI_AVX_Max;
intrinsic = NI_AVX_Min;
}
}
else
else
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));


// Long elements without AVX512 leave 'intrinsic' as NI_Illegal.
if (!varTypeIsLong(simdBaseType))
if (!varTypeIsLong(simdBaseType))
{
{
intrinsic = NI_AVX2_Max;
intrinsic = NI_AVX2_Min;
}
}
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
else if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
}
}
}
}
// simdSize == 64: always the AVX512 intrinsic.
else if (simdSize == 64)
else if (simdSize == 64)
{
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512));
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
// Any other simdSize: choose per element type.
else
else
{
{
switch (simdBaseType)
switch (simdBaseType)
{
{
case TYP_BYTE:
case TYP_BYTE:
case TYP_USHORT:
case TYP_USHORT:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
{
intrinsic = NI_SSE41_Max;
intrinsic = NI_SSE41_Min;
break;
break;
}
}


// No SSE41: set up the adjust / recurse / undo sequence described in the header.
// Left pane keeps the constant as a raw 64-bit pattern; right pane keeps it as a node.
// opJitType / opType remember the original element type for the add/sub nodes, because
// simdBaseJitType / simdBaseType are overwritten with the other signedness below.
uint64_t constVal = 0;
GenTree* constVal = nullptr;
CorInfoType opJitType = simdBaseJitType;
CorInfoType opJitType = simdBaseJitType;
var_types opType = simdBaseType;
var_types opType = simdBaseType;
genTreeOps fixupOp1 = GT_NONE;
genTreeOps fixupOp1 = GT_NONE;
genTreeOps fixupOp2 = GT_NONE;
genTreeOps fixupOp2 = GT_NONE;


switch (simdBaseType)
switch (simdBaseType)
{
{
case TYP_BYTE:
case TYP_BYTE:
{
{
// 0x80 in every byte; subtract before, add back after; recurse as unsigned byte.
constVal = 0x8080808080808080;
constVal = gtNewIconNode(0x80808080);
fixupOp1 = GT_SUB;
fixupOp1 = GT_SUB;
fixupOp2 = GT_ADD;
fixupOp2 = GT_ADD;
simdBaseJitType = CORINFO_TYPE_UBYTE;
simdBaseJitType = CORINFO_TYPE_UBYTE;
simdBaseType = TYP_UBYTE;
simdBaseType = TYP_UBYTE;
break;
break;
}
}


case TYP_USHORT:
case TYP_USHORT:
{
{
// 0x8000 in every 16-bit element; add before, subtract after; recurse as signed short.
constVal = 0x8000800080008000;
constVal = gtNewIconNode(0x80008000);
fixupOp1 = GT_ADD;
fixupOp1 = GT_ADD;
fixupOp2 = GT_SUB;
fixupOp2 = GT_SUB;
simdBaseJitType = CORINFO_TYPE_SHORT;
simdBaseJitType = CORINFO_TYPE_SHORT;
simdBaseType = TYP_SHORT;
simdBaseType = TYP_SHORT;
break;
break;
}
}


default:
default:
{
{
unreached();
unreached();
}
}
}
}


// The constVal null-check below exists only in the right pane (where constVal is a node).
assert(constVal != nullptr);
assert(fixupOp1 != GT_NONE);
assert(fixupOp1 != GT_NONE);
assert(fixupOp2 != GT_NONE);
assert(fixupOp2 != GT_NONE);
assert(opJitType != simdBaseJitType);
assert(opJitType != simdBaseJitType);
assert(opType != simdBaseType);
assert(opType != simdBaseType);


// Constant materialisation - the panes diverge here and are NOT simple duplicates:
//   Left : vecCon1 is a vector constant whose 64-bit lanes are filled by the loop below;
//          vecCon2 / vecCon3 are clones for the second and third use.
//   Right: constVector broadcasts the int icon node; constVectorDup1 / constVectorDup2
//          are its additional uses.
GenTreeVecCon* vecCon1 = gtNewVconNode(type);
GenTree* constVector = gtNewSimdCreateBroadcastNode(type, constVal, CORINFO_TYPE_INT, simdSize);

// Left pane only: write the pattern into each of the simdSize / 8 64-bit lanes.
for (unsigned i = 0; i < (simdSize / 8); i++)
{
vecCon1->gtSimdVal.u64[i] = constVal;
}


GenTree* vecCon2 = gtCloneExpr(vecCon1);
GenTree* constVectorDup1 = fgMakeMultiUse(&constVector);
GenTree* vecCon3 = gtCloneExpr(vecCon2);
GenTree* constVectorDup2 = gtCloneExpr(constVectorDup1);


// op1 = op1 - constVector
// op1 = op1 - constVector
// -or-
// -or-
// op1 = op1 + constVector
// op1 = op1 + constVector
op1 = gtNewSimdBinOpNode(fixupOp1, type, op1, vecCon1, opJitType, simdSize);
op1 = gtNewSimdBinOpNode(fixupOp1, type, op1, constVector, opJitType, simdSize);


// op2 = op2 - constVector
// op2 = op2 - constVectorDup1
// -or-
// -or-
// op2 = op2 + constVector
// op2 = op2 + constVectorDup1
op2 = gtNewSimdBinOpNode(fixupOp1, type, op2, vecCon2, opJitType, simdSize);
op2 = gtNewSimdBinOpNode(fixupOp1, type, op2, constVectorDup1, opJitType, simdSize);


// Recursive call: simdBaseJitType now names the other signedness (UBYTE or SHORT).
// op1 = Max(op1, op2)
// op1 = Min(op1, op2)
op1 = gtNewSimdMaxNativeNode(type, op1, op2, simdBaseJitType, simdSize);
op1 = gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);


// result = op1 + constVector
// result = op1 + constVectorDup2
// -or-
// -or-
// result = op1 - constVector
// result = op1 - constVectorDup2
return gtNewSimdBinOpNode(fixupOp2, type, op1, vecCon3, opJitType, simdSize);
return gtNewSimdBinOpNode(fixupOp2, type, op1, constVectorDup2, opJitType, simdSize);
}
}


// int / uint: SSE41 intrinsic when available, otherwise the generic fallback.
case TYP_INT:
case TYP_INT:
case TYP_UINT:
case TYP_UINT:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
{
intrinsic = NI_SSE41_Max;
intrinsic = NI_SSE41_Min;
}
}
break;
break;
}
}


// long / ulong: AVX512 intrinsic when available, otherwise the generic fallback.
case TYP_LONG:
case TYP_LONG:
case TYP_ULONG:
case TYP_ULONG:
{
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
{
intrinsic = NI_AVX512_Max;
intrinsic = NI_AVX512_Min;
}
}
break;
break;
}
}


// These element types select the X86Base intrinsic without any further ISA check.
case TYP_FLOAT:
case TYP_FLOAT:
case TYP_UBYTE:
case TYP_UBYTE:
case TYP_SHORT:
case TYP_SHORT:
case TYP_DOUBLE:
case TYP_DOUBLE:
{
{
intrinsic = NI_X86Base_Max;
intrinsic = NI_X86Base_Min;
break;
break;
}
}


default:
default:
{
{
unreached();
unreached();
}
}
}
}
}
}
#elif defined(TARGET_ARM64)
#elif defined(TARGET_ARM64)
// Arm64: long elements get no intrinsic here and take the generic fallback below.
if (!varTypeIsLong(simdBaseType))
if (!varTypeIsLong(simdBaseType))
{
{
if (simdBaseType == TYP_DOUBLE)
if (simdBaseType == TYP_DOUBLE)
{
{
// Double uses the Arm64-specific intrinsic; the scalar form when simdSize is 8.
intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_Max;
intrinsic = (simdSize == 8) ? NI_AdvSimd_Arm64_MinScalar : NI_AdvSimd_Arm64_Min;
}
}
else
else
{
{
intrinsic = NI_AdvSimd_Max;
intrinsic = NI_AdvSimd_Min;
}
}
}
}
#else
#else
#error Unsupported platform
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64
#endif // !TARGET_XARCH && !TARGET_ARM64


// A direct intrinsic was found: emit it and finish.
if (intrinsic != NI_Illegal)
if (intrinsic != NI_Illegal)
{
{
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
}
}


// Generic fallback: each operand is used by both the compare and the select, so a second
// use of each is created first.
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op2Dup = fgMakeMultiUse(&op2);


// op1 = op1 > op2
// op1 = op1 < op2
op1 = gtNewSimdCmpOpNode(GT_GT, type, op1, op2, simdBaseJitType, simdSize);
op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, op2, simdBaseJitType, simdSize);


// result = ConditionalSelect(op1, op1Dup, op2Dup)
// result = ConditionalSelect(op1, op1Dup, op2Dup)
return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseJitType, simdSize);
}
}