Untitled diff

Created Diff never expires
21 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
110 lines
52 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
135 lines
// Copyright (c) 2009-2011 Intel Corporation
// Copyright (c) 2009-2011 Intel Corporation
// All rights reserved.
// All rights reserved.
//
//
// WARRANTY DISCLAIMER
// WARRANTY DISCLAIMER
//
//
// THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
// MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//
// Intel Corporation is the author of the Materials, and requests that all
// Intel Corporation is the author of the Materials, and requests that all
// problem reports or change requests be submitted to it directly
// problem reports or change requests be submitted to it directly


// https://software.intel.com/en-us/articles/bitonic-sorting
// Modified to sort int2 key-value array


__kernel void /*__attribute__((vec_type_hint(int4)))*/ BitonicSort(__global int4 * theArray,
__kernel void BitonicSort(__global int8* theArray,
const uint stage,
const uint stage,
const uint passOfStage,
const uint passOfStage,
const uint dir)
const uint dir)
{
{
size_t i = get_global_id(0);
size_t i = get_global_id(0);
int4 srcLeft, srcRight, mask;
int8 srcLeft, srcRight, mask;
int4 pseudomask;
int4 imask10 = (int4)(0, 0, -1, -1);
int4 imask10 = (int4)(0, 0, -1, -1);
int4 imask11 = (int4)(0, -1, 0, -1);
int4 imask11 = (int4)(0, -1, 0, -1);


if(stage > 0)
if(stage > 0)
{
{
if(passOfStage > 0) // upper level pass, exchange between two fours, operating on half size
if(passOfStage > 0) // upper level pass, exchange between two fours,
{
{
size_t r = 1 << (passOfStage - 1);
size_t r = 1 << (passOfStage - 1);
size_t lmask = r - 1;
size_t lmask = r - 1;
size_t left = ((i>>(passOfStage-1)) << passOfStage) + (i & lmask);
size_t left = ((i>>(passOfStage-1)) << passOfStage) + (i & lmask);
size_t right = left + r;
size_t right = left + r;


srcLeft = theArray[left];
srcLeft = theArray[left];
srcRight = theArray[right];
srcRight = theArray[right];
mask = srcLeft < srcRight;
pseudomask = srcLeft.even < srcRight.even;
mask = pseudomask.xxyyzzww;


int4 imin = (srcLeft & mask) | (srcRight & ~mask);
int8 imin = (srcLeft & mask) | (srcRight & ~mask);
int4 imax = (srcLeft & ~mask) | (srcRight & mask);
int8 imax = (srcLeft & ~mask) | (srcRight & mask);


if( ((i>>(stage-1)) & 1) ^ dir )
if( ((i>>(stage-1)) & 1) ^ dir )
{
{
theArray[left] = imin;
theArray[left] = imin;
theArray[right] = imax;
theArray[right] = imax;
}
}
else
else
{
{
theArray[right] = imin;
theArray[right] = imin;
theArray[left] = imax;
theArray[left] = imax;
}
}
}
}
else // last pass, sort inside one four, operating on full size
else // last pass, sort inside one four
{
{
srcLeft = theArray[i];
srcLeft = theArray[i];
srcRight = srcLeft.zwxy;
srcRight = srcLeft.s45670123;
mask = (srcLeft < srcRight) ^ imask10;
pseudomask = (srcLeft.even < srcRight.even) ^ imask10;
mask = pseudomask.xxyyzzww;


if(((i >> stage) & 1) ^ dir)
if(((i >> stage) & 1) ^ dir)
{
{
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
}
}
else
else
{
{
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
}
}
}
}
}
}
else // first stage, sort inside one four
else // first stage, sort inside one four
{
{
int4 imask0 = (int4)(0, -1, -1, 0);
int4 imask0 = (int4)(0, -1, -1, 0);
srcLeft = theArray[i];
srcLeft = theArray[i];
srcRight = srcLeft.yxwz;
srcRight = srcLeft.s23016745;
mask = (srcLeft < srcRight) ^ imask0;


pseudomask = (srcLeft.even < srcRight.even) ^ imask0;
mask = pseudomask.xxyyzzww;

if( dir )
if( dir )
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
else
else
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);


srcRight = srcLeft.zwxy;

mask = (srcLeft < srcRight) ^ imask10;
srcRight = srcLeft.s45670123;

pseudomask = (srcLeft.even < srcRight.even) ^ imask10;
mask = pseudomask.xxyyzzww;


if((i & 1) ^ dir)
if((i & 1) ^ dir)
{
{

srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
}
}
else
else
{
{
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
}
}
}
}
}
}