Untitled diff

Created Diff never expires
52 removals
110 lines
81 additions
135 lines
// Copyright (c) 2009-2011 Intel Corporation
// Copyright (c) 2009-2011 Intel Corporation
// All rights reserved.
// All rights reserved.
//
//
// WARRANTY DISCLAIMER
// WARRANTY DISCLAIMER
//
//
// THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
// MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//
// Intel Corporation is the author of the Materials, and requests that all
// Intel Corporation is the author of the Materials, and requests that all
// problem reports or change requests be submitted to it directly
// problem reports or change requests be submitted to it directly


// https://software.intel.com/en-us/articles/bitonic-sorting
// Modified to sort int2 key-value array


__kernel void /*__attribute__((vec_type_hint(int4)))*/ BitonicSort(__global int4 * theArray,
__kernel void BitonicSort(__global int8* theArray,
const uint stage,
const uint stage,
const uint passOfStage,
const uint passOfStage,
const uint dir)
const uint dir)
{
{
size_t i = get_global_id(0);
size_t i = get_global_id(0);
int4 srcLeft, srcRight, mask;
int8 srcLeft, srcRight, mask;
int4 pseudomask;
int4 imask10 = (int4)(0, 0, -1, -1);
int4 imask10 = (int4)(0, 0, -1, -1);
int4 imask11 = (int4)(0, -1, 0, -1);
int4 imask11 = (int4)(0, -1, 0, -1);


if(stage > 0)
if(stage > 0)
{
{
if(passOfStage > 0) // upper level pass, exchange between two fours, operating on half size
if(passOfStage > 0) // upper level pass, exchange between two fours,
{
{
size_t r = 1 << (passOfStage - 1);
size_t r = 1 << (passOfStage - 1);
size_t lmask = r - 1;
size_t lmask = r - 1;
size_t left = ((i>>(passOfStage-1)) << passOfStage) + (i & lmask);
size_t left = ((i>>(passOfStage-1)) << passOfStage) + (i & lmask);
size_t right = left + r;
size_t right = left + r;


srcLeft = theArray[left];
srcLeft = theArray[left];
srcRight = theArray[right];
srcRight = theArray[right];
mask = srcLeft < srcRight;
pseudomask = srcLeft.even < srcRight.even;
mask = pseudomask.xxyyzzww;


int4 imin = (srcLeft & mask) | (srcRight & ~mask);
int8 imin = (srcLeft & mask) | (srcRight & ~mask);
int4 imax = (srcLeft & ~mask) | (srcRight & mask);
int8 imax = (srcLeft & ~mask) | (srcRight & mask);


if( ((i>>(stage-1)) & 1) ^ dir )
if( ((i>>(stage-1)) & 1) ^ dir )
{
{
theArray[left] = imin;
theArray[left] = imin;
theArray[right] = imax;
theArray[right] = imax;
}
}
else
else
{
{
theArray[right] = imin;
theArray[right] = imin;
theArray[left] = imax;
theArray[left] = imax;
}
}
}
}
else // last pass, sort inside one four, operating on full size
else // last pass, sort inside one four
{
{
srcLeft = theArray[i];
srcLeft = theArray[i];
srcRight = srcLeft.zwxy;
srcRight = srcLeft.s45670123;
mask = (srcLeft < srcRight) ^ imask10;
pseudomask = (srcLeft.even < srcRight.even) ^ imask10;
mask = pseudomask.xxyyzzww;


if(((i >> stage) & 1) ^ dir)
if(((i >> stage) & 1) ^ dir)
{
{
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
}
}
else
else
{
{
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
}
}
}
}
}
}
else // first stage, sort inside one four
else // first stage, sort inside one four
{
{
int4 imask0 = (int4)(0, -1, -1, 0);
int4 imask0 = (int4)(0, -1, -1, 0);
srcLeft = theArray[i];
srcLeft = theArray[i];
srcRight = srcLeft.yxwz;
srcRight = srcLeft.s23016745;
mask = (srcLeft < srcRight) ^ imask0;


pseudomask = (srcLeft.even < srcRight.even) ^ imask0;
mask = pseudomask.xxyyzzww;

if( dir )
if( dir )
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
else
else
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);


srcRight = srcLeft.zwxy;

mask = (srcLeft < srcRight) ^ imask10;
srcRight = srcLeft.s45670123;

pseudomask = (srcLeft.even < srcRight.even) ^ imask10;
mask = pseudomask.xxyyzzww;


if((i & 1) ^ dir)
if((i & 1) ^ dir)
{
{

srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcLeft = (srcLeft & mask) | (srcRight & ~mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
theArray[i] = (srcLeft & mask) | (srcRight & ~mask);
}
}
else
else
{
{
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcLeft = (srcLeft & ~mask) | (srcRight & mask);
srcRight = srcLeft.yxwz;

mask = (srcLeft < srcRight) ^ imask11;
srcRight = srcLeft.s23016745;
pseudomask = (srcLeft.even < srcRight.even) ^ imask11;
mask = pseudomask.xxyyzzww;

theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
theArray[i] = (srcLeft & ~mask) | (srcRight & mask);
}
}
}
}
}
}