Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
* Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746)
* Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747)
* Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754)
* Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766)

### Deprecated

Expand Down
87 changes: 11 additions & 76 deletions dpnp/backend/kernels/dpnp_krnl_sorting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,90 +70,25 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref,

sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));

if (ndim == 1) // 1d array with C-contiguous data
{
_DataType *arr = static_cast<_DataType *>(array1_in);
_DataType *result = static_cast<_DataType *>(result1);
_DataType *arr = static_cast<_DataType *>(array1_in);
_DataType *result = static_cast<_DataType *>(result1);

auto policy = oneapi::dpl::execution::make_device_policy<
dpnp_partition_c_kernel<_DataType>>(q);
auto policy = oneapi::dpl::execution::make_device_policy<
dpnp_partition_c_kernel<_DataType>>(q);

// fill the result array with data from input one
q.memcpy(result, arr, size * sizeof(_DataType)).wait();
// fill the result array with data from input one
q.memcpy(result, arr, size * sizeof(_DataType)).wait();

// make a partial sorting such that:
for (size_t i = 0; i < size_; i++) {
_DataType *bufptr = result + i * shape_[0];

// for every slice it makes a partial sorting such that:
// 1. result[0 <= i < kth] <= result[kth]
// 2. result[kth <= i < size] >= result[kth]
// event-blocking call, no need for wait()
std::nth_element(policy, result, result + kth, result + size,
std::nth_element(policy, bufptr, bufptr + kth, bufptr + size,
dpnp_less_comp());
return event_ref;
}

DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true);
DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, array2_in, size, true);
DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, true, true);
_DataType *arr = input1_ptr.get_ptr();
_DataType *arr2 = input2_ptr.get_ptr();
_DataType *result = result1_ptr.get_ptr();

auto arr_to_result_event = q.memcpy(result, arr, size * sizeof(_DataType));
arr_to_result_event.wait();

_DataType *matrix = new _DataType[shape_[ndim - 1]];

for (size_t i = 0; i < size_; ++i) {
size_t ind_begin = i * shape_[ndim - 1];
size_t ind_end = (i + 1) * shape_[ndim - 1] - 1;

for (size_t j = ind_begin; j < ind_end + 1; ++j) {
size_t ind = j - ind_begin;
matrix[ind] = arr2[j];
}
std::partial_sort(matrix, matrix + shape_[ndim - 1],
matrix + shape_[ndim - 1], dpnp_less_comp());
for (size_t j = ind_begin; j < ind_end + 1; ++j) {
size_t ind = j - ind_begin;
arr2[j] = matrix[ind];
}
}

shape_elem_type *shape = reinterpret_cast<shape_elem_type *>(
sycl::malloc_shared(ndim * sizeof(shape_elem_type), q));
auto memcpy_event = q.memcpy(shape, shape_, ndim * sizeof(shape_elem_type));

memcpy_event.wait();

sycl::range<2> gws(size_, kth + 1);
auto kernel_parallel_for_func = [=](sycl::id<2> global_id) {
size_t j = global_id[0];
size_t k = global_id[1];

_DataType val = arr2[j * shape[ndim - 1] + k];

for (size_t i = 0; i < static_cast<size_t>(shape[ndim - 1]); ++i) {
if (result[j * shape[ndim - 1] + i] == val) {
_DataType change_val1 = result[j * shape[ndim - 1] + i];
_DataType change_val2 = result[j * shape[ndim - 1] + k];
result[j * shape[ndim - 1] + k] = change_val1;
result[j * shape[ndim - 1] + i] = change_val2;
}
}
};

auto kernel_func = [&](sycl::handler &cgh) {
cgh.depends_on({memcpy_event});
cgh.parallel_for<class dpnp_partition_c_kernel<_DataType>>(
gws, kernel_parallel_for_func);
};

auto event = q.submit(kernel_func);

event.wait();

delete[] matrix;
sycl::free(shape, q);

return event_ref;
}

Expand Down
41 changes: 30 additions & 11 deletions dpnp/dpnp_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,35 +1459,54 @@ def nonzero(self):

def partition(self, /, kth, axis=-1, kind="introselect", order=None):
"""
Return a partitioned copy of an array.
Partially sorts the elements in the array in such a way that the value
of the element in k-th position is in the position it would be in a
sorted array. In the output array, all elements smaller than the k-th
element are located to the left of this element and all equal or
greater are located to its right. The ordering of the elements in the
two partitions on the either side of the k-th element in the output
array is undefined.

Rearranges the elements in the array in such a way that the value of
the element in `kth` position is in the position it would be in
a sorted array.
Refer to `dpnp.partition` for full documentation.

All elements smaller than the `kth` element are moved before this
element and all equal or greater are moved behind it. The ordering
of the elements in the two partitions is undefined.
kth : {int, sequence of ints}
Element index to partition by. The kth element value will be in its
final sorted position and all smaller elements will be moved before
it and all equal or greater elements behind it.
The order of all elements in the partitions is undefined. If
provided with a sequence of kth it will partition all elements
indexed by kth of them into their sorted position at once.
axis : int, optional
Axis along which to sort. The default is ``-1``, which means sort
along the the last axis.

Refer to `dpnp.partition` for full documentation.
Default: ``-1``.

See Also
--------
:obj:`dpnp.partition` : Return a partitioned copy of an array.
:obj:`dpnp.argpartition` : Indirect partition.
:obj:`dpnp.sort` : Full sort.

Examples
--------
>>> import dpnp as np
>>> a = np.array([3, 4, 2, 1])
>>> a.partition(3)
>>> a
array([1, 2, 3, 4]) # may vary

>>> a.partition((1, 3))
>>> a
array([1, 2, 3, 4])

"""

self._array_obj = dpnp.partition(
self, kth, axis=axis, kind=kind, order=order
).get_array()
if axis is None:
raise TypeError(
"'NoneType' object cannot be interpreted as an integer"
)
self[...] = dpnp.partition(self, kth, axis=axis, kind=kind, order=order)

def prod(
self,
Expand Down
142 changes: 113 additions & 29 deletions dpnp/dpnp_iface_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@

"""

from collections.abc import Sequence

import dpctl.tensor as dpt
import numpy
from dpctl.tensor._numpy_helper import normalize_axis_index

import dpnp
Expand All @@ -51,7 +52,6 @@
)
from .dpnp_array import dpnp_array
from .dpnp_utils import (
call_origin,
map_dtype_to_device,
)

Expand Down Expand Up @@ -147,7 +147,7 @@ def argsort(

Limitations
-----------
Parameters `order` is only supported with its default value.
Parameter `order` is only supported with its default value.
Otherwise ``NotImplementedError`` exception will be raised.
Sorting algorithms ``"quicksort"`` and ``"heapsort"`` are not supported.

Expand Down Expand Up @@ -201,44 +201,128 @@ def argsort(
)


def partition(x1, kth, axis=-1, kind="introselect", order=None):
def partition(a, kth, axis=-1, kind="introselect", order=None):
"""
Return a partitioned copy of an array.

For full documentation refer to :obj:`numpy.partition`.

Parameters
----------
a : {dpnp.ndarray, usm_ndarray}
Array to be sorted.
kth : {int, sequence of ints}
Element index to partition by. The k-th value of the element will be in
its final sorted position and all smaller elements will be moved before
it and all equal or greater elements behind it. The order of all
elements in the partitions is undefined. If provided with a sequence of
k-th it will partition all elements indexed by k-th of them into their
sorted position at once.
axis : {None, int}, optional
Axis along which to sort. If ``None``, the array is flattened before
sorting. The default is ``-1``, which sorts along the last axis.

Default: ``-1``.

Returns
-------
out : dpnp.ndarray
Array of the same type and shape as `a`.

Limitations
-----------
Input array is supported as :obj:`dpnp.ndarray`.
Input `kth` is supported as :obj:`int`.
Parameters `axis`, `kind` and `order` are supported only with default
values.
Parameters `kind` and `order` are only supported with its default value.
Otherwise ``NotImplementedError`` exception will be raised.

See Also
--------
:obj:`dpnp.ndarray.partition` : Equivalent method.
:obj:`dpnp.argpartition` : Indirect partition.
:obj:`dpnp.sort` : Full sorting.

Examples
--------
>>> import dpnp as np
>>> a = np.array([7, 1, 7, 7, 1, 5, 7, 2, 3, 2, 6, 2, 3, 0])
>>> p = np.partition(a, 4)
>>> p
array([0, 1, 1, 2, 2, 2, 3, 3, 5, 7, 7, 7, 7, 6]) # may vary

``p[4]`` is 2; all elements in ``p[:4]`` are less than or equal to
``p[4]``, and all elements in ``p[5:]`` are greater than or equal to
``p[4]``. The partition is::

[0, 1, 1, 2], [2], [2, 3, 3, 5, 7, 7, 7, 7, 6]

The next example shows the use of multiple values passed to `kth`.

>>> p2 = np.partition(a, (4, 8))
>>> p2
array([0, 1, 1, 2, 2, 2, 3, 3, 5, 6, 7, 7, 7, 7])

``p2[4]`` is 2 and ``p2[8]`` is 5. All elements in ``p2[:4]`` are less
than or equal to ``p2[4]``, all elements in ``p2[5:8]`` are greater than or
equal to ``p2[4]`` and less than or equal to ``p2[8]``, and all elements in
``p2[9:]`` are greater than or equal to ``p2[8]``. The partition is::

[0, 1, 1, 2], [2], [2, 3, 3], [5], [6, 7, 7, 7, 7]

"""

x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
if x1_desc:
if dpnp.is_cuda_backend(x1_desc.get_array()): # pragma: no cover
raise NotImplementedError(
"Running on CUDA is currently not supported"
)
dpnp.check_supported_arrays_type(a)

if not isinstance(kth, int):
pass
elif x1_desc.ndim == 0:
pass
elif kth >= x1_desc.shape[x1_desc.ndim - 1] or x1_desc.ndim + kth < 0:
pass
elif axis != -1:
pass
elif kind != "introselect":
pass
elif order is not None:
pass
else:
return dpnp_partition(x1_desc, kth, axis, kind, order).get_pyobj()
if kind != "introselect":
raise NotImplementedError(
"`kind` keyword argument is only supported with its default value."
)
if order is not None:
raise NotImplementedError(
"`order` keyword argument is only supported with its default value."
)

return call_origin(numpy.partition, x1, kth, axis, kind, order)
if axis is None:
a = dpnp.ravel(a)
axis = -1

nd = a.ndim
axis = normalize_axis_index(axis, nd)
length = a.shape[axis]

if isinstance(kth, int):
kth = (kth,)
elif not isinstance(kth, Sequence):
raise TypeError(
f"kth must be int or sequence of ints, but got {type(kth)}"
)
elif not all(isinstance(k, int) for k in kth):
raise TypeError("kth is a sequence, but not all elements are integers")

nkth = len(kth)
if nkth == 0 or a.size == 0:
return dpnp.copy(a)

# validate kth
kth = list(kth)
for i in range(nkth):
if kth[i] < 0:
kth[i] += length

if not 0 <= kth[i] < length:
raise ValueError(f"kth(={kth[i]}) out of bounds {length}")

dt = a.dtype
if (
nd > 1
or nkth > 1
or dpnp.issubdtype(dt, dpnp.unsignedinteger)
or dt in (dpnp.int8, dpnp.int16)
or dpnp.is_cuda_backend(a.get_array())
):
# sort is a faster path in case of ndim > 1
return dpnp.sort(a, axis=axis)

desc = dpnp.get_dpnp_descriptor(a, copy_when_nondefault_queue=False)
return dpnp_partition(desc, kth[0], axis, kind, order).get_pyobj()


def sort(a, axis=-1, kind=None, order=None, *, descending=False, stable=None):
Expand Down
Loading
Loading