Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use rocblas_pointer_mode_{host, device} in rocblas_*scal #1842

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
129 changes: 57 additions & 72 deletions blas/impl/KokkosBlas1_scal_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_InnerProductSpaceTraits.hpp>
#include <KokkosBlas1_scal_spec.hpp>
#include <KokkosKernels_AlwaysFalse.hpp>
#include <KokkosBlas1_scal_unified_scalar_view_impl.hpp>
#include <KokkosKernels_ScalarHint.hpp>

#ifndef KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL
#define KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL 2
Expand All @@ -28,7 +31,10 @@
namespace KokkosBlas {
namespace Impl {

// Single-vector version of MV_Scal_Functor. By default, a is still a

// Single-vector version of MV_Scal_Functor.
// a has been unified into either a scalar or a 0D view

// 1-D View. Below is a partial specialization that lets a be a
// scalar. This functor computes any of the following:
//
Expand All @@ -42,7 +48,7 @@ namespace Impl {
// Any literal coefficient of zero has BLAS semantics of ignoring the
// corresponding (multi)vector entry. This does not apply to
// coefficients in the a vector, if used.
template <class RV, class AV, class XV, int scalar_x, class SizeType>
template <class RV, class AV, class XV, KokkosKernels::Impl::ScalarHint ALPHA_HINT, class SizeType>
struct V_Scal_Functor {
typedef SizeType size_type;
typedef Kokkos::ArithTraits<typename RV::non_const_value_type> ATS;
Expand All @@ -51,90 +57,70 @@ struct V_Scal_Functor {
XV m_x;
AV m_a;

V_Scal_Functor(const RV& r, const XV& x, const AV& a,
const SizeType startingColumn)
V_Scal_Functor(const RV& r, const XV& x, const AV& a)
: m_r(r), m_x(x), m_a(a) {
static_assert(Kokkos::is_view<RV>::value,
"V_Scal_Functor: RV is not a Kokkos::View.");
static_assert(Kokkos::is_view<AV>::value,
"V_Scal_Functor: AV is not a Kokkos::View.");

// TODO: static assert truths about AV

static_assert(Kokkos::is_view<XV>::value,
"V_Scal_Functor: XV is not a Kokkos::View.");
static_assert(RV::rank == 1, "V_Scal_Functor: RV is not rank 1.");
static_assert(AV::rank == 1, "V_Scal_Functor: AV is not rank 1.");
static_assert(XV::rank == 1, "V_Scal_Functor: XV is not rank 1.");

if (startingColumn != 0) {
m_a = Kokkos::subview(
a,
std::make_pair(startingColumn, static_cast<SizeType>(a.extent(0))));
}
}

KOKKOS_INLINE_FUNCTION
void operator()(const size_type& i) const {
// scalar_x is a compile-time constant (since it is a template

using ScalarHint = KokkosKernels::Impl::ScalarHint;

// scalar_a is a compile-time constant (since it is a template
// parameter), so the compiler should evaluate these branches at
// compile time.
if (scalar_x == 0) {
if constexpr (ALPHA_HINT == ScalarHint::zero) {
m_r(i) = ATS::zero();
}
if (scalar_x == -1) {
else if constexpr (ALPHA_HINT == ScalarHint::neg_one) {
m_r(i) = -m_x(i);
}
if (scalar_x == 1) {
else if constexpr (ALPHA_HINT == ScalarHint::pos_one) {
m_r(i) = m_x(i);
}
if (scalar_x == 2) {
m_r(i) = m_a(0) * m_x(i);
else if constexpr (ALPHA_HINT == ScalarHint::none) {
m_r(i) = KokkosBlas::Impl::as_scalar(m_a) * m_x(i);
}
else {
static_assert(KokkosKernels::Impl::always_false_v<AV>, "Unexpected value for ALPHA_HINT");
}
}
};

// Partial specialization of V_Scal_Functor that lets a be a scalar
// (rather than a 1-D View, as in the most general version above).
// This functor computes any of the following:
//
// 1. Y(i) = alpha*X(i) for alpha in -1,0,1
// 2. Y(i) = a*X(i)
template <class RV, class XV, int scalar_x, class SizeType>
struct V_Scal_Functor<RV, typename XV::non_const_value_type, XV, scalar_x,
SizeType> {
typedef SizeType size_type;
typedef Kokkos::ArithTraits<typename RV::non_const_value_type> ATS;
/*! \brief

RV m_r;
XV m_x;
const typename XV::non_const_value_type m_a;
r(i) = av * x(i)
r(i) = av() * x(i)

V_Scal_Functor(const RV& r, const XV& x,
const typename XV::non_const_value_type& a,
const SizeType /* startingColumn */)
: m_r(r), m_x(x), m_a(a) {}
\param space
\param r
\param av
\param x
\param alphaHint A KokkosKernels::Impl::ScalarHint corresponding to the value of av. If not KokkosKernels::Impl:ß:ScalarHint::none, may be used to optimize the implementation

KOKKOS_INLINE_FUNCTION
void operator()(const size_type& i) const {
if (scalar_x == 0) {
m_r(i) = ATS::zero();
}
if (scalar_x == -1) {
m_r(i) = -m_x(i);
}
if (scalar_x == 1) {
m_r(i) = m_x(i);
}
if (scalar_x == 2) {
m_r(i) = m_a * m_x(i);
}
}
};
\tparam SizeType
\tparam ExecutionSpace
\tparam RV
\tparam AV
\tparam XV

*/
template <typename SizeType, typename ExecutionSpace, typename RV, typename AV, typename XV>
void V_Scal_Generic(const ExecutionSpace& space, const RV& r, const AV& av,
const XV& x,
const KokkosKernels::Impl::ScalarHint &alphaHint = KokkosKernels::Impl::ScalarHint::none) {

// TODO: assert some things about AV

// Variant of MV_Scal_Generic for single vectors (1-D Views) r and x.
// As above, av is either a 1-D View (and only its first entry will be
// read), or a scalar.
template <class execution_space, class RV, class AV, class XV, class SizeType>
void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av,
const XV& x, const SizeType startingColumn, int a = 2) {
static_assert(Kokkos::is_view<RV>::value,
"V_Scal_Generic: RV is not a Kokkos::View.");
static_assert(Kokkos::is_view<XV>::value,
Expand All @@ -143,27 +129,26 @@ void V_Scal_Generic(const execution_space& space, const RV& r, const AV& av,
static_assert(XV::rank == 1, "V_Scal_Generic: XV is not rank 1.");

const SizeType numRows = x.extent(0);
Kokkos::RangePolicy<execution_space, SizeType> policy(space, 0, numRows);
Kokkos::RangePolicy<ExecutionSpace, SizeType> policy(space, 0, numRows);

if (a == 0) {
V_Scal_Functor<RV, AV, XV, 0, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S0", policy, op);
if (alphaHint == KokkosKernels::Impl::ScalarHint::zero) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::zero, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::0", policy, op);
return;
}
if (a == -1) {
V_Scal_Functor<RV, AV, XV, -1, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S1", policy, op);
else if (alphaHint == KokkosKernels::Impl::ScalarHint::neg_one) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::neg_one, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::-1", policy, op);
return;
}
if (a == 1) {
V_Scal_Functor<RV, AV, XV, 1, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S2", policy, op);
else if (alphaHint == KokkosKernels::Impl::ScalarHint::pos_one) {
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::pos_one, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::1", policy, op);
return;
}

// a arbitrary (not -1, 0, or 1)
V_Scal_Functor<RV, AV, XV, 2, SizeType> op(r, x, av, startingColumn);
Kokkos::parallel_for("KokkosBlas::Scal::S3", policy, op);
V_Scal_Functor<RV, AV, XV, KokkosKernels::Impl::ScalarHint::none, SizeType> op(r, x, av);
Kokkos::parallel_for("KokkosBlas::Scal::none", policy, op);
}

} // namespace Impl
Expand Down
45 changes: 29 additions & 16 deletions blas/impl/KokkosBlas1_scal_mv_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <Kokkos_InnerProductSpaceTraits.hpp>
#include <KokkosBlas1_scal_spec.hpp>
#include <KokkosBlas1_scal_impl.hpp>
#include <KokkosKernels_ScalarHint.hpp>

#ifndef KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL
#define KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL 2
Expand Down Expand Up @@ -323,24 +324,25 @@ template <class execution_space, class RMV, class aVector, class XMV,
int UNROLL, class SizeType>
void MV_Scal_Unrolled(const execution_space& space, const RMV& r,
const aVector& av, const XMV& x,
const SizeType startingColumn, int a = 2) {
if (a == 0) {
const SizeType startingColumn,
const KokkosKernels::Impl::ScalarHint &a = KokkosKernels::Impl::ScalarHint::none) {
if (a == KokkosKernels::Impl::ScalarHint::zero) {
MV_Scal_Unroll_Functor<RMV, aVector, XMV, 0, UNROLL, SizeType> op(
r, x, av, startingColumn);
const SizeType numRows = x.extent(0);
Kokkos::RangePolicy<execution_space, SizeType> policy(space, 0, numRows);
Kokkos::parallel_for("KokkosBlas::Scal::MV::S0", policy, op);
return;
}
if (a == -1) {
if (a == KokkosKernels::Impl::ScalarHint::neg_one) {
MV_Scal_Unroll_Functor<RMV, aVector, XMV, -1, UNROLL, SizeType> op(
r, x, av, startingColumn);
const SizeType numRows = x.extent(0);
Kokkos::RangePolicy<execution_space, SizeType> policy(space, 0, numRows);
Kokkos::parallel_for("KokkosBlas::Scal::MV::S1", policy, op);
return;
}
if (a == 1) {
if (a == KokkosKernels::Impl::ScalarHint::pos_one) {
MV_Scal_Unroll_Functor<RMV, aVector, XMV, 1, UNROLL, SizeType> op(
r, x, av, startingColumn);
const SizeType numRows = x.extent(0);
Expand All @@ -349,7 +351,6 @@ void MV_Scal_Unrolled(const execution_space& space, const RMV& r,
return;
}

// a arbitrary (not -1, 0, or 1)
MV_Scal_Unroll_Functor<RMV, aVector, XMV, 2, UNROLL, SizeType> op(
r, x, av, startingColumn);
const SizeType numRows = x.extent(0);
Expand Down Expand Up @@ -420,7 +421,8 @@ void MV_Scal_Generic(const execution_space& space, const RVector& r,
// coefficient(s) in av, if used.
template <class execution_space, class RMV, class AV, class XMV, class SizeType>
void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
const AV& av, const XMV& x, int a = 2) {
const AV& av, const XMV& x,
const KokkosKernels::Impl::ScalarHint &aHint = KokkosKernels::Impl::ScalarHint::none) {
const SizeType numCols = x.extent(1);

#if KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL <= 2
Expand All @@ -438,7 +440,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(R_cur) RMV2D;

MV_Scal_Unrolled<execution_space, RMV2D, AV, XMV2D, 8, SizeType>(
space, R_cur, av, X_cur, j, a);
space, R_cur, av, X_cur, j, aHint);
}
for (; j + 4 <= numCols; j += 4) {
const std::pair<SizeType, SizeType> rng(j, j + 4);
Expand All @@ -448,7 +450,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(R_cur) RMV2D;

MV_Scal_Unrolled<execution_space, RMV2D, AV, XMV2D, 4, SizeType>(
space, R_cur, av, X_cur, j, a);
space, R_cur, av, X_cur, j, aHint);
}
for (; j < numCols; ++j) {
// RMV and XMV need to turn 1-D.
Expand All @@ -457,8 +459,21 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(r_cur) RV;
typedef decltype(x_cur) XV;

V_Scal_Generic<execution_space, RV, AV, XV, SizeType>(space, r_cur, av,
x_cur, j, a);
// If AV is a rank-one vector, get a rank-0 subview
// Otherwise, just pass along AV as-is
// can't short-circuit if constexpr :(
if constexpr (Kokkos::is_view_v<AV>) {
if constexpr (AV::rank == 1) {
auto a_cur = Kokkos::subview(av, j);
V_Scal_Generic<SizeType>(space, r_cur, a_cur, x_cur, aHint);
} else {
V_Scal_Generic<SizeType>(space, r_cur, av, x_cur, aHint);
}
} else {
V_Scal_Generic<SizeType>(space, r_cur, av, x_cur, aHint);
}


}

#else // KOKKOSBLAS_OPTIMIZATION_LEVEL_SCAL > 2
Expand All @@ -470,7 +485,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
typedef decltype(r_0) RV;
typedef decltype(x_0) XV;

V_Scal_Generic<execution_space, RV, AV, XV, SizeType>(space, r_0, av, x_0,
V_Scal_Generic<SizeType>(space, r_0, av, x_0,
0, a);
break;
}
Expand Down Expand Up @@ -535,7 +550,7 @@ void MV_Scal_Invoke_Left(const execution_space& space, const RMV& r,
space, r, av, x, 0, a);
break;
default:
MV_Scal_Generic<execution_space, RMV, AV, XMV, SizeType>(space, r, av, x,
MV_Scal_Generic<SizeType>(space, r, av, x,
0, a);
}

Expand Down Expand Up @@ -572,11 +587,9 @@ void MV_Scal_Invoke_Right(const execution_space& space, const RMV& r,

RV r_0 = Kokkos::subview(r, Kokkos::ALL(), 0);
XV x_0 = Kokkos::subview(x, Kokkos::ALL(), 0);
V_Scal_Generic<execution_space, RMV, aVector, XMV, 1, SizeType>(space, r_0,
av, x_0, a);
V_Scal_Generic<SizeType>(space, r_0, av, x_0, a);
} else {
MV_Scal_Generic<execution_space, RMV, aVector, XMV, SizeType>(space, r, av,
x, a);
MV_Scal_Generic<SizeType>(space, r, av, x, a);
}
}

Expand Down
Loading