Skip to content

Commit 9b22b9e

Browse files
committed
Added load_as / store_as / broadcast_as APIs
1 parent 7462f0a commit 9b22b9e

File tree

8 files changed

+188
-39
lines changed

8 files changed

+188
-39
lines changed

include/xsimd/arch/generic/xsimd_generic_memory.hpp

+16-20
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,6 @@ namespace xsimd {
7070
batch<T_out, A> load_aligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) {
7171
return detail::load_aligned<A>(mem, cvt, A{}, detail::conversion_type<A, T_in, T_out>{});
7272
}
73-
template<class A, class T>
74-
batch<std::complex<T>, A> load_aligned(std::complex<T> const* mem, convert<std::complex<T>>, requires_arch<generic>) {
75-
return batch<std::complex<T>, A>::load_aligned(mem);
76-
}
7773

7874
// load_unaligned
7975
namespace detail {
@@ -94,10 +90,6 @@ namespace xsimd {
9490
batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) {
9591
return detail::load_unaligned<A>(mem, cvt, generic{}, detail::conversion_type<A, T_in, T_out>{});
9692
}
97-
template<class A, class T>
98-
batch<std::complex<T>, A> load_unaligned(std::complex<T> const* mem, convert<std::complex<T>>, requires_arch<generic>) {
99-
return batch<std::complex<T>, A>::load_unaligned(mem);
100-
}
10193

10294
// store
10395
template<class T, class A>
@@ -147,39 +139,43 @@ namespace xsimd {
147139
}
148140

149141
// load_complex_aligned
150-
template <class A, class T> batch<std::complex<T>, A> load_complex_aligned(std::complex<T> const* mem, requires_arch<generic>) {
151-
using real_batch = batch<T, A>;
152-
T const *buffer = reinterpret_cast<T const *>(mem);
142+
template <class A, class T_out, class T_in>
143+
batch<std::complex<T_out>, A> load_complex_aligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) {
144+
using real_batch = batch<T_out, A>;
145+
T_in const* buffer = reinterpret_cast<T_in const*>(mem);
153146
real_batch hi = real_batch::load_aligned(buffer),
154147
lo = real_batch::load_aligned(buffer + real_batch::size);
155148
return detail::load_complex(hi, lo, A{});
156149
}
157150

158151
// load_complex_unaligned
159-
template <class A, class T> batch<std::complex<T>, A> load_complex_unaligned(std::complex<T> const* mem, requires_arch<generic>) {
160-
using real_batch = batch<T, A>;
161-
T const *buffer = reinterpret_cast<T const *>(mem);
152+
template <class A, class T_out, class T_in>
153+
batch<std::complex<T_out>, A> load_complex_unaligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) {
154+
using real_batch = batch<T_out, A>;
155+
T_in const* buffer = reinterpret_cast<T_in const*>(mem);
162156
real_batch hi = real_batch::load_unaligned(buffer),
163157
lo = real_batch::load_unaligned(buffer + real_batch::size);
164158
return detail::load_complex(hi, lo, A{});
165159
}
166160

167161
// store_complex_aligned
168-
template <class A, class T> void store_complex_aligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<generic>) {
169-
using real_batch = batch<T, A>;
162+
template <class A, class T_out, class T_in>
163+
void store_complex_aligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) {
164+
using real_batch = batch<T_in, A>;
170165
real_batch hi = detail::complex_high(src, A{});
171166
real_batch lo = detail::complex_low(src, A{});
172-
T * buffer = reinterpret_cast<T*>(dst);
167+
T_out* buffer = reinterpret_cast<T_out*>(dst);
173168
lo.store_aligned(buffer);
174169
hi.store_aligned(buffer + real_batch::size);
175170
}
176171

177172
// store_complex_unaligned
178-
template <class A, class T> void store_complex_unaligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<generic>) {
179-
using real_batch = batch<T, A>;
173+
template <class A, class T_out, class T_in>
174+
void store_complex_unaligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) {
175+
using real_batch = batch<T_in, A>;
180176
real_batch hi = detail::complex_high(src, A{});
181177
real_batch lo = detail::complex_low(src, A{});
182-
T * buffer = reinterpret_cast<T *>(dst);
178+
T_out* buffer = reinterpret_cast<T_out*>(dst);
183179
lo.store_unaligned(buffer);
184180
hi.store_unaligned(buffer + real_batch::size);
185181
}

include/xsimd/arch/xsimd_neon.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ namespace xsimd
525525
****************/
526526

527527
template <class A>
528-
batch<std::complex<float>, A> load_complex_aligned(std::complex<float> const* mem, requires_arch<neon>)
528+
batch<std::complex<float>, A> load_complex_aligned(std::complex<float> const* mem, convert<std::complex<float>>, requires_arch<neon>)
529529
{
530530
using real_batch = batch<float, A>;
531531
const float* buf = reinterpret_cast<const float*>(mem);
@@ -536,9 +536,9 @@ namespace xsimd
536536
}
537537

538538
template <class A>
539-
batch<std::complex<float>, A> load_complex_unaligned(std::complex<float> const* mem, requires_arch<neon>)
539+
batch<std::complex<float>, A> load_complex_unaligned(std::complex<float> const* mem, convert<std::complex<float>> cvt, requires_arch<neon>)
540540
{
541-
return load_complex_aligned<A>(mem, A{});
541+
return load_complex_aligned<A>(mem, cvt, A{});
542542
}
543543

544544
/*****************

include/xsimd/arch/xsimd_neon64.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ namespace xsimd
163163
****************/
164164

165165
template <class A>
166-
batch<std::complex<double>, A> load_complex_aligned(std::complex<double> const* mem, requires_arch<neon64>)
166+
batch<std::complex<double>, A> load_complex_aligned(std::complex<double> const* mem, convert<std::complex<double>>, requires_arch<neon64>)
167167
{
168168
using real_batch = batch<double, A>;
169169
const double* buf = reinterpret_cast<const double*>(mem);
@@ -174,9 +174,9 @@ namespace xsimd
174174
}
175175

176176
template <class A>
177-
batch<std::complex<double>, A> load_complex_unaligned(std::complex<double> const* mem, requires_arch<neon64>)
177+
batch<std::complex<double>, A> load_complex_unaligned(std::complex<double> const* mem, convert<std::complex<double>> cvt, requires_arch<neon64>)
178178
{
179-
return load_complex_aligned<A>(mem, A{});
179+
return load_complex_aligned<A>(mem, cvt, A{});
180180
}
181181

182182
/*****************

include/xsimd/types/xsimd_api.hpp

+149-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <ostream>
1919

2020
#include "../types/xsimd_batch.hpp"
21+
#include "../types/xsimd_traits.hpp"
2122
#include "../arch/xsimd_isa.hpp"
2223

2324
namespace xsimd {
@@ -323,6 +324,23 @@ batch<T, A> broadcast(T v) {
323324
return kernel::broadcast<A>(v, A{});
324325
}
325326

327+
/**
328+
* @ingroup batch_data_transfer
329+
*
330+
* Creates a batch from the single value \c v and
331+
* the specified batch value type \c To.
332+
* @param v the value used to initialize the batch
333+
* @return a new batch instance
334+
*/
335+
template <class To, class A=default_arch, class From>
336+
simd_return_type<From, To> broadcast_as(From v) {
337+
using batch_value_type = typename simd_return_type<From, To>::value_type;
338+
using value_type = typename std::conditional<std::is_same<From, bool>::value,
339+
bool,
340+
batch_value_type>::type;
341+
return simd_return_type<From, To>(value_type(v));
342+
}
343+
326344
/**
327345
* @ingroup batch_math
328346
*
@@ -959,6 +977,58 @@ batch<From, A> load_unaligned(From const* ptr) {
959977
return kernel::load_unaligned<A>(ptr, kernel::convert<From>{}, A{});
960978
}
961979

980+
/**
981+
* @ingroup batch_data_transfer
982+
*
983+
* Creates a batch from the buffer \c ptr and the specifed
984+
* batch value type \c To. The memory needs to be aligned.
985+
* @param ptr the memory buffer to read
986+
* @return a new batch instance
987+
*/
988+
template <class To, class A=default_arch, class From>
989+
simd_return_type<From, To> load_as(From const* ptr, aligned_mode) {
990+
using batch_value_type = typename simd_return_type<From, To>::value_type;
991+
return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
992+
}
993+
994+
template <class To, class A = default_arch>
995+
simd_return_type<bool, To> load_as(bool const* ptr, aligned_mode) {
996+
return simd_return_type<bool, To>::load_aligned(ptr);
997+
}
998+
999+
template <class To, class A=default_arch, class From>
1000+
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, aligned_mode)
1001+
{
1002+
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
1003+
return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1004+
}
1005+
1006+
/**
1007+
* @ingroup batch_data_transfer
1008+
*
1009+
* Creates a batch from the buffer \c ptr and the specifed
1010+
* batch value type \c To. The memory does not need to be aligned.
1011+
* @param ptr the memory buffer to read
1012+
* @return a new batch instance
1013+
*/
1014+
template <class To, class A=default_arch, class From>
1015+
simd_return_type<From, To> load_as(From const* ptr, unaligned_mode) {
1016+
using batch_value_type = typename simd_return_type<From, To>::value_type;
1017+
return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1018+
}
1019+
1020+
template <class To, class A = default_arch>
1021+
simd_return_type<bool, To> load_as(bool const* ptr, unaligned_mode) {
1022+
return simd_return_type<bool, To>::load_unaligned(ptr);
1023+
}
1024+
1025+
template <class To, class A=default_arch, class From>
1026+
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, unaligned_mode)
1027+
{
1028+
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
1029+
return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1030+
}
1031+
9621032
/**
9631033
* @ingroup batch_math
9641034
*
@@ -1423,8 +1493,8 @@ auto ssub(T const& x, Tp const& y) -> decltype(x - y) {
14231493
* @param mem the memory buffer to write to
14241494
* @param val the batch to copy from
14251495
*/
1426-
template<class To, class A, class From>
1427-
void store(From* mem, batch<To, A> const& val, aligned_mode={}) {
1496+
template<class A, class T>
1497+
void store(T* mem, batch<T, A> const& val, aligned_mode={}) {
14281498
return kernel::store_aligned<A>(mem, val, A{});
14291499
}
14301500

@@ -1436,8 +1506,8 @@ void store(From* mem, batch<To, A> const& val, aligned_mode={}) {
14361506
* @param mem the memory buffer to write to
14371507
* @param val the batch to copy from
14381508
*/
1439-
template<class To, class A, class From>
1440-
void store(To* mem, batch<From, A> const& val, unaligned_mode) {
1509+
template<class A, class T>
1510+
void store(T* mem, batch<T, A> const& val, unaligned_mode) {
14411511
return kernel::store_unaligned<A>(mem, val, A{});
14421512
}
14431513

@@ -1449,8 +1519,8 @@ void store(To* mem, batch<From, A> const& val, unaligned_mode) {
14491519
* @param mem the memory buffer to write to
14501520
* @param val the batch to copy from
14511521
*/
1452-
template<class To, class A, class From>
1453-
void store_aligned(To* mem, batch<From, A> const& val) {
1522+
template<class A, class T>
1523+
void store_aligned(T* mem, batch<T, A> const& val) {
14541524
return kernel::store_aligned<A>(mem, val, A{});
14551525
}
14561526

@@ -1462,11 +1532,82 @@ void store_aligned(To* mem, batch<From, A> const& val) {
14621532
* @param mem the memory buffer to write to
14631533
* @param val the batch to copy
14641534
*/
1465-
template<class To, class A, class From>
1466-
void store_unaligned(To* mem, batch<From, A> const& val) {
1535+
template<class A, class T>
1536+
void store_unaligned(T* mem, batch<T, A> const& val) {
14671537
return kernel::store_unaligned<A>(mem, val, A{});
14681538
}
14691539

1540+
/**
1541+
* @ingroup batch_data_transfer
1542+
*
1543+
* Copy content of batch \c src to the buffer \c dst. The
1544+
* memory needs to be aligned.
1545+
* @param dst the memory buffer to write to
1546+
* @param src the batch to copy
1547+
*/
1548+
template <class To, class A=default_arch, class From>
1549+
void store_as(To* dst, batch<From, A> const& src, aligned_mode) {
1550+
kernel::store_aligned(dst, src, A{});
1551+
}
1552+
1553+
template <class A=default_arch, class From>
1554+
void store_as(bool* dst, batch_bool<From, A> const& src, aligned_mode) {
1555+
kernel::store(src, dst, A{});
1556+
}
1557+
1558+
template <class To, class A=default_arch, class From>
1559+
void store_as(std::complex<To>* dst, batch<std::complex<From>,A> const& src, aligned_mode) {
1560+
kernel::store_complex_aligned(dst, src, A{});
1561+
}
1562+
1563+
/**
1564+
* @ingroup batch_data_transfer
1565+
*
1566+
* Copy content of batch \c src to the buffer \c dst. The
1567+
* memory does not need to be aligned.
1568+
* @param dst the memory buffer to write to
1569+
* @param src the batch to copy
1570+
*/
1571+
template <class To, class A=default_arch, class From>
1572+
void store_as(To* dst, batch<From, A> const& src, unaligned_mode) {
1573+
kernel::store_unaligned(dst, src, A{});
1574+
}
1575+
1576+
template <class A=default_arch, class From>
1577+
void store_as(bool* dst, batch_bool<From, A> const& src, unaligned_mode) {
1578+
kernel::store(src, dst, A{});
1579+
}
1580+
1581+
template <class To, class A=default_arch, class From>
1582+
void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) {
1583+
kernel::store_complex_unaligned(dst, src, A{});
1584+
}
1585+
1586+
/**
1587+
* @ingroup batch_data_transfer
1588+
*
1589+
* Copy content of batch of boolean \c src to the buffer \c dst. The
1590+
* memory needs to be aligned.
1591+
* @param dst the memory buffer to write to
1592+
* @param src the batch to copy
1593+
*/
1594+
template <class To, class A=default_arch, class From>
1595+
void store_batch(To* dst, batch_bool<From, A> const& src, aligned_mode) {
1596+
kernel::store(src, dst, A{});
1597+
}
1598+
1599+
/**
1600+
* @ingroup batch_data_transfer
1601+
*
1602+
* Copy content of batch of boolean \c src to the buffer \c dst. The
1603+
* memory does not need to be aligned.
1604+
* @param dst the memory buffer to write to
1605+
* @param src the batch to copy
1606+
*/
1607+
template <class To, class A=default_arch, class From>
1608+
void store_batch(To* dst, batch_bool<From, A> const& src, unaligned_mode) {
1609+
kernel::store(src, dst, A{});
1610+
}
14701611
/**
14711612
* @ingroup batch_arithmetic
14721613
*

include/xsimd/types/xsimd_batch.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -952,13 +952,13 @@ namespace xsimd
952952
template<class T, class A>
953953
batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const value_type* src)
954954
{
955-
return kernel::load_complex_aligned<A>(src, A{});
955+
return kernel::load_complex_aligned<A>(src, kernel::convert<value_type>{}, A{});
956956
}
957957

958958
template<class T, class A>
959959
batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const value_type* src)
960960
{
961-
return kernel::load_complex_unaligned<A>(src, A{});
961+
return kernel::load_complex_unaligned<A>(src, kernel::convert<value_type>{}, A{});
962962
}
963963

964964
template<class T, class A>

include/xsimd/types/xsimd_traits.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#include <type_traits>
1616

17-
#include "xsimd_api.hpp"
17+
#include "xsimd_batch.hpp"
1818

1919
namespace xsimd
2020
{

test/test_api.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,10 @@ class xsimd_api_test : public testing::Test
123123
batch_type b = batch_type::load(v.data(), xsimd::aligned_mode());
124124
V res(size);
125125

126-
xsimd::store(res.data(), b, xsimd::unaligned_mode());
126+
xsimd::store_as(res.data(), b, xsimd::unaligned_mode());
127127
EXPECT_VECTOR_EQ(res, v) << print_function_name(name + " unaligned");
128128

129-
xsimd::store(res.data(), b, xsimd::aligned_mode());
129+
xsimd::store_as(res.data(), b, xsimd::aligned_mode());
130130
EXPECT_VECTOR_EQ(res, v) << print_function_name(name + " aligned");
131131
}
132132

test/test_load_store.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ class load_store_test : public testing::Test
123123

124124
b = batch_type::load_aligned(v.data());
125125
EXPECT_BATCH_EQ(b, expected) << print_function_name(name + " aligned");
126+
127+
b = xsimd::load_as<value_type>(v.data(), xsimd::unaligned_mode());
128+
EXPECT_BATCH_EQ(b, expected) << print_function_name(name + " unaligned (load_as)");
129+
130+
b = xsimd::load_as<value_type>(v.data(), xsimd::aligned_mode());
131+
EXPECT_BATCH_EQ(b, expected) << print_function_name(name + " aligned (load_as)");
126132
}
127133

128134
template <class V>
@@ -136,6 +142,12 @@ class load_store_test : public testing::Test
136142

137143
b.store_aligned(res.data());
138144
EXPECT_VECTOR_EQ(res, v) << print_function_name(name + " aligned");
145+
146+
xsimd::store_as(res.data(), b, xsimd::unaligned_mode());
147+
EXPECT_VECTOR_EQ(res, v) << print_function_name(name + " unaligned (store_as)");
148+
149+
xsimd::store_as(res.data(), b, xsimd::aligned_mode());
150+
EXPECT_VECTOR_EQ(res, v) << print_function_name(name + " aligned (store_as)");
139151
}
140152

141153
template <class V>

0 commit comments

Comments
 (0)