//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H #define _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H #include <__assert> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__numeric/transform_reduce.h> #include <__pstl/backend_fwd.h> #include <__pstl/cpu_algos/cpu_traits.h> #include <__type_traits/desugars_to.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_execution_policy.h> #include <__utility/move.h> #include #include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD namespace __pstl { template , __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>, int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept { _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) for (_DifferenceType __i = 0; __i < __n; ++__i) __init += __f(__i); return __init; } template , __enable_if_t && is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>), int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept { constexpr size_t __lane_size = __cpu_traits<_Backend>::__lane_size; const _Size __block_size = __lane_size / sizeof(_Tp); if (__n > 2 * __block_size && __block_size > 1) { alignas(__lane_size) char __lane_buffer[__lane_size]; _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer); // initializer _PSTL_PRAGMA_SIMD for (_Size __i = 0; __i < __block_size; ++__i) { ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i))); } // main loop _Size __i = 2 * __block_size; const _Size __last_iteration = __block_size * (__n / __block_size); for (; __i < __last_iteration; __i += __block_size) { _PSTL_PRAGMA_SIMD for (_Size __j = 0; __j < __block_size; ++__j) { __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j)); } } // remainder _PSTL_PRAGMA_SIMD for (_Size __j = 0; __j < __n - __last_iteration; ++__j) { __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j)); } // combiner for (_Size __j = 0; __j < __block_size; ++__j) { __init = __binary_op(std::move(__init), std::move(__lane[__j])); } // destroyer _PSTL_PRAGMA_SIMD for (_Size __j = 0; __j < __block_size; ++__j) { __lane[__j].~_Tp(); } } else { for (_Size __i = 0; __i < __n; ++__i) { __init = __binary_op(std::move(__init), __f(__i)); } } return __init; } template struct __cpu_parallel_transform_reduce_binary { template _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()( _Policy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __reduce, _BinaryOperation2 __transform) const noexcept { if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { return __cpu_traits<_Backend>::__transform_reduce( __first1, std::move(__last1), [__first1, __first2, __transform](_ForwardIterator1 __iter) { return __transform(*__iter, *(__first2 + (__iter - __first1))); }, std::move(__init), std::move(__reduce), [&__policy, __first1, __first2, __reduce, __transform]( _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) { using _TransformReduceBinaryUnseq = __pstl::__transform_reduce_binary<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; return *_TransformReduceBinaryUnseq()( std::__remove_parallel_policy(__policy), __brick_first, std::move(__brick_last), __first2 + (__brick_first - __first1), std::move(__brick_init), std::move(__reduce), std::move(__transform)); }); } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value && __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) { return __pstl::__simd_transform_reduce<_Backend>( __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) { return __transform(__first1[__i], __first2[__i]); }); } else { return std::transform_reduce( std::move(__first1), std::move(__last1), std::move(__first2), std::move(__init), std::move(__reduce), std::move(__transform)); } } }; template struct __cpu_parallel_transform_reduce { template _LIBCPP_HIDE_FROM_ABI optional<_Tp> operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __reduce, _UnaryOperation __transform) const noexcept { if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { return __cpu_traits<_Backend>::__transform_reduce( std::move(__first), std::move(__last), [__transform](_ForwardIterator __iter) { return __transform(*__iter); }, std::move(__init), __reduce, [&__policy, __transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) { using _TransformReduceUnseq = __pstl::__transform_reduce<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>; auto __res = _TransformReduceUnseq()( std::__remove_parallel_policy(__policy), std::move(__brick_first), std::move(__brick_last), std::move(__brick_init), std::move(__reduce), std::move(__transform)); _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!"); return *std::move(__res); }); } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> && __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) { return __pstl::__simd_transform_reduce<_Backend>( __last - __first, std::move(__init), std::move(__reduce), [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); }); } else { return std::transform_reduce( std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform)); } } }; } // namespace __pstl _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #endif // _LIBCPP___PSTL_CPU_ALGOS_TRANSFORM_REDUCE_H