[arch-commits] Commit in paraview/repos (4 files)

Antonio Rojas arojas at gemini.archlinux.org
Fri Nov 26 08:29:48 UTC 2021


    Date: Friday, November 26, 2021 @ 08:29:47
  Author: arojas
Revision: 1054828

archrelease: copy trunk to community-staging-x86_64

Added:
  paraview/repos/community-staging-x86_64/
  paraview/repos/community-staging-x86_64/PKGBUILD
    (from rev 1054827, paraview/trunk/PKGBUILD)
  paraview/repos/community-staging-x86_64/paraview-tbb-2021.patch
    (from rev 1054827, paraview/trunk/paraview-tbb-2021.patch)
  paraview/repos/community-staging-x86_64/paraview-vtkm-tbb-2021.patch
    (from rev 1054827, paraview/trunk/paraview-vtkm-tbb-2021.patch)

------------------------------+
 PKGBUILD                     |   83 ++++++
 paraview-tbb-2021.patch      |  280 +++++++++++++++++++++
 paraview-vtkm-tbb-2021.patch |  526 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 889 insertions(+)

Copied: paraview/repos/community-staging-x86_64/PKGBUILD (from rev 1054827, paraview/trunk/PKGBUILD)
===================================================================
--- community-staging-x86_64/PKGBUILD	                        (rev 0)
+++ community-staging-x86_64/PKGBUILD	2021-11-26 08:29:47 UTC (rev 1054828)
@@ -0,0 +1,83 @@
+# Maintainer: Bruno Pagani <archange at archlinux.org>
+# Maintainer: Mathieu Westphal <mathieu.westphal at kitware.com>
+# Contributor: Stéphane Gaudreault <stephane at archlinux.org>
+# Contributor: <xantares09 at hotmail.com>
+
+_pkg=paraview
+_mpi=openmpi
+pkgname=${_pkg}
+#-${_mpi}
+pkgver=5.9.1
+pkgrel=6
+pkgdesc="Parallel Visualization application using VTK (${_mpi} version)"
+arch=(x86_64)
+url="https://www.paraview.org"
+license=(BSD custom)
+depends=(boost-libs qt5-tools qt5-x11extras qt5-svg intel-tbb openmpi ffmpeg
+         adios2 liblas ospray pdal python-numpy cgns protobuf
+         double-conversion expat freetype2 gdal gl2ps glew hdf5 libjpeg jsoncpp
+         libjsoncpp.so libharu libxml2 lz4 xz python-mpi4py netcdf libogg
+         libpng pugixml rapidjson libtheora libtiff zlib)
+optdepends=(python-matplotlib python-pandas)
+makedepends=(cmake boost mesa gcc-fortran ninja qt5-tools qt5-xmlpatterns eigen utf8cpp)
+# pegtl https://gitlab.kitware.com/vtk/vtk/-/issues/18151
+conflicts=(vtk)
+source=(${url}/files/v${pkgver:0:3}/ParaView-v${pkgver/R/-R}.tar.xz
+        vtk-gcc11.patch::https://gitlab.kitware.com/vtk/vtk/-/merge_requests/7554.patch
+        vtk-fix-shader-initialization.patch::https://gitlab.kitware.com/vtk/vtk/-/merge_requests/7978.patch
+        paraview-tbb-2021.patch
+        paraview-vtkm-tbb-2021.patch)
+sha256sums=('0d486cb6fbf55e428845c9650486f87466efcb3155e40489182a7ea85dfd4c8d'
+            'c9959adcb59e2f2657f0144b0b68239d4174947fb2ab8051f2575241281e4d68'
+            '10864f69e2d6577c56cc536438b5dd7a52b004f6bb253a17569899922d804fe8'
+            '7091f7b4052688191ca5594f8e1dc1ed00dcc882343b9d360fbacd187161a914'
+            'ee0b5b28fdb2e906ee04b5eb28d87fa1e6646601d48b523d3d8f1760a811c416')
+
+prepare() {
+  cd ParaView-v${pkgver/R/-R}
+  # We have a patched libharu
+  sed -i "s|2.4.0|2.3.0|" VTK/ThirdParty/libharu/CMakeLists.txt
+  # Missing includes with GCC11
+  patch -p1 -d VTK < ../vtk-gcc11.patch
+  # FS#71081
+  patch -p1 -d VTK < ../vtk-fix-shader-initialization.patch
+  # Fix build with HDF5 1.12.1, https://gitlab.kitware.com/vtk/vtk/-/issues/18265
+  sed -i 's/typedef int hid_t;/typedef int64_t hid_t;/' VTK/ThirdParty/xdmf3/vtkxdmf3/core/XdmfHDF5Controller.hpp
+  # Fix build with TBB 2021
+  patch -p1 -d VTK < ../paraview-tbb-2021.patch
+  cp VTK/CMake/FindTBB.cmake VTK/ThirdParty/vtkm/vtkvtkm/vtk-m/CMake/FindTBB.cmake
+  patch -p1 -d VTK/ThirdParty/vtkm/vtkvtkm/vtk-m < "$srcdir"/paraview-vtkm-tbb-2021.patch
+}
+
+build() {
+  # LICENSEDIR blocked by https://gitlab.kitware.com/vtk/vtk/-/issues/18266
+  cmake -B build -S ParaView-v${pkgver/R/-R} -G Ninja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=/usr \
+    -DCMAKE_SKIP_INSTALL_RPATH=ON \
+    -DPARAVIEW_ENABLE_ADIOS2=ON \
+    -DPARAVIEW_ENABLE_FFMPEG=ON \
+    -DPARAVIEW_ENABLE_FIDES=ON \
+    -DPARAVIEW_ENABLE_GDAL=ON \
+    -DPARAVIEW_ENABLE_LAS=ON \
+    -DPARAVIEW_ENABLE_MOTIONFX=ON \
+    -DPARAVIEW_ENABLE_PDAL=ON \
+    -DPARAVIEW_ENABLE_RAYTRACING=ON \
+    -DPARAVIEW_ENABLE_VISITBRIDGE=ON \
+    -DPARAVIEW_ENABLE_XDMF3=ON \
+    -DPARAVIEW_USE_MPI=ON \
+    -DPARAVIEW_USE_PYTHON=ON \
+    -DPARAVIEW_VERSIONED_INSTALL=OFF \
+    -DPARAVIEW_BUILD_WITH_EXTERNAL=ON \
+    -DVTK_SMP_IMPLEMENTATION_TYPE=TBB \
+    -DVTKm_ENABLE_MPI=ON \
+    -DVTK_MODULE_USE_EXTERNAL_VTK_pegtl=OFF \
+    -Wno-dev
+  ninja -C build ${MAKEFLAGS}
+}
+
+package() {
+  DESTDIR="${pkgdir}" ninja -C build install
+  # Fix licenses install
+  mv "${pkgdir}"/usr/share/licenses/{ParaView,paraview}
+}

Copied: paraview/repos/community-staging-x86_64/paraview-tbb-2021.patch (from rev 1054827, paraview/trunk/paraview-tbb-2021.patch)
===================================================================
--- community-staging-x86_64/paraview-tbb-2021.patch	                        (rev 0)
+++ community-staging-x86_64/paraview-tbb-2021.patch	2021-11-26 08:29:47 UTC (rev 1054828)
@@ -0,0 +1,280 @@
+diff --git a/CMake/FindTBB.cmake b/CMake/FindTBB.cmake
+index 27576a5cc8..bd75631317 100644
+--- a/CMake/FindTBB.cmake
++++ b/CMake/FindTBB.cmake
+@@ -68,6 +68,32 @@
+ #  FindTBB helper functions and macros
+ #
+ 
++# Use TBBConfig.cmake if possible.
++
++set(_tbb_find_quiet)
++if (TBB_FIND_QUIETLY)
++  set(_tbb_find_quiet QUIET)
++endif ()
++set(_tbb_find_components)
++set(_tbb_find_optional_components)
++foreach (_tbb_find_component IN LISTS TBB_FIND_COMPONENTS)
++  if (TBB_FIND_REQUIRED_${_tbb_find_component})
++    list(APPEND _tbb_find_components "${_tbb_find_component}")
++  else ()
++    list(APPEND _tbb_find_optional_components "${_tbb_find_component}")
++  endif ()
++endforeach ()
++unset(_tbb_find_component)
++find_package(TBB CONFIG ${_tbb_find_quiet}
++  COMPONENTS ${_tbb_find_components}
++  OPTIONAL_COMPONENTS ${_tbb_find_optional_components})
++unset(_tbb_find_quiet)
++unset(_tbb_find_components)
++unset(_tbb_find_optional_components)
++if (TBB_FOUND)
++  return ()
++endif ()
++
+ #====================================================
+ # Fix the library path in case it is a linker script
+ #====================================================
+@@ -398,12 +424,18 @@ findpkg_finish(TBB_MALLOC_PROXY tbbmalloc_proxy)
+ #=============================================================================
+ #parse all the version numbers from tbb
+ if(NOT TBB_VERSION)
+-
+- #only read the start of the file
+- file(STRINGS
++  if (EXISTS "${TBB_INCLUDE_DIR}/oneapi/tbb/version.h")
++    file(STRINGS
++      "${TBB_INCLUDE_DIR}/oneapi/tbb/version.h"
++      TBB_VERSION_CONTENTS
++      REGEX "VERSION")
++  else()
++    #only read the start of the file
++    file(STRINGS
+       "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h"
+       TBB_VERSION_CONTENTS
+       REGEX "VERSION")
++  endif()
+ 
+   string(REGEX REPLACE
+     ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1"
+diff --git a/Common/Core/SMP/TBB/vtkSMPTools.cxx b/Common/Core/SMP/TBB/vtkSMPTools.cxx
+index e4423f26d5..90d32832e1 100644
+--- a/Common/Core/SMP/TBB/vtkSMPTools.cxx
++++ b/Common/Core/SMP/TBB/vtkSMPTools.cxx
+@@ -23,24 +23,13 @@
+ #define __TBB_NO_IMPLICIT_LINKAGE 1
+ #endif
+ 
+-#include <tbb/task_scheduler_init.h>
++#include <tbb/task_arena.h>
+ 
+ #ifdef _MSC_VER
+ #pragma pop_macro("__TBB_NO_IMPLICIT_LINKAGE")
+ #endif
+ 
+-struct vtkSMPToolsInit
+-{
+-  tbb::task_scheduler_init Init;
+-
+-  vtkSMPToolsInit(int numThreads)
+-    : Init(numThreads)
+-  {
+-  }
+-};
+-
+-static bool vtkSMPToolsInitialized = 0;
+-static int vtkTBBNumSpecifiedThreads = 0;
++static tbb::task_arena taskArena;
+ static vtkSimpleCriticalSection vtkSMPToolsCS;
+ 
+ //------------------------------------------------------------------------------
+@@ -53,23 +42,37 @@ const char* vtkSMPTools::GetBackend()
+ void vtkSMPTools::Initialize(int numThreads)
+ {
+   vtkSMPToolsCS.Lock();
+-  if (!vtkSMPToolsInitialized)
++
++  // If numThreads <= 0, don't create a task_arena
++  // and let TBB do the default thing.
++  if (numThreads > 0 && numThreads != taskArena.max_concurrency())
+   {
+-    // If numThreads <= 0, don't create a task_scheduler_init
+-    // and let TBB do the default thing.
+-    if (numThreads > 0)
++    if (taskArena.is_active())
+     {
+-      static vtkSMPToolsInit aInit(numThreads);
+-      vtkTBBNumSpecifiedThreads = numThreads;
++      taskArena.terminate();
+     }
+-    vtkSMPToolsInitialized = true;
++    taskArena.initialize(numThreads);
+   }
++
+   vtkSMPToolsCS.Unlock();
+ }
+ 
+ //------------------------------------------------------------------------------
+ int vtkSMPTools::GetEstimatedNumberOfThreads()
+ {
+-  return vtkTBBNumSpecifiedThreads ? vtkTBBNumSpecifiedThreads
+-                                   : tbb::task_scheduler_init::default_num_threads();
++  return taskArena.max_concurrency();
++}
++
++//------------------------------------------------------------------------------
++void vtk::detail::smp::vtkSMPTools_Impl_For_TBB(vtkIdType first, vtkIdType last, vtkIdType grain,
++  ExecuteFunctorPtrType functorExecuter, void* functor)
++{
++  if (taskArena.is_active())
++  {
++    taskArena.execute([&] { functorExecuter(functor, first, last, grain); });
++  }
++  else
++  {
++    functorExecuter(functor, first, last, grain);
++  }
+ }
+diff --git a/Common/Core/SMP/TBB/vtkSMPToolsInternal.h.in b/Common/Core/SMP/TBB/vtkSMPToolsInternal.h.in
+index c6614e42a4..1c7253deee 100644
+--- a/Common/Core/SMP/TBB/vtkSMPToolsInternal.h.in
++++ b/Common/Core/SMP/TBB/vtkSMPToolsInternal.h.in
+@@ -12,7 +12,8 @@
+      PURPOSE.  See the above copyright notice for more information.
+ 
+ =========================================================================*/
+-#include "vtkNew.h"
++
++#include "vtkCommonCoreModule.h" // For export macro
+ 
+ #ifdef _MSC_VER
+ #  pragma push_macro("__TBB_NO_IMPLICIT_LINKAGE")
+@@ -34,6 +35,10 @@ namespace detail
+ namespace smp
+ {
+ 
++typedef void (*ExecuteFunctorPtrType)(void*, vtkIdType, vtkIdType, vtkIdType);
++void VTKCOMMONCORE_EXPORT vtkSMPTools_Impl_For_TBB(vtkIdType first, vtkIdType last, vtkIdType grain,
++  ExecuteFunctorPtrType functorExecuter, void* functor);
++
+ //--------------------------------------------------------------------------------
+ template <typename T>
+ class FuncCall
+@@ -43,22 +48,22 @@ class FuncCall
+   void operator=(const FuncCall&) = delete;
+ 
+ public:
+-  void operator() (const tbb::blocked_range<vtkIdType>& r) const
+-  {
+-      o.Execute(r.begin(), r.end());
++  void operator()(const tbb::blocked_range<vtkIdType>& r) const {
++    o.Execute(r.begin(), r.end());
+   }
+ 
+-  FuncCall (T& _o) : o(_o)
++  FuncCall(T& _o)
++    : o(_o)
+   {
+   }
+ };
+ 
+ //--------------------------------------------------------------------------------
+ template <typename FunctorInternal>
+-void vtkSMPTools_Impl_For(
+-  vtkIdType first, vtkIdType last, vtkIdType grain,
+-  FunctorInternal& fi)
++void ExecuteFunctor(void* functor, vtkIdType first, vtkIdType last, vtkIdType grain)
+ {
++  FunctorInternal& fi = *reinterpret_cast<FunctorInternal*>(functor);
++
+   vtkIdType range = last - first;
+   if (range <= 0)
+   {
+@@ -66,20 +71,26 @@ void vtkSMPTools_Impl_For(
+   }
+   if (grain > 0)
+   {
+-    tbb::parallel_for(tbb::blocked_range<vtkIdType>(first, last, grain), FuncCall<FunctorInternal>(fi));
++    tbb::parallel_for(
++      tbb::blocked_range<vtkIdType>(first, last, grain), FuncCall<FunctorInternal>(fi));
+   }
+   else
+   {
+     // When the grain is not specified, automatically calculate an appropriate grain size so
+     // most of the time will still be spent running the calculation and not task overhead.
+-    const vtkIdType numberThreadsEstimate = 40; // Estimate of how many threads we might be able to run
+-    const vtkIdType batchesPerThread = 5; // Plan for a few batches per thread so one busy core doesn't stall the whole system
++
++    // Estimate of how many threads we might be able to run
++    const vtkIdType numberThreadsEstimate = 40;
++    // Plan for a few batches per thread so one busy core doesn't stall the whole system
++    const vtkIdType batchesPerThread = 5;
+     const vtkIdType batches = numberThreadsEstimate * batchesPerThread;
+ 
+     if (range >= batches)
+     {
+-      vtkIdType calculatedGrain = ((range - 1) / batches) + 1; // std::ceil round up for systems without cmath
+-      tbb::parallel_for(tbb::blocked_range<vtkIdType>(first, last, calculatedGrain), FuncCall<FunctorInternal>(fi));
++      // std::ceil round up for systems without cmath
++      vtkIdType calculatedGrain = ((range - 1) / batches) + 1;
++      tbb::parallel_for(
++        tbb::blocked_range<vtkIdType>(first, last, calculatedGrain), FuncCall<FunctorInternal>(fi));
+     }
+     else
+     {
+@@ -91,23 +102,26 @@ void vtkSMPTools_Impl_For(
+ }
+ 
+ //--------------------------------------------------------------------------------
+-template<typename RandomAccessIterator>
+-void vtkSMPTools_Impl_Sort(RandomAccessIterator begin,
+-                                  RandomAccessIterator end)
++template <typename FunctorInternal>
++void vtkSMPTools_Impl_For(vtkIdType first, vtkIdType last, vtkIdType grain, FunctorInternal& fi)
++{
++  vtkSMPTools_Impl_For_TBB(first, last, grain, ExecuteFunctor<FunctorInternal>, &fi);
++}
++
++//--------------------------------------------------------------------------------
++template <typename RandomAccessIterator>
++void vtkSMPTools_Impl_Sort(RandomAccessIterator begin, RandomAccessIterator end)
+ {
+   tbb::parallel_sort(begin, end);
+ }
+ 
+ //--------------------------------------------------------------------------------
+-template<typename RandomAccessIterator, typename Compare>
+-void vtkSMPTools_Impl_Sort(RandomAccessIterator begin,
+-                                  RandomAccessIterator end,
+-                                  Compare comp)
++template <typename RandomAccessIterator, typename Compare>
++void vtkSMPTools_Impl_Sort(RandomAccessIterator begin, RandomAccessIterator end, Compare comp)
+ {
+   tbb::parallel_sort(begin, end, comp);
+ }
+ 
+-
+-}//namespace smp
+-}//namespace detail
+-}//namespace vtk
++} // namespace smp
++} // namespace detail
++} // namespace vtk
+diff --git a/Common/Core/vtkSMPTools.h b/Common/Core/vtkSMPTools.h
+index cee63ad6ec..4fa28abd72 100644
+--- a/Common/Core/vtkSMPTools.h
++++ b/Common/Core/vtkSMPTools.h
+@@ -228,11 +228,8 @@ public:
+    * Initialize the underlying libraries for execution. This is
+    * not required as it is automatically called before the first
+    * execution of any parallel code. However, it can be used to
+-   * control the maximum number of threads used when the back-end
+-   * supports it (currently Simple and TBB only). Make sure to call
+-   * it before any other parallel operation.
+-   * When using Kaapi, use the KAAPI_CPUCOUNT env. variable to control
+-   * the number of threads used in the thread pool.
++   * control the maximum number of threads used. Make sure to call
++   * it before the parallel operation.
+    */
+   static void Initialize(int numThreads = 0);
+ 

Copied: paraview/repos/community-staging-x86_64/paraview-vtkm-tbb-2021.patch (from rev 1054827, paraview/trunk/paraview-vtkm-tbb-2021.patch)
===================================================================
--- community-staging-x86_64/paraview-vtkm-tbb-2021.patch	                        (rev 0)
+++ community-staging-x86_64/paraview-vtkm-tbb-2021.patch	2021-11-26 08:29:47 UTC (rev 1054828)
@@ -0,0 +1,526 @@
+From 904e784e895229d675cd7b3b43a963fdc5813ac8 Mon Sep 17 00:00:00 2001
+From: Kenneth Moreland <morelandkd at ornl.gov>
+Date: Fri, 4 Jun 2021 09:30:52 -0600
+Subject: [PATCH 2/6] Remove TBB parallel_sort patch
+
+Years ago we discovered a problem with TBB's parallel sort, which we
+patch in our local repo and submitted a change to TBB, which has been
+accepted.
+
+The code to decide whether to use our parallel_sort patch does not work
+with the latest versions of TBB because it requires including a header
+that changed names to get the TBB version.
+
+We no longer support any TBB version with this bug, so just remove the
+patch from VTK-m.
+---
+ vtkm/cont/tbb/internal/CMakeLists.txt  |   1 -
+ vtkm/cont/tbb/internal/FunctorsTBB.h   |  10 +-
+ vtkm/cont/tbb/internal/parallel_sort.h | 273 -------------------------
+ 3 files changed, 1 insertion(+), 283 deletions(-)
+ delete mode 100644 vtkm/cont/tbb/internal/parallel_sort.h
+
+diff --git a/vtkm/cont/tbb/internal/CMakeLists.txt b/vtkm/cont/tbb/internal/CMakeLists.txt
+index 1283307be..ffbf1e845 100644
+--- a/vtkm/cont/tbb/internal/CMakeLists.txt
++++ b/vtkm/cont/tbb/internal/CMakeLists.txt
+@@ -25,7 +25,6 @@ endif()
+ 
+ vtkm_declare_headers(${headers}
+    ParallelSortTBB.hxx
+-   parallel_sort.h
+    )
+ 
+ #These sources need to always be built
+diff --git a/vtkm/cont/tbb/internal/FunctorsTBB.h b/vtkm/cont/tbb/internal/FunctorsTBB.h
+index dc988f7f8..c538c2240 100644
+--- a/vtkm/cont/tbb/internal/FunctorsTBB.h
++++ b/vtkm/cont/tbb/internal/FunctorsTBB.h
+@@ -38,15 +38,6 @@ VTKM_THIRDPARTY_PRE_INCLUDE
+ // correct settings so that we don't clobber any existing function
+ #include <vtkm/internal/Windows.h>
+ 
+-#include <tbb/tbb_stddef.h>
+-#if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2)
+-//we provide an patched implementation of tbb parallel_sort
+-//that fixes ADL for std::swap. This patch has been submitted to Intel
+-//and is fixed in TBB 4.2 update 2.
+-#include <vtkm/cont/tbb/internal/parallel_sort.h>
+-#else
+-#include <tbb/parallel_sort.h>
+-#endif
+ 
+ #include <numeric>
+ #include <tbb/blocked_range.h>
+@@ -54,6 +45,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
+ #include <tbb/parallel_for.h>
+ #include <tbb/parallel_reduce.h>
+ #include <tbb/parallel_scan.h>
++#include <tbb/parallel_sort.h>
+ #include <tbb/partitioner.h>
+ #include <tbb/tick_count.h>
+ 
+diff --git a/vtkm/cont/tbb/internal/parallel_sort.h b/vtkm/cont/tbb/internal/parallel_sort.h
+deleted file mode 100644
+index 3451a369f..000000000
+--- a/vtkm/cont/tbb/internal/parallel_sort.h
++++ /dev/null
+@@ -1,273 +0,0 @@
+-/*
+-    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
+-
+-    This file is part of Threading Building Blocks.
+-
+-    Threading Building Blocks is free software; you can redistribute it
+-    and/or modify it under the terms of the GNU General Public License
+-    version 2 as published by the Free Software Foundation.
+-
+-    Threading Building Blocks is distributed in the hope that it will be
+-    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+-    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-    GNU General Public License for more details.
+-
+-    You should have received a copy of the GNU General Public License
+-    along with Threading Building Blocks; if not, write to the Free Software
+-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+-
+-    As a special exception, you may use this file as part of a free software
+-    library without restriction.  Specifically, if other files instantiate
+-    templates or use macros or inline functions from this file, or you compile
+-    this file and link it with other files to produce an executable, this
+-    file does not by itself cause the resulting executable to be covered by
+-    the GNU General Public License.  This exception does not however
+-    invalidate any other reasons why the executable file might be covered by
+-    the GNU General Public License.
+-*/
+-
+-#ifndef __TBB_parallel_sort_H
+-#define __TBB_parallel_sort_H
+-
+-#include <tbb/blocked_range.h>
+-#include <tbb/parallel_for.h>
+-
+-#include <algorithm>
+-#include <functional>
+-#include <iterator>
+-
+-namespace tbb
+-{
+-
+-//! @cond INTERNAL
+-namespace internal
+-{
+-
+-//! Range used in quicksort to split elements into subranges based on a value.
+-/** The split operation selects a splitter and places all elements less than or equal
+-    to the value in the first range and the remaining elements in the second range.
+-    @ingroup algorithms */
+-template <typename RandomAccessIterator, typename Compare>
+-class quick_sort_range : private no_assign
+-{
+-
+-  inline size_t median_of_three(const RandomAccessIterator& array,
+-                                size_t l,
+-                                size_t m,
+-                                size_t r) const
+-  {
+-    return comp(array[l], array[m])
+-      ? (comp(array[m], array[r]) ? m : (comp(array[l], array[r]) ? r : l))
+-      : (comp(array[r], array[m]) ? m : (comp(array[r], array[l]) ? r : l));
+-  }
+-
+-  inline size_t pseudo_median_of_nine(const RandomAccessIterator& array,
+-                                      const quick_sort_range& range) const
+-  {
+-    size_t offset = range.size / 8u;
+-    return median_of_three(array,
+-                           median_of_three(array, 0, offset, offset * 2),
+-                           median_of_three(array, offset * 3, offset * 4, offset * 5),
+-                           median_of_three(array, offset * 6, offset * 7, range.size - 1));
+-  }
+-
+-public:
+-  static const size_t grainsize = 500;
+-  const Compare& comp;
+-  RandomAccessIterator begin;
+-  size_t size;
+-
+-  quick_sort_range(RandomAccessIterator begin_, size_t size_, const Compare& comp_)
+-    : comp(comp_)
+-    , begin(begin_)
+-    , size(size_)
+-  {
+-  }
+-
+-  bool empty() const { return size == 0; }
+-  bool is_divisible() const { return size >= grainsize; }
+-
+-  quick_sort_range(quick_sort_range& range, split)
+-    : comp(range.comp)
+-  {
+-    using std::swap;
+-    RandomAccessIterator array = range.begin;
+-    RandomAccessIterator key0 = range.begin;
+-    size_t m = pseudo_median_of_nine(array, range);
+-    if (m)
+-      swap(array[0], array[m]);
+-
+-    size_t i = 0;
+-    size_t j = range.size;
+-    // Partition interval [i+1,j-1] with key *key0.
+-    for (;;)
+-    {
+-      __TBB_ASSERT(i < j, nullptr);
+-      // Loop must terminate since array[l]==*key0.
+-      do
+-      {
+-        --j;
+-        __TBB_ASSERT(i <= j, "bad ordering relation?");
+-      } while (comp(*key0, array[j]));
+-      do
+-      {
+-        __TBB_ASSERT(i <= j, nullptr);
+-        if (i == j)
+-          goto partition;
+-        ++i;
+-      } while (comp(array[i], *key0));
+-      if (i == j)
+-        goto partition;
+-      swap(array[i], array[j]);
+-    }
+-  partition:
+-    // Put the partition key were it belongs
+-    swap(array[j], *key0);
+-    // array[l..j) is less or equal to key.
+-    // array(j..r) is greater or equal to key.
+-    // array[j] is equal to key
+-    i = j + 1;
+-    begin = array + i;
+-    size = range.size - i;
+-    range.size = j;
+-  }
+-};
+-
+-#if __TBB_TASK_GROUP_CONTEXT
+-//! Body class used to test if elements in a range are presorted
+-/** @ingroup algorithms */
+-template <typename RandomAccessIterator, typename Compare>
+-class quick_sort_pretest_body : internal::no_assign
+-{
+-  const Compare& comp;
+-
+-public:
+-  quick_sort_pretest_body(const Compare& _comp)
+-    : comp(_comp)
+-  {
+-  }
+-
+-  void operator()(const blocked_range<RandomAccessIterator>& range) const
+-  {
+-    task& my_task = task::self();
+-    RandomAccessIterator my_end = range.end();
+-
+-    int i = 0;
+-    for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i)
+-    {
+-      if (i % 64 == 0 && my_task.is_cancelled())
+-        break;
+-
+-      // The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
+-      if (comp(*(k), *(k - 1)))
+-      {
+-        my_task.cancel_group_execution();
+-        break;
+-      }
+-    }
+-  }
+-};
+-#endif /* __TBB_TASK_GROUP_CONTEXT */
+-
+-//! Body class used to sort elements in a range that is smaller than the grainsize.
+-/** @ingroup algorithms */
+-template <typename RandomAccessIterator, typename Compare>
+-struct quick_sort_body
+-{
+-  void operator()(const quick_sort_range<RandomAccessIterator, Compare>& range) const
+-  {
+-    //SerialQuickSort( range.begin, range.size, range.comp );
+-    std::sort(range.begin, range.begin + range.size, range.comp);
+-  }
+-};
+-
+-//! Wrapper method to initiate the sort by calling parallel_for.
+-/** @ingroup algorithms */
+-template <typename RandomAccessIterator, typename Compare>
+-void parallel_quick_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
+-{
+-#if __TBB_TASK_GROUP_CONTEXT
+-  task_group_context my_context;
+-  const int serial_cutoff = 9;
+-
+-  __TBB_ASSERT(begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?");
+-  RandomAccessIterator k;
+-  for (k = begin; k != begin + serial_cutoff; ++k)
+-  {
+-    if (comp(*(k + 1), *k))
+-    {
+-      goto do_parallel_quick_sort;
+-    }
+-  }
+-
+-  parallel_for(blocked_range<RandomAccessIterator>(k + 1, end),
+-               quick_sort_pretest_body<RandomAccessIterator, Compare>(comp),
+-               auto_partitioner(),
+-               my_context);
+-
+-  if (my_context.is_group_execution_cancelled())
+-  do_parallel_quick_sort:
+-#endif /* __TBB_TASK_GROUP_CONTEXT */
+-    parallel_for(quick_sort_range<RandomAccessIterator, Compare>(begin, end - begin, comp),
+-                 quick_sort_body<RandomAccessIterator, Compare>(),
+-                 auto_partitioner());
+-}
+-
+-} // namespace internal
+-//! @endcond
+-
+-//! @cond INTERNAL
+-/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
+-    Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
+-    - \code void swap( T& x, T& y ) \endcode        Swaps \c x and \c y
+-    - \code bool Compare::operator()( const T& x, const T& y ) \endcode
+-                                                    True if x comes before y;
+-**/
+-
+-/** \name parallel_sort
+-    See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
+-//@{
+-
+-//! Sorts the data in [begin,end) using the given comparator
+-/** The compare function object is used for all comparisons between elements during sorting.
+-    The compare object must define a bool operator() function.
+-    @ingroup algorithms **/
+-//! @endcond
+-template <typename RandomAccessIterator, typename Compare>
+-void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
+-{
+-  const int min_parallel_size = 500;
+-  if (end > begin)
+-  {
+-    if (end - begin < min_parallel_size)
+-    {
+-      std::sort(begin, end, comp);
+-    }
+-    else
+-    {
+-      internal::parallel_quick_sort(begin, end, comp);
+-    }
+-  }
+-}
+-
+-//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator>
+-/** @ingroup algorithms **/
+-template <typename RandomAccessIterator>
+-inline void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end)
+-{
+-  parallel_sort(
+-    begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>());
+-}
+-
+-//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
+-/** @ingroup algorithms **/
+-template <typename T>
+-inline void parallel_sort(T* begin, T* end)
+-{
+-  parallel_sort(begin, end, std::less<T>());
+-}
+-//@}
+-
+-} // namespace tbb
+-
+-#endif
+-- 
+GitLab
+
+
+From 1eea0bee122fa3ef47cc488d97f82e008d69c8bd Mon Sep 17 00:00:00 2001
+From: Kenneth Moreland <morelandkd at ornl.gov>
+Date: Fri, 4 Jun 2021 09:34:30 -0600
+Subject: [PATCH 3/6] Use TBB task_group for radix sort
+
+TBB 2020 introduced a new class called `task_group`. TBB 2021 removed
+the old class `task` as its functionality was replaced by `task_group`.
+Our parallel radix sort for TBB was implemented using `task`s, so change
+it to use `task_group` (which actually cleans up the code a bit).
+---
+ vtkm/cont/internal/ParallelRadixSort.h     |  2 +-
+ vtkm/cont/tbb/internal/ParallelSortTBB.cxx | 35 ++++++++++++++++++++--
+ 2 files changed, 33 insertions(+), 4 deletions(-)
+
+diff --git a/vtkm/cont/internal/ParallelRadixSort.h b/vtkm/cont/internal/ParallelRadixSort.h
+index 7604aacd7..cca8f6b72 100644
+--- a/vtkm/cont/internal/ParallelRadixSort.h
++++ b/vtkm/cont/internal/ParallelRadixSort.h
+@@ -485,7 +485,7 @@ struct RunTask
+   }
+ 
+   template <typename ThreaderData = void*>
+-  void operator()(ThreaderData tData = nullptr)
++  void operator()(ThreaderData tData = nullptr) const
+   {
+     size_t num_nodes_at_current_height = (size_t)pow(2, (double)binary_tree_height_);
+     if (num_threads_ <= num_nodes_at_current_height)
+diff --git a/vtkm/cont/tbb/internal/ParallelSortTBB.cxx b/vtkm/cont/tbb/internal/ParallelSortTBB.cxx
+index 9243017e6..d99406954 100644
+--- a/vtkm/cont/tbb/internal/ParallelSortTBB.cxx
++++ b/vtkm/cont/tbb/internal/ParallelSortTBB.cxx
+@@ -53,7 +53,7 @@
+ // correct settings so that we don't clobber any existing function
+ #include <vtkm/internal/Windows.h>
+ 
+-#include <tbb/task.h>
++#include <tbb/tbb.h>
+ #include <thread>
+ 
+ #if defined(VTKM_MSVC)
+@@ -71,6 +71,7 @@ namespace sort
+ 
+ const size_t MAX_CORES = std::thread::hardware_concurrency();
+ 
++#if TBB_VERSION_MAJOR < 2020
+ // Simple TBB task wrapper around a generic functor.
+ template <typename FunctorType>
+ struct TaskWrapper : public ::tbb::task
+@@ -94,7 +95,7 @@ struct RadixThreaderTBB
+   size_t GetAvailableCores() const { return MAX_CORES; }
+ 
+   template <typename TaskType>
+-  void RunParentTask(TaskType task)
++  void RunParentTask(TaskType task) const
+   {
+     using Task = TaskWrapper<TaskType>;
+     Task& root = *new (::tbb::task::allocate_root()) Task(task);
+@@ -102,7 +103,7 @@ struct RadixThreaderTBB
+   }
+ 
+   template <typename TaskType>
+-  void RunChildTasks(TaskWrapper<TaskType>* wrapper, TaskType left, TaskType right)
++  void RunChildTasks(TaskWrapper<TaskType>* wrapper, TaskType left, TaskType right) const
+   {
+     using Task = TaskWrapper<TaskType>;
+     ::tbb::empty_task& p = *new (wrapper->allocate_continuation())::tbb::empty_task();
+@@ -115,6 +116,34 @@ struct RadixThreaderTBB
+   }
+ };
+ 
++#else // TBB_VERSION_MAJOR >= 2020
++
++// In TBB version 2020, the task class was deprecated. Instead, we use the simpler task_group.
++
++struct RadixThreaderTBB
++{
++  std::shared_ptr<::tbb::task_group> TaskGroup =
++    std::shared_ptr<::tbb::task_group>(new ::tbb::task_group);
++
++  size_t GetAvailableCores() const { return MAX_CORES; }
++
++  template <typename TaskType>
++  void RunParentTask(TaskType task) const
++  {
++    this->TaskGroup->run_and_wait(task);
++    // All tasks should be complete at this point.
++  }
++
++  template <typename TaskType>
++  void RunChildTasks(void*, TaskType left, TaskType right) const
++  {
++    this->TaskGroup->run(left);
++    this->TaskGroup->run(right);
++  }
++};
++
++#endif
++
+ VTKM_INSTANTIATE_RADIX_SORT_FOR_THREADER(RadixThreaderTBB)
+ }
+ }
+-- 
+GitLab
+
+
+From 5eb688da2a39a9db2245a96ebef4811cc6de778e Mon Sep 17 00:00:00 2001
+From: Kenneth Moreland <morelandkd at ornl.gov>
+Date: Fri, 4 Jun 2021 10:05:28 -0600
+Subject: [PATCH 4/6] Update parallel radix sort for OpenMP
+
+Some changes required for TBB bled into the implementation of OpenMP.
+---
+ vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx b/vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
+index 891d92aba..1bad0bc5a 100644
+--- a/vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
++++ b/vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
+@@ -44,7 +44,7 @@ struct RadixThreaderOpenMP
+   }
+ 
+   template <typename TaskType>
+-  void RunParentTask(TaskType task)
++  void RunParentTask(TaskType task) const
+   {
+     assert(!omp_in_parallel());
+ #pragma omp parallel default(none) shared(task)
+@@ -57,7 +57,7 @@ struct RadixThreaderOpenMP
+   }
+ 
+   template <typename TaskType, typename ThreadData>
+-  void RunChildTasks(ThreadData, TaskType left, TaskType right)
++  void RunChildTasks(ThreadData, TaskType left, TaskType right) const
+   {
+     assert(omp_in_parallel());
+ #pragma omp task default(none) firstprivate(right)
+diff --git a/CMake/VTKmDeviceAdapters.cmake b/CMake/VTKmDeviceAdapters.cmake
+index eb98d5c03..5523eac0b 100644
+--- a/CMake/VTKmDeviceAdapters.cmake
++++ b/CMake/VTKmDeviceAdapters.cmake
+@@ -43,38 +43,7 @@ endfunction()
+ 
+ if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
+   find_package(TBB REQUIRED)
+-  add_library(vtkm::tbb UNKNOWN IMPORTED GLOBAL)
+-
+-  set_target_properties(vtkm::tbb PROPERTIES
+-      INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIRS}")
+-
+-  if(EXISTS "${TBB_LIBRARY_RELEASE}")
+-    vtkm_extract_real_library("${TBB_LIBRARY_RELEASE}" real_path)
+-    set_property(TARGET vtkm::tbb APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+-    set_target_properties(vtkm::tbb PROPERTIES
+-      IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+-      IMPORTED_LOCATION_RELEASE "${real_path}"
+-      )
+-  elseif(EXISTS "${TBB_LIBRARY}")
+-    #When VTK-m is mixed with OSPray we could use the OSPray FindTBB file
+-    #which doesn't define TBB_LIBRARY_RELEASE but instead defined only
+-    #TBB_LIBRARY
+-    vtkm_extract_real_library("${TBB_LIBRARY}" real_path)
+-    set_property(TARGET vtkm::tbb APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+-    set_target_properties(vtkm::tbb PROPERTIES
+-      IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+-      IMPORTED_LOCATION_RELEASE "${real_path}"
+-      )
+-  endif()
+-
+-  if(EXISTS "${TBB_LIBRARY_DEBUG}")
+-    vtkm_extract_real_library("${TBB_LIBRARY_DEBUG}" real_path)
+-    set_property(TARGET vtkm::tbb APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+-    set_target_properties(vtkm::tbb PROPERTIES
+-      IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+-      IMPORTED_LOCATION_DEBUG "${real_path}"
+-      )
+-  endif()
++  add_library(vtkm::tbb ALIAS TBB::tbb)
+ endif()
+ 
+ 



More information about the arch-commits mailing list