[arch-commits] Commit in root/repos (11 files)
Konstantin Gizdov
kgizdov at archlinux.org
Thu Jul 23 20:24:25 UTC 2020
Date: Thursday, July 23, 2020 @ 20:24:24
Author: kgizdov
Revision: 665091
archrelease: copy trunk to community-testing-x86_64
Added:
root/repos/community-testing-x86_64/
root/repos/community-testing-x86_64/PKGBUILD
(from rev 665090, root/trunk/PKGBUILD)
root/repos/community-testing-x86_64/ROOFIT_LICENSE
(from rev 665090, root/trunk/ROOFIT_LICENSE)
root/repos/community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch
(from rev 665090, root/trunk/adapt_tmva_to_support_cudnn8.patch)
root/repos/community-testing-x86_64/jupyter_notebook_config.py
(from rev 665090, root/trunk/jupyter_notebook_config.py)
root/repos/community-testing-x86_64/nbman-for-arch.patch
(from rev 665090, root/trunk/nbman-for-arch.patch)
root/repos/community-testing-x86_64/root.pc.tpl
(from rev 665090, root/trunk/root.pc.tpl)
root/repos/community-testing-x86_64/root.xml
(from rev 665090, root/trunk/root.xml)
root/repos/community-testing-x86_64/settings-cuda.cmake
(from rev 665090, root/trunk/settings-cuda.cmake)
root/repos/community-testing-x86_64/settings.cmake
(from rev 665090, root/trunk/settings.cmake)
root/repos/community-testing-x86_64/thisroot.fail
(from rev 665090, root/trunk/thisroot.fail)
------------------------------------+
PKGBUILD | 281 ++++++++
ROOFIT_LICENSE | 22
adapt_tmva_to_support_cudnn8.patch | 1130 +++++++++++++++++++++++++++++++++++
jupyter_notebook_config.py | 1
nbman-for-arch.patch | 177 +++++
root.pc.tpl | 12
root.xml | 14
settings-cuda.cmake | 110 +++
settings.cmake | 110 +++
thisroot.fail | 12
10 files changed, 1869 insertions(+)
Copied: root/repos/community-testing-x86_64/PKGBUILD (from rev 665090, root/trunk/PKGBUILD)
===================================================================
--- community-testing-x86_64/PKGBUILD (rev 0)
+++ community-testing-x86_64/PKGBUILD 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,281 @@
+# Maintainer: Konstantin Gizdov < arch at kge dot pw >
+# Contributor: Frank Siegert < frank.siegert at googlemail dot com >
+# Contributor: Scott Lawrence < bytbox at gmail dot com >
+# Contributor: Thomas Dziedzic < gostrc at gmail dot com >
+# Contributor: Sebastian Voecking < voeck at web dot de >
+
+pkgbase=root  # base name; "${pkgbase}-${pkgver}" is the source directory used by prepare(), build() and _package()
+pkgname=('root' 'root-cuda')  # split package: packaged by package_root() and package_root-cuda() respectively
+pkgver=6.22.00  # upstream version, interpolated into the tarball URL in source=()
+pkgrel=1
+pkgdesc='C++ data analysis framework and interpreter from CERN'  # overridden inside package_root-cuda()
+arch=('x86_64')
+url='https://root.cern'  # also written into the generated pkg-config file by _package()
+license=('LGPL2.1' 'GPL' 'custom:University of California and Stanford University License')  # NOTE(review): the custom entry presumably refers to the bundled ROOFIT_LICENSE - confirm
+makedepends=(  # packages needed to build; many are re-listed in optdepends for runtime use
+ 'ccache'
+ 'cern-vdt'
+ 'chromium'
+ 'cfitsio'
+ 'cmake'
+ 'cuda'  # appended to depends for the CUDA variant in package_root-cuda()
+ 'cudnn'  # appended to depends for the CUDA variant in package_root-cuda()
+ 'gcc-fortran'
+ 'gcc9-fortran'  # pairs with the gcc-9/g++-9 compilers that build() selects for the CUDA variant
+ 'git'
+ 'go'
+ 'libxml2'
+ 'libmariadbclient'
+ 'ocaml'
+ 'ocaml-ctypes'
+ 'openmp'
+ 'openmpi'
+ 'openssl'
+ 'postgresql-libs'
+ 'pythia8>=8.2.40-1'
+ 'qt5-webengine'
+ 'sqlite'
+ 'unuran'
+ 'vc'
+ 'xrootd>=4.6.0-2'
+ 'z3'
+)
+depends=(  # runtime dependencies shared by both variants; _package() also joins this list into the pkg-config Requires value
+ 'blas'
+ 'desktop-file-utils'
+ 'fcgi'
+ 'fftw'
+ 'ftgl'
+ 'giflib'
+ 'gl2ps'
+ 'glew'
+ 'graphviz'
+ 'gsl'
+ 'hicolor-icon-theme'  # _package() installs root.png under /usr/share/icons/hicolor
+ 'intel-tbb'
+ 'libafterimage'
+ 'librsvg'
+ 'libxpm'
+ 'python'  # get_pyver() and the compileall steps in _package() invoke it
+ 'python-numpy'
+ 'tex-gyre-fonts'
+ 'unixodbc'
+ 'xxhash>=0.6.5-1'
+ 'zstd'
+)
+optdepends=(  # 'package: reason' pairs; package_root() and package_root-cuda() each append a Fortran entry
+ 'cern-vdt: Add a set of fast and vectorisable mathematical functions'
+ 'chromium: Support for WebGUI'
+ 'cfitsio: Read images and data from FITS files'
+ 'libmariadbclient: MySQL support'
+ 'libxml2: XML parser interface'
+ 'openmp: Support OpenMP extensions in Minuit2'
+ 'openmpi: Support OpenMPI extensions in Minuit2'
+ 'openssl: OpenSSL support'
+ 'postgresql-libs: PostgreSQL support'
+ 'pythia8>=8.2.40-1: Pythia8 EG support'
+ 'qt5-webengine: Support for WebGUI'
+ 'sqlite: SQLite support'
+ 'tcsh: Legacy CSH support'
+ 'unuran: Support non-uniform random numbers'
+ 'vc: Add types for portable and intuitive SIMD programming'
+ 'xrootd: Support remote file server and client'
+ 'z3: Support the Z3 theorem prover'
+)
+source=(  # entries are paired by position with sha512sums below; names ending in .patch are applied by prepare()
+ "https://root.cern.ch/download/root_v${pkgver}.source.tar.gz"
+ 'ROOFIT_LICENSE'  # installed as the roofit license by _package()
+ 'root.xml'  # installed under /usr/share/mime/packages by _package()
+ 'root.pc.tpl'  # template filled in by _package() to produce root.pc
+ 'settings.cmake'  # initial cache for the plain build in build()
+ 'settings-cuda.cmake'  # initial cache for the CUDA build in build()
+ 'jupyter_notebook_config.py'
+ 'nbman-for-arch.patch'
+ 'thisroot.fail'  # installed as /usr/bin/thisroot.sh by _package()
+ 'adapt_tmva_to_support_cudnn8.patch'
+)
+sha512sums=('9e3c54bbc146b0abb0a2d960af380255ec59d0b3a11a4a97a2a25cb7ac567b07280c4eb48dddf99c1fa2e692881f6396a842ce125d3a253037e52f719739f01e'  # root_v${pkgver}.source.tar.gz
+ 'af8f178fc9df66997d5495b271e38adcd1636aab4c8fc994c6600c2496127829d831250d73d3fc229b02dfe49b9867d0be979beacb959f2f3a05351b8118a4a6'  # ROOFIT_LICENSE
+ '1fe6f4aa09d583d33f27cc766f4935510bb7ab6bbb8d4700baa1aaab92ea6c876500b67da1e4f6e0b510aa5616e4e193b860264b86925de85f2d9f558d75d5dc'  # root.xml
+ '3c81d255a17b902ffac0187af1752847036137e16641a88b17eef0d9c944e6f0d3c954bc93307d6270603f43f6c23f2e04f98dc7a68f9d076dbaa8006a2527d6'  # root.pc.tpl
+ '9ee5b6606dbd352608a2a4998344ca4026d677c86823e62fff615f6e84efcecdffc07a1e9182a356aa35035e7f35df5a107127722a6bad4b97d1f49cffebf5b9'  # settings.cmake
+ '7665bc8cbe79162e0b969b08802e1b7b2ed22ed8b1402d50cf194172a644f647dcaf0f5abb76f8b6007dfab8dbc811604479be826b345d8fd77edfb51032110b'  # settings-cuda.cmake
+ '1c905ee7a3f8f5f3f567d957f9be6b503a8631565d4d9b9bfea5e496ef86865c5a8be1a1f8c7842754029879cf0afd2465249f532a116cc43660aa2e460ae682'  # jupyter_notebook_config.py
+ '12814f50b7016bd86d3f91e0e31c052783a0c0fa72b7d6a072d3ae6f86c2437323d585e531235377ebbfdd9cb76abd7da84d9631de821151547f1d4b13417e69'  # nbman-for-arch.patch
+ 'ff555ac4db568affe139701907f86d919a2206f3e304f69dd317b756ea0904b5934d9364a524060778aa507809ce78448621619bb34039ba34c5a71af71a4a8c'  # thisroot.fail
+ '2ae126795df4127c27a6287a1499bdb8b2bacb74cfbec17dabe378a5fb9fc7c755644e4090a4da1d0045bf5d4f542f06da827a0f48a5927ee8509874045f18b6')  # adapt_tmva_to_support_cudnn8.patch
+
+get_pyver () {  # prints "<major>.<minor>" of the python found on PATH; _package() uses it to build the site-packages path
+ python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))'
+}
+
+prepare() {  # applies every *.patch listed in source=(), exports build flags, then duplicates the tree for the CUDA build
+ local src
+ for src in "${source[@]}"; do
+ src="${src%%::*}"  # keep only the text before the first '::'
+ src="${src##*/}"  # keep only the final path component
+ [[ $src = *.patch ]] || continue
+ echo " -> Applying patch $src..."
+ patch -Np1 -i "../$src" -d "${srcdir}/${pkgbase}-${pkgver}"  # NOTE(review): "../$src" is presumably resolved after -d changes into the source tree - confirm
+ done
+
+ # specify some custom flags
+ # needed by vc to link properly
+ CUSTOM_CMAKE_FLAGS="-DTARGET_ARCHITECTURE:STRING=generic"
+ # make sure it finds python
+ CUSTOM_CMAKE_FLAGS+=" -DPYTHON_EXECUTABLE:PATH=/usr/bin/python"
+ # need to set install prefix like so
+ CUSTOM_CMAKE_FLAGS+=" -DINSTALL_PREFIX=/usr"
+ export CUSTOM_CMAKE_FLAGS  # build() expands this unquoted so it splits into separate cmake arguments
+
+ # update system flags
+ # don't let ROOT play around with lib paths
+ # the following is no longer necessary
+ # sed -i -e 's@SetLibraryPath();@@g' \
+ # "${srcdir}/${pkgbase}-${pkgver}/rootx/src/rootx.cxx"
+ # now only depends on IS_RPATH_BUILD being set
+ # so pass it to GCC
+ export CPPFLAGS="${CPPFLAGS} -DIS_RPATH_BUILD=1"
+ # make sure pthread gets detected
+ CUSTOM_COMPILER_FLAGS="${CPPFLAGS} -pthread"  # not declared local, so this stays a global shell variable
+ export CFLAGS="${CFLAGS} ${CUSTOM_COMPILER_FLAGS}"
+ export CXXFLAGS="${CXXFLAGS} ${CUSTOM_COMPILER_FLAGS}"
+ # do not link undefined
+ CUSTOM_COMPILER_FLAGS+=" -Wl,--no-undefined"
+ export LDFLAGS="${LDFLAGS} ${CUSTOM_COMPILER_FLAGS}"  # LDFLAGS therefore also receives the CPPFLAGS and -pthread added above
+
+ # go flags for built-in clang
+ export CGO_LDFLAGS="${LDFLAGS}"
+ export GOFLAGS="-buildmode=pie -trimpath -modcacherw"
+
+ cp -r "${pkgbase}-${pkgver}" "${pkgbase}-${pkgver}-cuda"  # already-patched copy that build() configures with settings-cuda.cmake
+}
+
+build() {  # configures and compiles ROOT twice: plain in build/, CUDA-enabled in build-cuda/
+ ## ROOT (CUSTOM_CMAKE_FLAGS is exported by prepare() and left unquoted on purpose so it word-splits)
+ mkdir -p "${srcdir}/build"
+ cd "${srcdir}/build"
+
+ cmake -C "${srcdir}/settings.cmake" \
+ ${CUSTOM_CMAKE_FLAGS} \
+ "${srcdir}/${pkgbase}-${pkgver}"
+ make
+
+ ## ROOT with CUDA (uses the source copy made by prepare(); CC/CXX apply to this cmake invocation only)
+ mkdir -p "${srcdir}/build-cuda"
+ cd "${srcdir}/build-cuda"
+
+ CC=/usr/bin/gcc-9 \
+ CXX=/usr/bin/g++-9 \
+ cmake -C "${srcdir}/settings-cuda.cmake" \
+ ${CUSTOM_CMAKE_FLAGS} \
+ "${srcdir}/${pkgbase}-${pkgver}-cuda"
+ make
+}
+
+_package() {  # $1: build directory name under $srcdir ('build' or 'build-cuda'); installs it into $pkgdir and applies packaging fixups
+ local bld_dir="${srcdir}/${1}"
+ cd "${bld_dir}"
+
+ make DESTDIR="${pkgdir}" install
+
+ # fix missing hardlinks for genreflex and rootcint
+ cd "${pkgdir}"/usr/bin
+ ln -f rootcling rootcint
+ ln -f rootcling genreflex
+ cd "${bld_dir}" # go back
+
+ # fix python env call
+ sed -e 's/@python@/python/' -i "${pkgdir}/usr/lib/root/cmdLineUtils.py"
+
+ # try to deal with weird PyROOT, PyMVA and JupyROOT stuff
+ rm -rf "${pkgdir}/usr/lib/root/__pycache__"
+ local _pyver; _pyver=$(get_pyver)  # assigned separately so a failing get_pyver is not masked by 'local'
+ local _pydir="${pkgdir}/usr/lib/python${_pyver}/site-packages"
+ install -d "${_pydir}"
+ find "${pkgdir}/usr/lib/root" -maxdepth 1 -mindepth 1 \( -iname "*py*" -or -name "*Js*" \) \
+ ! \( -name "*EGPythia8*" -or -iname "*.rootmap" -or -iname "*.pcm" \) -print0 | while read -rd $'\0' _lib; do
+ _base=$(basename "${_lib}")
+ ln -sf "/usr/lib/root/${_base}" "${pkgdir}/usr/lib/python${_pyver}/site-packages/${_base}"  # link target is the installed path, not the $pkgdir one
+ done
+
+ # recompile pycache to strip $pkgdir from embedded paths
+ python -m compileall -d "/usr/lib/python${_pyver}" \
+ "${pkgdir}/usr/lib/python${_pyver}"
+ python -O -m compileall -d "/usr/lib/python${_pyver}" \
+ "${pkgdir}/usr/lib/python${_pyver}"
+
+ # icon, shortcut and mime
+ install -Dm644 "${srcdir}/${pkgbase}-${pkgver}/icons/Root6Icon.png" \
+ "${pkgdir}/usr/share/icons/hicolor/48x48/apps/root.png"
+ install -Dm644 "${srcdir}/${pkgbase}-${pkgver}/etc/root.desktop" \
+ "${pkgdir}/usr/share/applications/root.desktop"
+ echo 'Icon=root.png' >> "${pkgdir}/usr/share/applications/root.desktop"
+ install -Dm644 "${srcdir}/root.xml" \
+ "${pkgdir}/usr/share/mime/packages/root.xml"
+
+ # use a file that pacman can track instead of adding directly to ld.so.conf
+ install -d "${pkgdir}/etc/ld.so.conf.d"
+ echo '/usr/lib/root' > "${pkgdir}/etc/ld.so.conf.d/root.conf"
+
+ # create pkg-config file
+ local _prefix _exec_prefix _bindir _libdir _incdir _pkg_ver _libs _cflags _requires fold suffix
+ _prefix="$("${pkgdir}"/usr/bin/root-config --prefix)"
+ _exec_prefix="$("${pkgdir}"/usr/bin/root-config --exec-prefix)"
+ _bindir="$("${pkgdir}"/usr/bin/root-config --bindir)"
+ _libdir="$("${pkgdir}"/usr/bin/root-config --libdir)"
+ _incdir="$("${pkgdir}"/usr/bin/root-config --incdir)"
+ _pkg_ver="$(sed -n 's,.*ROOT_RELEASE *\"\(.*\)\".*,\1,p' < "${pkgdir}"/usr/include/RVersion.h)"
+ _libs="$("${pkgdir}"/usr/bin/root-config --libs)"
+ _cflags="$("${pkgdir}"/usr/bin/root-config --cflags)"
+ printf -v _requires '%s,' "${depends[@]}"  # comma-joined depends; note the result keeps a trailing comma
+ cp "${srcdir}/root.pc.tpl" "${bld_dir}"/  # work on a copy so the pristine template in $srcdir is left untouched
+ sed -e "s@_PREFIX@${_prefix}@" -e "s@_EXECPREFIX@${_exec_prefix}@" \
+ -e "s@_LIBDIR@${_libdir}@" -e "s@_INCDIR@${_incdir}@" \
+ -e "s@_PKGVERSION@${_pkg_ver}@" -e "s@_LIBRARIES@${_libs}@" \
+ -e "s@_CFLAGS@${_cflags}@" -e "s@_UPSTREAM_URL@${url}@" \
+ -e "s@_REQUIRES@${_requires}@" \
+ -i "${bld_dir}/root.pc.tpl"
+ install -Dm644 "${bld_dir}/root.pc.tpl" "${pkgdir}/usr/lib/pkgconfig/root.pc"  # '@' delimits the sed expressions above since the values contain '/'
+
+ # install all licenses & docs
+ install -d "${pkgdir}/usr/share/licenses/roofit"
+ install "${srcdir}/ROOFIT_LICENSE" "${pkgdir}/usr/share/licenses/roofit/LICENSE"
+ install -d "${pkgdir}/usr/share/licenses/${pkgname}"
+ ln -s '/usr/share/doc/root/LICENSE' "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE"
+ for fold in fonts js; do
+ install -d "${pkgdir}/usr/share/licenses/${pkgname}/${fold}"
+ ln -s "/usr/share/root/${fold}/LICENSE" "${pkgdir}/usr/share/licenses/${pkgname}/${fold}"/
+ done
+ ln -s '/usr/share/licenses/roofit' "${pkgdir}/usr/share/licenses/${pkgname}/roofit"
+ if [ "${pkgname}" != "root" ]; then  # for a non-'root' package name, also expose licenses and docs under the other name
+ ln -s "/usr/share/licenses/${pkgname}" "${pkgdir}/usr/share/licenses/root"
+ ln -s "/usr/share/doc/root" "${pkgdir}/usr/share/doc/${pkgname}"
+ fi
+
+ # install jupyter kernels and `root --notebook` config
+ install -d "${pkgdir}/usr/share/jupyter/kernels"
+ ln -s '/etc/root/notebook/kernels/root' "${pkgdir}/usr/share/jupyter/kernels/root"
+ install "${srcdir}/jupyter_notebook_config.py" "${pkgdir}/etc/root/notebook"/
+
+ # drop thisroot.* shell files
+ rm -rf "${pkgdir}"/usr/bin/thisroot.*
+ install -Dm755 "${srcdir}/thisroot.fail" "${pkgdir}/usr/bin/thisroot.sh"
+ for suffix in csh fish; do
+ ln -s '/usr/bin/thisroot.sh' "${pkgdir}/usr/bin/thisroot.${suffix}"
+ done
+}
+
+package_root() {  # packages the plain (non-CUDA) variant
+ optdepends+=('gcc-fortran: Enable the Fortran components of ROOT')
+ _package build  # 'build' is the directory build() creates for the plain configuration
+}
+
+package_root-cuda() {  # packages the CUDA variant; declares that it provides and conflicts with 'root'
+ pkgdesc='C++ data analysis framework and interpreter from CERN with GPU (CUDA) features enabled'
+ provides=('root')
+ conflicts=('root')
+ depends+=('cuda' 'cudnn')
+ # gcc9 (not gcc8): matches makedepends and the gcc-9/g++-9 compilers build() uses for this variant
+ optdepends+=('gcc9-fortran: Enable the Fortran components of ROOT')
+ _package build-cuda
+}
Copied: root/repos/community-testing-x86_64/ROOFIT_LICENSE (from rev 665090, root/trunk/ROOFIT_LICENSE)
===================================================================
--- community-testing-x86_64/ROOFIT_LICENSE (rev 0)
+++ community-testing-x86_64/ROOFIT_LICENSE 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,22 @@
+RooFit --- Copyright (c) 2000-2005, Regents of the University of California and Stanford University
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ - Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copied: root/repos/community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch (from rev 665090, root/trunk/adapt_tmva_to_support_cudnn8.patch)
===================================================================
--- community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch (rev 0)
+++ community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,1130 @@
+From 05739e6b01fb34b5ef40e1a584107876e68e4b77 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Tue, 21 Jul 2020 15:13:57 +0300
+Subject: [PATCH 01/10] update deprecated function call name to backward
+ compatible one
+
+---
+ tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu
+index 058cee28424..60289ec2fdd 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu
+@@ -132,7 +132,11 @@ void TCudnn<AFloat>::InitializeRecurrentDescriptors(TDescriptors *&descriptors,
+ cudnnDataType_t mathPrec = CUDNN_DATA_FLOAT;
+ if (std::is_same<AFloat, double>::value) { mathPrec = CUDNN_DATA_DOUBLE;}
+
++#if (CUDNN_VERSION >= 8000)
++ CUDNNCHECK(cudnnSetRNNDescriptor_v6(handle, rnnDescriptors->LayerDescriptor, hiddenSize, numLayers, rnnDescriptors->HelperDescriptor,
++#else
+ CUDNNCHECK(cudnnSetRNNDescriptor(handle, rnnDescriptors->LayerDescriptor, hiddenSize, numLayers, rnnDescriptors->HelperDescriptor,
++#endif
+ inputMode, direction, mode, algo, mathPrec) );
+
+
+
+From 90baa4f6ad10076fa148f5aa06ef432bd0f34208 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Tue, 21 Jul 2020 19:06:09 +0300
+Subject: [PATCH 02/10] adapt convolution forward to cuDNN 8
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 77 ++++++++++++++++++-
+ 1 file changed, 76 insertions(+), 1 deletion(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 7a57b6bf104..cc953ee45f9 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -27,6 +27,9 @@
+ // #include "Kernels.cuh"*/
+ // #include <math.h>
+
++// for std::numeric_limits<T>::max()
++#include <limits>
++
+ namespace TMVA {
+ namespace DNN {
+
+@@ -378,7 +381,78 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ cudnnHandle_t cudnnHandle = outputTensor.GetCudnnHandle();
+
+ // cuDNN decides which algorithm to use
+- // More detailed alternative: cudnnFindConvolutionForwardAlgorithm
++#if (CUDNN_VERSION >= 8000)
++ /**
++ * I'm sure there may be a faster way, but this works
++ */
++ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available
++ cudnnConvolutionDescriptor_t tempConvDescriptor;
++ CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor));
++ cudnnTensorDescriptor_t outputTensorDescriptor;
++ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor));
++ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor,
++ CUDNN_TENSOR_NCHW, // Layout of the tensor in memory
++ Tensor_t::GetDataType(),
++ (int)L->GetBatchSize(),
++ (int)L->GetDepth(),
++ (int)L->GetHeight(),
++ (int)L->GetWidth()));
++ int algoCount;
++ cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ CUDNNCHECK(cudnnFindConvolutionForwardAlgorithm(
++ cudnnHandle,
++ inputTensorDescriptor,
++ convDescriptors->WeightsDescriptor,
++ tempConvDescriptor,
++ outputTensorDescriptor,
++ convRequestedAlgoCount,
++ &algoCount,
++ &convPerfResults));
++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
++ // but we arrive at an chicken or egg problem:
++ // workspace size is calculated from chosen forward algorithm,
++ // but finding a forward algorithm depends on workspace size...
++ // i.e.
++ // Tensor_t & inputTensor = L->GetInput();
++ // inputTensor = Tensor_t(inputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0);
++ // CUDNNCHECK(cudnnFindConvolutionForwardAlgorithmEx(
++ // cudnnHandle,
++ // inputTensorDescriptor,
++ // &inputTensor,
++ // convDescriptors->WeightsDescriptor,
++ // &filters,
++ // tempConvDescriptor,
++ // outputTensorDescriptor,
++ // &outputTensor,
++ // convRequestedAlgoCount,
++ // &algoCount,
++ // &convPerfResults,
++ // &convWorkspace,
++ // convWorkspace->ForwardWorkspaceSize));
++ // instead choose either fastest or lowest memory algo as per preference
++ int algoIdx{0};
++ if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (convPerfResults[i].status != 0) continue;
++ if (convPerfResults[i].time < temp_runtime) {
++ temp_runtime = convPerfResults[i].time;
++ algoIdx = i;
++ }
++ }
++ } else { // prefer smallest workspace size
++ size_t temp_memsize{std::numeric_limits<size_t>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (convPerfResults[i].status != 0) continue;
++ if (convPerfResults[i].memory < temp_memsize) {
++ temp_memsize = convPerfResults[i].memory;
++ algoIdx = i;
++ }
++ }
++ }
++ convWorkspace->AlgorithmForward = convPerfResults[algoIdx].algo;
++#else
++ // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions)
+ cudnnConvolutionFwdPreference_t preferenceFwd = (CNNOptions::ConvMaxWorkspaceSize !=0) ? CUDNN_CONVOLUTION_FWD_PREFER_FASTEST :
+ CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+
+@@ -389,6 +463,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ outputTensor.GetTensorDescriptor(), preferenceFwd,
+ memLimit, // Memory limit in bytes for mode CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
+ &convWorkspace->AlgorithmForward));
++#endif
+
+ // Allocate memory for the convolution
+ //size_t workSpaceSizeInBytes = 0;
+
+From d9b5e2f82917e7183b9f45a49135641981741477 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Tue, 21 Jul 2020 19:34:00 +0300
+Subject: [PATCH 03/10] adapt convolution backward to cuDNN 8
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 72 +++++++++++++++++++
+ 1 file changed, 72 insertions(+)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index cc953ee45f9..85a5c3aa175 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -515,6 +515,77 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // dx : Activation gradient to be computed -> activationGradients [in place op]
+ // dy : Gradient of activation from the following layer (backpropagation)-> activationGradients
+
++#if (CUDNN_VERSION >= 8000)
++ /**
++ * I'm sure there may be a faster way, but this works
++ */
++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
++ cudnnConvolutionDescriptor_t tempConvBwdDescriptor;
++ CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor));
++ cudnnTensorDescriptor_t outputBwdTensorDescriptor;
++ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor));
++ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor,
++ CUDNN_TENSOR_NCHW, // Layout of the tensor in memory
++ Tensor_t::GetDataType(),
++ (int)L->GetBatchSize(),
++ (int)L->GetInputDepth(),
++ (int)L->GetInputHeight(),
++ (int)L->GetInputWidth()));
++ int algoCount;
++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm(
++ cudnnHandle,
++ convDescriptors->WeightsDescriptor,
++ activationGradientsBackwardDescriptor,
++ tempConvBwdDescriptor,
++ outputBwdTensorDescriptor,
++ convRequestedAlgoCount,
++ &algoCount,
++ &convPerfBwdResults));
++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
++ // but we arrive at an chicken or egg problem:
++ // workspace size is calculated from chosen forward algorithm,
++ // but finding a forward algorithm depends on workspace size...
++ // i.e.
++ // Tensor_t & outputBwdTensor = L->GetInput();
++ // outputBwdTensor = Tensor_t(outputBwdTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0);
++ // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx(
++ // cudnnHandle,
++ // convDescriptors->WeightsDescriptor,
++ // &filters,
++ // activationGradientsBackwardDescriptor,
++ // &activationGradientsBackwardTensor,
++ // tempConvBwdDescriptor,
++ // outputBwdTensorDescriptor,
++ // &outputBwdTensor,
++ // convRequestedAlgoCount,
++ // &algoCount,
++ // &convPerfBwdResults,
++ // &convWorkspace,
++ // convWorkspace->ForwardWorkspaceSize));
++ // instead choose either fastest or lowest memory algo as per preference
++ int algoIdx{0};
++ if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (convPerfBwdResults[i].status != 0) continue;
++ if (convPerfBwdResults[i].time < temp_runtime) {
++ temp_runtime = convPerfBwdResults[i].time;
++ algoIdx = i;
++ }
++ }
++ } else { // prefer smallest workspace size
++ size_t temp_memsize{std::numeric_limits<size_t>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (convPerfBwdResults[i].status != 0) continue;
++ if (convPerfBwdResults[i].memory < temp_memsize) {
++ temp_memsize = convPerfBwdResults[i].memory;
++ algoIdx = i;
++ }
++ }
++ }
++ convWorkspace->AlgorithmBackward = convPerfBwdResults[algoIdx].algo;
++#else
+ cudnnConvolutionBwdDataPreference_t preferenceBwdData =
+ (CNNOptions::ConvMaxWorkspaceSize != 0) ? CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST : CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+
+@@ -525,6 +596,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ activationGradientsBackwardDescriptor,
+ preferenceBwdData, memLimit,
+ &convWorkspace->AlgorithmBackward));
++#endif
+
+ std::cout << "CONV BWD Data Algo used is " << convWorkspace->AlgorithmBackward << std::endl;
+ //CUDNNCHECK(cudnnSetConvolutionMathType(convDescriptors->LayerDescriptor, CUDNN_TENSOR_OP_MATH));
+
+From 526b7177c0201be1d0c6b36de0772b7d2ecb90d5 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Wed, 22 Jul 2020 11:50:29 +0300
+Subject: [PATCH 04/10] fix typo and re-declarations
+
+---
+ tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 85a5c3aa175..1b7e3e845d8 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -387,7 +387,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ */
+ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available
+ cudnnConvolutionDescriptor_t tempConvDescriptor;
+- CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor));
++ CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor));
+ cudnnTensorDescriptor_t outputTensorDescriptor;
+ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor));
+ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor,
+@@ -407,7 +407,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ outputTensorDescriptor,
+ convRequestedAlgoCount,
+ &algoCount,
+- &convPerfResults));
++ convPerfResults));
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+ // but we arrive at an chicken or egg problem:
+ // workspace size is calculated from chosen forward algorithm,
+@@ -521,7 +521,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ */
+ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
+ cudnnConvolutionDescriptor_t tempConvBwdDescriptor;
+- CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor));
++ CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor));
+ cudnnTensorDescriptor_t outputBwdTensorDescriptor;
+ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor));
+ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor,
+@@ -531,7 +531,6 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ (int)L->GetInputDepth(),
+ (int)L->GetInputHeight(),
+ (int)L->GetInputWidth()));
+- int algoCount;
+ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm(
+ cudnnHandle,
+@@ -541,7 +540,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ outputBwdTensorDescriptor,
+ convRequestedAlgoCount,
+ &algoCount,
+- &convPerfBwdResults));
++ convPerfBwdResults));
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+ // but we arrive at an chicken or egg problem:
+ // workspace size is calculated from chosen forward algorithm,
+@@ -564,7 +563,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // convWorkspace->ForwardWorkspaceSize));
+ // instead choose either fastest or lowest memory algo as per preference
+- int algoIdx{0};
++ algoIdx = 0;
+ if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest
+ float temp_runtime{std::numeric_limits<float>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+
+From 6d84e765322a72c48de00b4a9b7471da8a15fece Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Wed, 22 Jul 2020 17:00:01 +0300
+Subject: [PATCH 05/10] implement workspace limits, fix an algoruthm preference
+ bug and rewrite relevant sections
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 273 ++++++++++--------
+ 1 file changed, 151 insertions(+), 122 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 1b7e3e845d8..2049e2b9195 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -333,35 +333,108 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ TDescriptors * & descriptors,
+ const DNN::CNN::TConvParams & /*params*/,
+ ConvLayer_t *L) {
+- auto convWorkspace = new ConvWorkspace_t ();
++ auto convWorkspace = new ConvWorkspace_t();
++ size_t memLimit = (CNNOptions::ConvMaxWorkspaceSize > 0) ? static_cast<size_t>(CNNOptions::ConvMaxWorkspaceSize) : 0;
+ auto convDescriptors = static_cast<ConvDescriptors_t *>(descriptors);
++ // can we do the following and substitute below???
++ // auto weightsDescriptor{convDescriptors->WeightsDescriptor};
++ // auto convDescriptor{convDescriptors->LayerDescriptor};
+
++#if (CUDNN_VERSION >= 8000)
++ enum algoPreference { no_workspace, fastest, workspace_limit };
++ algoPreference algoChoice;
++ auto choose_algo = [](algoPreference const& algoPref, auto&& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
++ int algoIdx{0};
++ if (algoPref == algoPreference::fastest) { // prefer fastest
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime) {
++ temp_runtime = PerfResults[i].time;
++ algoIdx = i;
++ }
++ }
++ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime && PerfResults[i].memory <= memLim) {
++ temp_runtime = PerfResults[i].time;
++ algoIdx = i;
++ }
++ }
++ } else { // prefer smallest workspace size
++ size_t temp_memsize{std::numeric_limits<size_t>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].memory < temp_memsize) {
++ temp_memsize = PerfResults[i].memory;
++ algoIdx = i;
++ }
++ }
++ }
++ return algoIdx;
++ };
++#else
++ // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions)
++ cudnnConvolutionFwdPreference_t preferenceFwd;
++ cudnnConvolutionBwdDataPreference_t preferenceBwdData;
++ cudnnConvolutionBwdFilterPreference_t preferenceBwdFilter;
++#endif
++ // decide on algorithm preference early
++ if (CNNOptions::ConvMaxWorkspaceSize < 0) {
++ // no workspace case
++#if (CUDNN_VERSION >= 8000)
++ algoChoice = no_workspace;
++#else
++ preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
++#endif
++
++ } else if (CNNOptions::ConvMaxWorkspaceSize == 0) {
++ // fastest overall
++#if (CUDNN_VERSION >= 8000)
++ algoChoice = fastest;
++#else
++ preferenceFwd = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST;
++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
++#endif
++
++ } else {
++ // fastest in memory limit
++#if (CUDNN_VERSION >= 8000)
++ algoChoice = workspace_limit;
++#else
++ preferenceFwd = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT;
++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT;
++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT;
++#endif
++ }
+ // fix the weight tensor shapes
+ // by default the weights are columnmajor, set them to be row major . At this points
+ // they are not yet initialized
+ Tensor_t & filters = L->GetWeightsAt(0);
+- filters = Tensor_t (filters.GetDeviceBuffer(), {L->GetDepth(),L->GetInputDepth(), L->GetFilterHeight(),L->GetFilterWidth()}, MemoryLayout::RowMajor, 0, 0 );
+- //PrintTensor(L->GetWeightsAt(0));
++ filters = Tensor_t(filters.GetDeviceBuffer(), {L->GetDepth(), L->GetInputDepth(), L->GetFilterHeight(), L->GetFilterWidth()}, MemoryLayout::RowMajor, 0, 0);
++ // PrintTensor(L->GetWeightsAt(0));
+ Tensor_t & biases = L->GetBiasesAt(0);
+- biases = Tensor_t (biases.GetDeviceBuffer(), {1, L->GetDepth(),1,1}, GetTensorLayout(), 0, 0 );
++ biases = Tensor_t(biases.GetDeviceBuffer(), {1, L->GetDepth(), 1, 1}, GetTensorLayout(), 0, 0);
+
+ Tensor_t & outputTensor = L->GetOutput();
+- outputTensor = Tensor_t(outputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth() },GetTensorLayout(),0,0 );
++ outputTensor = Tensor_t(outputTensor.GetDeviceBuffer(), {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()}, GetTensorLayout(), 0, 0);
+ Tensor_t & inputActivation = L->GetInputActivation();
+- inputActivation = Tensor_t(inputActivation.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(),0,0 );
++ inputActivation = Tensor_t(inputActivation.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(), 0, 0);
+
+ Tensor_t & activationGradients = L->GetActivationGradients();
+- activationGradients = Tensor_t(activationGradients.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(),0,0 );
++ activationGradients = Tensor_t(activationGradients.GetDeviceBuffer(),outputTensor.GetShape(), GetTensorLayout(), 0, 0);
+
+ Tensor_t & weightGradients = L->GetWeightGradientsAt(0);
+- weightGradients = Tensor_t( weightGradients.GetDeviceBuffer(), filters.GetShape(), GetTensorLayout(), 0, 0 );
++ weightGradients = Tensor_t(weightGradients.GetDeviceBuffer(), filters.GetShape(), GetTensorLayout(), 0, 0);
+
+ Tensor_t & biasGradients = L->GetBiasGradientsAt(0);
+- biasGradients = Tensor_t( biasGradients.GetDeviceBuffer(), biases.GetShape(), GetTensorLayout(), 0, 0 );
++ biasGradients = Tensor_t(biasGradients.GetDeviceBuffer(), biases.GetShape(), GetTensorLayout(), 0, 0);
+
+
+ // FIXME: Use descriptors instead (Tensor device memory is otherwise allocated during initialization)
+- //Tensor_t inputTensor ({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0);
++ // Tensor_t inputTensor ({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0);
+ cudnnTensorDescriptor_t inputTensorDescriptor;
+ CUDNNCHECK(cudnnCreateTensorDescriptor(&inputTensorDescriptor) );
+ CUDNNCHECK(cudnnSetTensor4dDescriptor(inputTensorDescriptor,
+@@ -385,79 +458,44 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ /**
+ * I'm sure there may be a faster way, but this works
+ */
+- int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available
+- cudnnConvolutionDescriptor_t tempConvDescriptor;
+- CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor));
+- cudnnTensorDescriptor_t outputTensorDescriptor;
+- CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor));
+- CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor,
+- CUDNN_TENSOR_NCHW, // Layout of the tensor in memory
+- Tensor_t::GetDataType(),
+- (int)L->GetBatchSize(),
+- (int)L->GetDepth(),
+- (int)L->GetHeight(),
+- (int)L->GetWidth()));
++ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available
++
+ int algoCount;
+ cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+- CUDNNCHECK(cudnnFindConvolutionForwardAlgorithm(
+- cudnnHandle,
+- inputTensorDescriptor,
+- convDescriptors->WeightsDescriptor,
+- tempConvDescriptor,
+- outputTensorDescriptor,
+- convRequestedAlgoCount,
+- &algoCount,
+- convPerfResults));
++ CUDNNCHECK(
++ cudnnFindConvolutionForwardAlgorithm(
++ cudnnHandle,
++ inputTensorDescriptor,
++ convDescriptors->WeightsDescriptor,
++ convDescriptors->LayerDescriptor,
++ outputTensor.GetTensorDescriptor(),
++ convRequestedAlgoCount,
++ &algoCount,
++ convPerfResults
++ )
++ );
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+- // but we arrive at an chicken or egg problem:
+- // workspace size is calculated from chosen forward algorithm,
+- // but finding a forward algorithm depends on workspace size...
+ // i.e.
+- // Tensor_t & inputTensor = L->GetInput();
+- // inputTensor = Tensor_t(inputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0);
++ // create an input tensor before the inputTensorDescriptor
++ // and get the descriptor from there
++ // Tensor_t inputTensor({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0);
+ // CUDNNCHECK(cudnnFindConvolutionForwardAlgorithmEx(
+ // cudnnHandle,
+- // inputTensorDescriptor,
++ // inputTensor.GetTensorDescriptor(),
+ // &inputTensor,
+ // convDescriptors->WeightsDescriptor,
+ // &filters,
+- // tempConvDescriptor,
+- // outputTensorDescriptor,
++ // convDescriptors->LayerDescriptor,
++ // outputTensor.GetTensorDescriptor(),
+ // &outputTensor,
+ // convRequestedAlgoCount,
+ // &algoCount,
+ // &convPerfResults,
+ // &convWorkspace,
+- // convWorkspace->ForwardWorkspaceSize));
++ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- int algoIdx{0};
+- if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest
+- float temp_runtime{std::numeric_limits<float>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (convPerfResults[i].status != 0) continue;
+- if (convPerfResults[i].time < temp_runtime) {
+- temp_runtime = convPerfResults[i].time;
+- algoIdx = i;
+- }
+- }
+- } else { // prefer smallest workspace size
+- size_t temp_memsize{std::numeric_limits<size_t>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (convPerfResults[i].status != 0) continue;
+- if (convPerfResults[i].memory < temp_memsize) {
+- temp_memsize = convPerfResults[i].memory;
+- algoIdx = i;
+- }
+- }
+- }
+- convWorkspace->AlgorithmForward = convPerfResults[algoIdx].algo;
++ convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, convPerfResults, memLimit)].algo;
+ #else
+- // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions)
+- cudnnConvolutionFwdPreference_t preferenceFwd = (CNNOptions::ConvMaxWorkspaceSize !=0) ? CUDNN_CONVOLUTION_FWD_PREFER_FASTEST :
+- CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+-
+- size_t memLimit = (CNNOptions::ConvMaxWorkspaceSize > 0) ? (size_t) CNNOptions::ConvMaxWorkspaceSize : 0;
+-
+ CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm(
+ cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor,
+ outputTensor.GetTensorDescriptor(), preferenceFwd,
+@@ -519,75 +557,36 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ /**
+ * I'm sure there may be a faster way, but this works
+ */
+- convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
+- cudnnConvolutionDescriptor_t tempConvBwdDescriptor;
+- CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor));
+- cudnnTensorDescriptor_t outputBwdTensorDescriptor;
+- CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor));
+- CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor,
+- CUDNN_TENSOR_NCHW, // Layout of the tensor in memory
+- Tensor_t::GetDataType(),
+- (int)L->GetBatchSize(),
+- (int)L->GetInputDepth(),
+- (int)L->GetInputHeight(),
+- (int)L->GetInputWidth()));
+- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdDataResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm(
+ cudnnHandle,
+ convDescriptors->WeightsDescriptor,
++ activationGradients.GetTensorDescriptor(),
++ convDescriptors->LayerDescriptor,
+ activationGradientsBackwardDescriptor,
+- tempConvBwdDescriptor,
+- outputBwdTensorDescriptor,
+ convRequestedAlgoCount,
+ &algoCount,
+- convPerfBwdResults));
++ convPerfBwdDataResults));
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+- // but we arrive at an chicken or egg problem:
+- // workspace size is calculated from chosen forward algorithm,
+- // but finding a forward algorithm depends on workspace size...
+ // i.e.
+- // Tensor_t & outputBwdTensor = L->GetInput();
+- // outputBwdTensor = Tensor_t(outputBwdTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0);
+ // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx(
+ // cudnnHandle,
+ // convDescriptors->WeightsDescriptor,
+ // &filters,
++ // activationGradients.GetTensorDescriptor(),
++ // &activationGradients,
++ // convDescriptors->LayerDescriptor,
+ // activationGradientsBackwardDescriptor,
+- // &activationGradientsBackwardTensor,
+- // tempConvBwdDescriptor,
+- // outputBwdTensorDescriptor,
+- // &outputBwdTensor,
++ // &inputTensor,
+ // convRequestedAlgoCount,
+ // &algoCount,
+ // &convPerfBwdResults,
+ // &convWorkspace,
+- // convWorkspace->ForwardWorkspaceSize));
++ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- algoIdx = 0;
+- if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest
+- float temp_runtime{std::numeric_limits<float>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (convPerfBwdResults[i].status != 0) continue;
+- if (convPerfBwdResults[i].time < temp_runtime) {
+- temp_runtime = convPerfBwdResults[i].time;
+- algoIdx = i;
+- }
+- }
+- } else { // prefer smallest workspace size
+- size_t temp_memsize{std::numeric_limits<size_t>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (convPerfBwdResults[i].status != 0) continue;
+- if (convPerfBwdResults[i].memory < temp_memsize) {
+- temp_memsize = convPerfBwdResults[i].memory;
+- algoIdx = i;
+- }
+- }
+- }
+- convWorkspace->AlgorithmBackward = convPerfBwdResults[algoIdx].algo;
++ convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, convPerfBwdDataResults, memLimit)].algo;
+ #else
+- cudnnConvolutionBwdDataPreference_t preferenceBwdData =
+- (CNNOptions::ConvMaxWorkspaceSize != 0) ? CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST : CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+-
+ CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle,
+ convDescriptors->WeightsDescriptor,
+ activationGradients.GetTensorDescriptor(),
+@@ -628,11 +627,40 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // here should be able to use inputTensorDescriptor
+ cudnnTensorDescriptor_t activationBackwardDescriptor = inputTensorDescriptor;
+
+- // cudnnConvolutionBwdFilterPreference_t preference =
+- cudnnConvolutionBwdFilterPreference_t preferenceBwdFilter = (CNNOptions::ConvMaxWorkspaceSize != 0)
+- ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE
+- : CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
+-
++#if (CUDNN_VERSION >= 8000)
++ /**
++ * I'm sure there may be a faster way, but this works
++ */
++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
++ cudnnHandle,
++ activationBackwardDescriptor,
++ activationGradients.GetTensorDescriptor(),
++ convDescriptors->LayerDescriptor,
++ convDescriptors->WeightsDescriptor,
++ convRequestedAlgoCount,
++ &algoCount,
++ convPerfBwdFilterResults));
++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
++ // i.e.
++ // CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithmEx(
++ // cudnnHandle,
++ // activationBackwardDescriptor,
++ // &inputTensor,
++ // activationGradients.GetTensorDescriptor(),
++ // &activationGradients,
++ // convDescriptors->LayerDescriptor,
++ // convDescriptors->WeightsDescriptor,
++ // &filters,
++ // convRequestedAlgoCount,
++ // &algoCount,
++ // &convPerfBwdFilterResults,
++ // &convWorkspace,
++ // memLimit)); // use memLimit for workspace size
++ // instead choose either fastest or lowest memory algo as per preference
++ convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, convPerfBwdFilterResults, memLimit)].algo;
++#else
+ CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+ activationBackwardDescriptor,
+ activationGradients.GetTensorDescriptor(),
+@@ -641,6 +669,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ preferenceBwdFilter,
+ memLimit,
+ &convWorkspace->HelperAlgorithm));
++#endif
+
+ std::cout << "CONV BWD Filter Algo used is " << convWorkspace->HelperAlgorithm << std::endl;
+
+
+From a9d39cc9ccf9ae474d90b6671d3e0d69d4cf6872 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Wed, 22 Jul 2020 17:11:30 +0300
+Subject: [PATCH 06/10] implement correct logic behind cudnn algorithm
+ preference
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 20 +++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 2049e2b9195..b74c99d1a99 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -380,18 +380,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ #endif
+ // decide on algorithm preference early
+ if (CNNOptions::ConvMaxWorkspaceSize < 0) {
+- // no workspace case
+ #if (CUDNN_VERSION >= 8000)
+- algoChoice = no_workspace;
+-#else
+- preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
+- preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
+- preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
+-#endif
+-
+- } else if (CNNOptions::ConvMaxWorkspaceSize == 0) {
+ // fastest overall
+-#if (CUDNN_VERSION >= 8000)
+ algoChoice = fastest;
+ #else
+ preferenceFwd = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST;
+@@ -399,6 +389,16 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST;
+ #endif
+
++ } else if (CNNOptions::ConvMaxWorkspaceSize == 0) {
++ // no workspace case
++#if (CUDNN_VERSION >= 8000)
++ algoChoice = no_workspace;
++#else
++ preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE;
++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE;
++#endif
++
+ } else {
+ // fastest in memory limit
+ #if (CUDNN_VERSION >= 8000)
+
+From 6282dfa816c7f51af5c0ecaa0065514e3f627631 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Wed, 22 Jul 2020 18:51:56 +0300
+Subject: [PATCH 07/10] use decltype instead of auto, fix typos
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 22 +++++++++----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index b74c99d1a99..6cefd72c099 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -343,29 +343,29 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ #if (CUDNN_VERSION >= 8000)
+ enum algoPreference { no_workspace, fastest, workspace_limit };
+ algoPreference algoChoice;
+- auto choose_algo = [](algoPreference const& algoPref, auto&& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
++ auto choose_algo = [](algoPreference const& algoPref, int const algoCount, decltype(perfResults) const& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
+ int algoIdx{0};
+ if (algoPref == algoPreference::fastest) { // prefer fastest
+ float temp_runtime{std::numeric_limits<float>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime) {
+- temp_runtime = PerfResults[i].time;
++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime) {
++ temp_runtime = perfResults[i].time;
+ algoIdx = i;
+ }
+ }
+ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size
+ float temp_runtime{std::numeric_limits<float>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime && PerfResults[i].memory <= memLim) {
+- temp_runtime = PerfResults[i].time;
++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime && perfResults[i].memory <= memLim) {
++ temp_runtime = perfResults[i].time;
+ algoIdx = i;
+ }
+ }
+ } else { // prefer smallest workspace size
+ size_t temp_memsize{std::numeric_limits<size_t>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].memory < temp_memsize) {
+- temp_memsize = PerfResults[i].memory;
++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].memory < temp_memsize) {
++ temp_memsize = perfResults[i].memory;
+ algoIdx = i;
+ }
+ }
+@@ -494,7 +494,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, convPerfResults, memLimit)].algo;
++ convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, algoCount, convPerfResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm(
+ cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor,
+@@ -585,7 +585,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, convPerfBwdDataResults, memLimit)].algo;
++ convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, algoCount, convPerfBwdDataResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle,
+ convDescriptors->WeightsDescriptor,
+@@ -632,7 +632,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ * I'm sure there may be a faster way, but this works
+ */
+ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
+- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ cudnnConvolutionBwdFilterAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
+ cudnnHandle,
+ activationBackwardDescriptor,
+@@ -659,7 +659,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, convPerfBwdFilterResults, memLimit)].algo;
++ convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+ activationBackwardDescriptor,
+
+From 259c1c9c4d86391d1987f6635a2aece8cae587ac Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Wed, 22 Jul 2020 19:39:40 +0300
+Subject: [PATCH 08/10] assign backward filter algo to correct place
+
+---
+ tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 6cefd72c099..5a80dfbc03d 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -659,7 +659,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo;
++ convWorkspace->HelperAlgorithm = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+ activationBackwardDescriptor,
+
+From 2c109efea0e970b380a62f6102a286542676912a Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Thu, 23 Jul 2020 17:58:58 +0300
+Subject: [PATCH 09/10] make it compile and support C++11
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 49 ++++++++++++-------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 5a80dfbc03d..66ce64a5efc 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -343,29 +343,37 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ #if (CUDNN_VERSION >= 8000)
+ enum algoPreference { no_workspace, fastest, workspace_limit };
+ algoPreference algoChoice;
+- auto choose_algo = [](algoPreference const& algoPref, int const algoCount, decltype(perfResults) const& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
++ // C++11 lambdas cannot be templated, so we have to do this HORRIBLE stuff...
++ union LocalPerf_t {
++ // these three type are absolutely equivalent
++ // and one can access them as they wish to get info
++ cudnnConvolutionFwdAlgoPerf_t * fwd;
++ cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter;
++ cudnnConvolutionBwdDataAlgoPerf_t * bwdData;
++ };
++ auto choose_algo = [](algoPreference const & algoPref, int const algoCount, LocalPerf_t const & perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
+ int algoIdx{0};
+ if (algoPref == algoPreference::fastest) { // prefer fastest
+ float temp_runtime{std::numeric_limits<float>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime) {
+- temp_runtime = perfResults[i].time;
++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime) {
++ temp_runtime = perfResults.fwd[i].time;
+ algoIdx = i;
+ }
+ }
+ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size
+ float temp_runtime{std::numeric_limits<float>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime && perfResults[i].memory <= memLim) {
+- temp_runtime = perfResults[i].time;
++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime && perfResults.fwd[i].memory <= memLim) {
++ temp_runtime = perfResults.fwd[i].time;
+ algoIdx = i;
+ }
+ }
+ } else { // prefer smallest workspace size
+ size_t temp_memsize{std::numeric_limits<size_t>::max()};
+ for (int i = 0; i < algoCount; ++i) {
+- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].memory < temp_memsize) {
+- temp_memsize = perfResults[i].memory;
++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].memory < temp_memsize) {
++ temp_memsize = perfResults.fwd[i].memory;
+ algoIdx = i;
+ }
+ }
+@@ -461,7 +469,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available
+
+ int algoCount;
+- cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ cudnnConvolutionFwdAlgoPerf_t convFwdPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(
+ cudnnFindConvolutionForwardAlgorithm(
+ cudnnHandle,
+@@ -471,7 +479,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ outputTensor.GetTensorDescriptor(),
+ convRequestedAlgoCount,
+ &algoCount,
+- convPerfResults
++ convFwdPerfResults
+ )
+ );
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+@@ -490,11 +498,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &outputTensor,
+ // convRequestedAlgoCount,
+ // &algoCount,
+- // &convPerfResults,
++ // &convFwdPerfResults,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, algoCount, convPerfResults, memLimit)].algo;
++ LocalPerf_t fwdPerfResults{convFwdPerfResults};
++ convWorkspace->AlgorithmForward = convFwdPerfResults[choose_algo(algoChoice, algoCount, fwdPerfResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm(
+ cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor,
+@@ -558,7 +567,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ * I'm sure there may be a faster way, but this works
+ */
+ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
+- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdDataResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ cudnnConvolutionBwdDataAlgoPerf_t convBwdDataPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm(
+ cudnnHandle,
+ convDescriptors->WeightsDescriptor,
+@@ -567,7 +576,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ activationGradientsBackwardDescriptor,
+ convRequestedAlgoCount,
+ &algoCount,
+- convPerfBwdDataResults));
++ convBwdDataPerfResults));
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+ // i.e.
+ // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx(
+@@ -581,11 +590,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &inputTensor,
+ // convRequestedAlgoCount,
+ // &algoCount,
+- // &convPerfBwdResults,
++ // &convBwdDataPerfResults,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, algoCount, convPerfBwdDataResults, memLimit)].algo;
++ LocalPerf_t bwdDataPerfResults{convBwdDataPerfResults};
++ convWorkspace->AlgorithmBackward = convBwdDataPerfResults[choose_algo(algoChoice, algoCount, bwdDataPerfResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle,
+ convDescriptors->WeightsDescriptor,
+@@ -632,7 +642,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ * I'm sure there may be a faster way, but this works
+ */
+ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms
+- cudnnConvolutionBwdFilterAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
++ cudnnConvolutionBwdFilterAlgoPerf_t convBwdFilterPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm
+ CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm(
+ cudnnHandle,
+ activationBackwardDescriptor,
+@@ -641,7 +651,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ convDescriptors->WeightsDescriptor,
+ convRequestedAlgoCount,
+ &algoCount,
+- convPerfBwdFilterResults));
++ convBwdFilterPerfResults));
+ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx),
+ // i.e.
+ // CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithmEx(
+@@ -655,11 +665,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &filters,
+ // convRequestedAlgoCount,
+ // &algoCount,
+- // &convPerfBwdFilterResults,
++ // &convBwdFilterPerfResults,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- convWorkspace->HelperAlgorithm = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo;
++ LocalPerf_t bwdFilterPerfResults{convBwdFilterPerfResults};
++ convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[choose_algo(algoChoice, algoCount, bwdFilterPerfResults, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+ activationBackwardDescriptor,
+
+From 1f1dfbbac06c29df98bdebdd9367bf566f2e7ce8 Mon Sep 17 00:00:00 2001
+From: Konstantin Gizdov <kgizdov at gmail.com>
+Date: Thu, 23 Jul 2020 21:37:33 +0300
+Subject: [PATCH 10/10] compiles completely
+
+---
+ .../src/DNN/Architectures/Cudnn/Propagate.cu | 83 ++++++++++---------
+ 1 file changed, 46 insertions(+), 37 deletions(-)
+
+diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+index 66ce64a5efc..0694369860a 100644
+--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu
+@@ -344,41 +344,50 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ enum algoPreference { no_workspace, fastest, workspace_limit };
+ algoPreference algoChoice;
+ // C++11 lambdas cannot be templated, so we have to do this HORRIBLE stuff...
+- union LocalPerf_t {
+- // these three type are absolutely equivalent
+- // and one can access them as they wish to get info
+- cudnnConvolutionFwdAlgoPerf_t * fwd;
+- cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter;
+- cudnnConvolutionBwdDataAlgoPerf_t * bwdData;
+- };
+- auto choose_algo = [](algoPreference const & algoPref, int const algoCount, LocalPerf_t const & perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int {
+- int algoIdx{0};
+- if (algoPref == algoPreference::fastest) { // prefer fastest
+- float temp_runtime{std::numeric_limits<float>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime) {
+- temp_runtime = perfResults.fwd[i].time;
+- algoIdx = i;
++ class LocalPerf {
++ public:
++ LocalPerf(cudnnConvolutionFwdAlgoPerf_t * fwd) {m_fwd = fwd;}
++ LocalPerf(cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter) {m_bwdFilter = bwdFilter;}
++ LocalPerf(cudnnConvolutionBwdDataAlgoPerf_t * bwdData) {m_bwdData = bwdData;}
++ size_t getMemory(int i) {return m_fwd != nullptr ? m_fwd[i].memory : m_bwdFilter != nullptr ? m_bwdFilter[i].memory : m_bwdData != nullptr ? m_bwdData[i].memory : 0;}
++ float getTime(int i) {return m_fwd != nullptr ? m_fwd[i].time : m_bwdFilter != nullptr ? m_bwdFilter[i].time : m_bwdData != nullptr ? m_bwdData[i].time : 0;}
++ cudnnStatus_t getStatus(int i) {return m_fwd != nullptr ? m_fwd[i].status : m_bwdFilter != nullptr ? m_bwdFilter[i].status : m_bwdData != nullptr ? m_bwdData[i].status : CUDNN_STATUS_BAD_PARAM;}
++ int getIdx(algoPreference const & algoPref, int const algoCount, size_t memLim = std::numeric_limits<size_t>::max()) {
++ int algoIdx{0};
++ if (algoPref == algoPreference::fastest) { // prefer fastest
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getTime(i) < temp_runtime) {
++ temp_runtime = getTime(i);
++ algoIdx = i;
++ }
+ }
+- }
+- } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size
+- float temp_runtime{std::numeric_limits<float>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime && perfResults.fwd[i].memory <= memLim) {
+- temp_runtime = perfResults.fwd[i].time;
+- algoIdx = i;
++ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size
++ float temp_runtime{std::numeric_limits<float>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getTime(i) < temp_runtime && getMemory(i) <= memLim) {
++ temp_runtime = getTime(i);
++ algoIdx = i;
++ }
+ }
+- }
+- } else { // prefer smallest workspace size
+- size_t temp_memsize{std::numeric_limits<size_t>::max()};
+- for (int i = 0; i < algoCount; ++i) {
+- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].memory < temp_memsize) {
+- temp_memsize = perfResults.fwd[i].memory;
+- algoIdx = i;
++ } else { // prefer smallest workspace size
++ size_t temp_memsize{std::numeric_limits<size_t>::max()};
++ for (int i = 0; i < algoCount; ++i) {
++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getMemory(i) < temp_memsize) {
++ temp_memsize = getMemory(i);
++ algoIdx = i;
++ }
+ }
+ }
+- }
+- return algoIdx;
++ return algoIdx;
++ };
++ private:
++ LocalPerf();
++ // these three types are absolutely equivalent
++ // and one can access them as they wish to get info
++ cudnnConvolutionFwdAlgoPerf_t * m_fwd;
++ cudnnConvolutionBwdFilterAlgoPerf_t * m_bwdFilter;
++ cudnnConvolutionBwdDataAlgoPerf_t * m_bwdData;
+ };
+ #else
+ // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions)
+@@ -502,8 +511,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- LocalPerf_t fwdPerfResults{convFwdPerfResults};
+- convWorkspace->AlgorithmForward = convFwdPerfResults[choose_algo(algoChoice, algoCount, fwdPerfResults, memLimit)].algo;
++ LocalPerf fwdPerfResults{convFwdPerfResults};
++ convWorkspace->AlgorithmForward = convFwdPerfResults[fwdPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm(
+ cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor,
+@@ -594,8 +603,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- LocalPerf_t bwdDataPerfResults{convBwdDataPerfResults};
+- convWorkspace->AlgorithmBackward = convBwdDataPerfResults[choose_algo(algoChoice, algoCount, bwdDataPerfResults, memLimit)].algo;
++ LocalPerf bwdDataPerfResults{convBwdDataPerfResults};
++ convWorkspace->AlgorithmBackward = convBwdDataPerfResults[bwdDataPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle,
+ convDescriptors->WeightsDescriptor,
+@@ -669,8 +678,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace,
+ // &convWorkspace,
+ // memLimit)); // use memLimit for workspace size
+ // instead choose either fastest or lowest memory algo as per preference
+- LocalPerf_t bwdFilterPerfResults{convBwdFilterPerfResults};
+- convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[choose_algo(algoChoice, algoCount, bwdFilterPerfResults, memLimit)].algo;
++ LocalPerf bwdFilterPerfResults{convBwdFilterPerfResults};
++ convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[bwdFilterPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo;
+ #else
+ CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+ activationBackwardDescriptor,
Copied: root/repos/community-testing-x86_64/jupyter_notebook_config.py (from rev 665090, root/trunk/jupyter_notebook_config.py)
===================================================================
--- community-testing-x86_64/jupyter_notebook_config.py (rev 0)
+++ community-testing-x86_64/jupyter_notebook_config.py 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1 @@
+c.NotebookApp.ip = '*'
Copied: root/repos/community-testing-x86_64/nbman-for-arch.patch (from rev 665090, root/trunk/nbman-for-arch.patch)
===================================================================
--- community-testing-x86_64/nbman-for-arch.patch (rev 0)
+++ community-testing-x86_64/nbman-for-arch.patch 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,177 @@
+diff --color -aur root-6.22.00-old/main/src/nbmain.cxx root-6.22.00-new/main/src/nbmain.cxx
+--- root-6.22.00-old/main/src/nbmain.cxx 2020-07-20 15:26:53.983725609 +0300
++++ root-6.22.00-new/main/src/nbmain.cxx 2020-07-20 15:29:53.940386060 +0300
+@@ -33,10 +33,6 @@
+ #define NB_OPT "notebook"
+ #define JUPYTER_CONF_DIR_V "JUPYTER_CONFIG_DIR"
+ #define JUPYTER_PATH_V "JUPYTER_PATH"
+-#define NB_CONF_DIR "notebook"
+-#define ROOTNB_DIR ".rootnb"
+-#define COMMIT_FILE ".rootcommit"
+-#define JUPYTER_CONFIG "jupyter_notebook_config.py"
+
+ using namespace std;
+
+@@ -46,161 +46,12 @@
+ #endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+-/// Checks whether ROOT notebook files are installed and they are
+-/// the current version.
+-
+-static int CheckNbInstallation(string dir)
+-{
+- string commit(gROOT->GetGitCommit());
+- string inputfname(dir + pathsep + ROOTNB_DIR + pathsep + COMMIT_FILE);
+- ifstream in(inputfname);
+- if (in.is_open()) {
+- string line;
+- in >> line;
+- in.close();
+- if (line.compare(commit) == 0) return 0; // already installed
+- else return -1; // install, it's outdated
+- }
+- else if (gSystem->AccessPathName(inputfname.c_str())) {
+- // There is no installation
+- return -1;
+- }
+- else {
+- fprintf(stderr,
+- "Error checking notebook installation -- cannot open %s\n",
+- inputfname.c_str());
+- return -2;
+- }
+-}
+-
+-////////////////////////////////////////////////////////////////////////////////
+-/// Installs ROOT notebook files in the user's home directory.
+-
+-static bool InstallNbFiles(string source, string dest)
+-{
+- // Create installation directory
+- if (gSystem->AccessPathName(dest.c_str())) {
+- if (gSystem->mkdir(dest.c_str())) {
+- fprintf(stderr,
+- "Error installing notebook configuration files -- cannot create directory %s\n",
+- dest.c_str());
+- return false;
+- }
+- }
+-
+- // Copy files in source to dest
+- TSystemDirectory dir(source.c_str(), source.c_str());
+- std::unique_ptr<TList> files;
+- files.reset(dir.GetListOfFiles());
+- if (files) {
+- TSystemFile *file;
+- TListIter it(files.get());
+- while ((file = (TSystemFile*)it())) {
+- TString s = file->GetName();
+- string fname(s.Data());
+- string sourcefile = source + pathsep + fname;
+- string destfile = dest + pathsep + fname;
+- if (!file->IsDirectory()) {
+- if (gSystem->CopyFile(sourcefile.c_str(), destfile.c_str(), true)) {
+- fprintf(stderr,
+- "Error installing notebook configuration files -- cannot copy file %s to %s\n",
+- sourcefile.c_str(), destfile.c_str());
+- return false;
+- }
+- }
+- else if (fname.compare(".") && fname.compare("..") && fname.compare("html")) {
+- if (!InstallNbFiles(sourcefile, destfile))
+- return false;
+- }
+- }
+- }
+-
+- return true;
+-}
+-
+-////////////////////////////////////////////////////////////////////////////////
+-/// Creates the Jupyter notebook configuration file that sets the
+-/// necessary environment.
+-
+-static bool CreateJupyterConfig(string dest, string rootbin, string rootlib, string rootdata)
+-{
+- string jupyconfig = dest + pathsep + JUPYTER_CONFIG;
+- ofstream out(jupyconfig, ios::trunc);
+- if (out.is_open()) {
+- out << "import os" << endl;
+- out << "rootbin = '" << rootbin << "'" << endl;
+- out << "rootlib = '" << rootlib << "'" << endl;
+-#ifdef WIN32
+- string jsrootsys = rootdata + "\\js\\";
+- out << "os.environ['PYTHONPATH'] = '%s' % rootlib + ':' + os.getenv('PYTHONPATH', '')" << endl;
+- out << "os.environ['PATH'] = '%s:%s\\bin' % (rootbin,rootbin) + ':' + '%s' % rootlib + ':' + os.getenv('PATH', '')" << endl;
+-#else
+- string jsrootsys = rootdata + "/js/";
+- out << "os.environ['PYTHONPATH'] = '%s' % rootlib + ':' + os.getenv('PYTHONPATH', '')" << endl;
+- out << "os.environ['PATH'] = '%s:%s/bin' % (rootbin,rootbin) + ':' + os.getenv('PATH', '')" << endl;
+- out << "os.environ['LD_LIBRARY_PATH'] = '%s' % rootlib + ':' + os.getenv('LD_LIBRARY_PATH', '')" << endl;
+-#endif
+- out << "c.NotebookApp.extra_static_paths = ['" << jsrootsys << "']" << endl;
+- out.close();
+- return true;
+- }
+- else {
+- fprintf(stderr,
+- "Error installing notebook configuration files -- cannot create IPython config file at %s\n",
+- jupyconfig.c_str());
+- return false;
+- }
+-}
+-
+-////////////////////////////////////////////////////////////////////////////////
+-/// Creates a file that stores the current commit id in it.
+-
+-static bool CreateStamp(string dest)
+-{
+- ofstream out(dest + pathsep + COMMIT_FILE, ios::trunc);
+- if (out.is_open()) {
+- out << gROOT->GetGitCommit();
+- out.close();
+- return true;
+- }
+- else {
+- fprintf(stderr,
+- "Error installing notebook configuration files -- cannot create %s\n",
+- COMMIT_FILE);
+- return false;
+- }
+-}
+-
+-////////////////////////////////////////////////////////////////////////////////
+ /// Spawn a Jupyter notebook customised by ROOT.
+
+ int main(int argc, char **argv)
+ {
+- string rootbin(TROOT::GetBinDir().Data());
+- string rootlib(TROOT::GetLibDir().Data());
+- string rootetc(TROOT::GetEtcDir().Data());
+- string rootdata(TROOT::GetDataDir().Data());
+-
+- // If needed, install ROOT notebook files in the user's home directory
+-#ifdef WIN32
+- string homedir(getenv("USERPROFILE"));
+-#else
+- string homedir(getenv("HOME"));
+-#endif
+- int inst = CheckNbInstallation(homedir);
+- if (inst == -1) {
+- // The etc directory contains the ROOT notebook files to install
+- string source(rootetc + pathsep + NB_CONF_DIR);
+- string dest(homedir + pathsep + ROOTNB_DIR);
+- bool res = InstallNbFiles(source, dest) &&
+- CreateJupyterConfig(dest, rootbin, rootlib, rootdata) &&
+- CreateStamp(dest);
+- if (!res) return 1;
+- }
+- else if (inst == -2) return 1;
+-
+ // Set IPython directory for the ROOT notebook flavour
+- string rootnbpath = homedir + pathsep + ROOTNB_DIR;
++ string rootnbpath = pathsep + string("etc") + pathsep + string("root") + pathsep + string("notebook");
+ string jupyconfdir(JUPYTER_CONF_DIR_V + ("=" + rootnbpath));
+ string jupypathdir(JUPYTER_PATH_V + ("=" + rootnbpath));
+ putenv((char *)jupyconfdir.c_str());
Copied: root/repos/community-testing-x86_64/root.pc.tpl (from rev 665090, root/trunk/root.pc.tpl)
===================================================================
--- community-testing-x86_64/root.pc.tpl (rev 0)
+++ community-testing-x86_64/root.pc.tpl 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,12 @@
+prefix=_PREFIX
+exec_prefix=_EXECPREFIX
+libdir=_LIBDIR
+includedir=_INCDIR
+
+Name: ROOT
+Description: C++ data analysis framework and interpreter from CERN
+Version: _PKGVERSION
+URL: _UPSTREAM_URL
+Requires: _REQUIRES
+Libs: _LIBRARIES
+Cflags: _CFLAGS
Copied: root/repos/community-testing-x86_64/root.xml (from rev 665090, root/trunk/root.xml)
===================================================================
--- community-testing-x86_64/root.xml (rev 0)
+++ community-testing-x86_64/root.xml 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
+ <mime-type type="application/x-root">
+ <comment>ROOT file</comment>
+ <comment xml:lang="de">ROOT-Datei</comment>
+ <comment xml:lang="en">ROOT-File</comment>
+ <comment xml:lang="fr">ROOT-Fichier</comment>
+ <comment xml:lang="it">ROOT-File</comment>
+ <glob pattern="*.root"/>
+ <magic priority="80">
+ <match value="root" type="string" offset="0:64"/>
+ </magic>
+ </mime-type>
+</mime-info>
Copied: root/repos/community-testing-x86_64/settings-cuda.cmake (from rev 665090, root/trunk/settings-cuda.cmake)
===================================================================
--- community-testing-x86_64/settings-cuda.cmake (rev 0)
+++ community-testing-x86_64/settings-cuda.cmake 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,110 @@
+set (CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
+set (BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+set (CMAKE_INSTALL_PREFIX /usr CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_CMAKEDIR /usr/lib/cmake/ROOT CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_BINDIR /usr/bin CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_LIBDIR /usr/lib/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_INCLUDEDIR /usr/include CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_SYSCONFDIR /etc/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_DATAROOTDIR /usr/share CACHE PATH "" FORCE)
+set (CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
+set (CMAKE_CUDA_STANDARD 14 CACHE STRING "" FORCE)
+set (PYTHIA8_DATA /usr/share/pythia8/xmldoc CACHE PATH "" FORCE) # sync with pythia8 package
+set (GLEW_DIR /usr/include/GL CACHE PATH "" FORCE) # need to set manually
+set (alien OFF CACHE BOOL "" FORCE)
+set (all OFF CACHE BOOL "" FORCE)
+set (asimage ON CACHE BOOL "" FORCE)
+set (builtin_afterimage OFF CACHE BOOL "" FORCE)
+set (builtin_clang ON CACHE BOOL "" FORCE)
+set (CLANG_ENABLE_STATIC_ANALYZER ON CACHE BOOL "" FORCE)
+set (CLANG_ANALYZER_BUILD_Z3 ON CACHE BOOL "" FORCE)
+set (builtin_cfitsio OFF CACHE BOOL "" FORCE)
+set (builtin_davix OFF CACHE BOOL "" FORCE)
+set (builtin_fftw3 OFF CACHE BOOL "" FORCE)
+set (builtin_ftgl OFF CACHE BOOL "" FORCE)
+set (builtin_freetype OFF CACHE BOOL "" FORCE)
+set (builtin_gl2ps OFF CACHE BOOL "" FORCE)
+set (builtin_glew OFF CACHE BOOL "" FORCE)
+set (builtin_gsl OFF CACHE BOOL "" FORCE)
+set (builtin_lzma OFF CACHE BOOL "" FORCE)
+set (builtin_llvm ON CACHE BOOL "" FORCE)
+set (builtin_openssl OFF CACHE BOOL "" FORCE)
+set (builtin_pcre OFF CACHE BOOL "" FORCE)
+set (builtin_tbb OFF CACHE BOOL "" FORCE)
+set (builtin_unuran OFF CACHE BOOL "" FORCE)
+set (builtin_vc OFF CACHE BOOL "" FORCE)
+set (builtin_xxhash OFF CACHE BOOL "" FORCE)
+set (builtin_xrootd OFF CACHE BOOL "" FORCE)
+set (builtin_zlib OFF CACHE BOOL "" FORCE)
+set (ccache ON CACHE BOOL "" FORCE)
+set (clad ON CACHE BOOL "" FORCE)
+set (cocoa OFF CACHE BOOL "" FORCE) # MacOS only
+set (cuda ON CACHE BOOL "" FORCE)
+set (cudnn ON CACHE BOOL "" FORCE)
+set (dataframe ON CACHE BOOL "" FORCE)
+set (davix OFF CACHE BOOL "" FORCE)
+set (dcache OFF CACHE BOOL "" FORCE)
+set (exceptions ON CACHE BOOL "" FORCE)
+set (fail-on-missing ON CACHE BOOL "" FORCE)
+set (fcgi ON CACHE BOOL "" FORCE)
+set (fftw3 ON CACHE BOOL "" FORCE)
+set (fitsio ON CACHE BOOL "" FORCE)
+set (fortran ON CACHE BOOL "" FORCE)
+set (gdml ON CACHE BOOL "" FORCE)
+set (genvector ON CACHE BOOL "" FORCE)
+set (gfal OFF CACHE BOOL "" FORCE)
+set (gl2ps ON CACHE BOOL "" FORCE)
+set (gminimal OFF CACHE BOOL "" FORCE)
+set (gnuinstall ON CACHE BOOL "" FORCE)
+set (gsl_shared ON CACHE BOOL "" FORCE)
+set (gviz ON CACHE BOOL "" FORCE)
+set (http ON CACHE BOOL "" FORCE)
+set (imt ON CACHE BOOL "" FORCE)
+set (jemalloc OFF CACHE BOOL "" FORCE)
+set (mathmore ON CACHE BOOL "" FORCE)
+set (minimal OFF CACHE BOOL "" FORCE)
+set (minuit2 ON CACHE BOOL "" FORCE)
+set (minuit2_mpi ON CACHE BOOL "" FORCE)
+set (minuit2_omp ON CACHE BOOL "" FORCE)
+set (mlp ON CACHE BOOL "" FORCE)
+set (monalisa OFF CACHE BOOL "" FORCE)
+set (mpi ON CACHE BOOL "" FORCE)
+set (mt ON CACHE BOOL "" FORCE)
+set (mysql ON CACHE BOOL "" FORCE)
+set (odbc ON CACHE BOOL "" FORCE)
+set (opengl ON CACHE BOOL "" FORCE)
+set (OpenGL_GL_PREFERENCE GLVND CACHE STRING "" FORCE) # use new policy since 3.11
+set (oracle OFF CACHE BOOL "" FORCE)
+set (pgsql ON CACHE BOOL "" FORCE)
+set (pythia6 OFF CACHE BOOL "" FORCE)
+set (pythia6_nolink OFF CACHE BOOL "" FORCE)
+set (pythia8 ON CACHE BOOL "" FORCE)
+set (pyroot ON CACHE BOOL "" FORCE)
+set (qt5web ON CACHE BOOL "" FORCE)
+set (roofit ON CACHE BOOL "" FORCE)
+set (root7 ON CACHE BOOL "" FORCE)
+set (roottest OFF CACHE BOOL "" FORCE)
+set (rpath OFF CACHE BOOL "" FORCE)
+set (runtime_cxxmodules OFF CACHE BOOL "" FORCE) # breaks python
+set (r OFF CACHE BOOL "" FORCE) # requires r-rcpp
+set (shadowpw ON CACHE BOOL "" FORCE)
+set (shared ON CACHE BOOL "" FORCE)
+set (soversion OFF CACHE BOOL "" FORCE)
+set (spectrum ON CACHE BOOL "" FORCE)
+set (sqlite ON CACHE BOOL "" FORCE)
+set (ssl ON CACHE BOOL "" FORCE)
+set (tbb ON CACHE BOOL "" FORCE)
+set (tcmalloc OFF CACHE BOOL "" FORCE)
+set (testing OFF CACHE BOOL "" FORCE)
+set (tmva ON CACHE BOOL "" FORCE)
+set (tmva-cpu OFF CACHE BOOL "" FORCE)
+set (tmva-gpu ON CACHE BOOL "" FORCE)
+set (tmva-pymva ON CACHE BOOL "" FORCE)
+set (unuran ON CACHE BOOL "" FORCE)
+set (vc ON CACHE BOOL "" FORCE)
+set (vdt ON CACHE BOOL "" FORCE)
+set (winrtdebug OFF CACHE BOOL "" FORCE) # windows only
+set (webgui ON CACHE BOOL "" FORCE)
+set (x11 ON CACHE BOOL "" FORCE)
+set (xml ON CACHE BOOL "" FORCE)
+set (xrootd ON CACHE BOOL "" FORCE)
Copied: root/repos/community-testing-x86_64/settings.cmake (from rev 665090, root/trunk/settings.cmake)
===================================================================
--- community-testing-x86_64/settings.cmake (rev 0)
+++ community-testing-x86_64/settings.cmake 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,110 @@
+set (CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
+set (BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+set (CMAKE_INSTALL_PREFIX /usr CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_CMAKEDIR /usr/lib/cmake/ROOT CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_BINDIR /usr/bin CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_LIBDIR /usr/lib/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_INCLUDEDIR /usr/include CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_SYSCONFDIR /etc/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_DATAROOTDIR /usr/share CACHE PATH "" FORCE)
+set (CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
+set (CMAKE_CUDA_STANDARD 14 CACHE STRING "" FORCE)
+set (PYTHIA8_DATA /usr/share/pythia8/xmldoc CACHE PATH "" FORCE) # sync with pythia8 package
+set (GLEW_DIR /usr/include/GL CACHE PATH "" FORCE) # need to set manually
+set (alien OFF CACHE BOOL "" FORCE)
+set (all OFF CACHE BOOL "" FORCE)
+set (asimage ON CACHE BOOL "" FORCE)
+set (builtin_afterimage OFF CACHE BOOL "" FORCE)
+set (builtin_clang ON CACHE BOOL "" FORCE)
+set (CLANG_ENABLE_STATIC_ANALYZER ON CACHE BOOL "" FORCE)
+set (CLANG_ANALYZER_BUILD_Z3 ON CACHE BOOL "" FORCE)
+set (builtin_cfitsio OFF CACHE BOOL "" FORCE)
+set (builtin_davix OFF CACHE BOOL "" FORCE)
+set (builtin_fftw3 OFF CACHE BOOL "" FORCE)
+set (builtin_ftgl OFF CACHE BOOL "" FORCE)
+set (builtin_freetype OFF CACHE BOOL "" FORCE)
+set (builtin_gl2ps OFF CACHE BOOL "" FORCE)
+set (builtin_glew OFF CACHE BOOL "" FORCE)
+set (builtin_gsl OFF CACHE BOOL "" FORCE)
+set (builtin_lzma OFF CACHE BOOL "" FORCE)
+set (builtin_llvm ON CACHE BOOL "" FORCE)
+set (builtin_openssl OFF CACHE BOOL "" FORCE)
+set (builtin_pcre OFF CACHE BOOL "" FORCE)
+set (builtin_tbb OFF CACHE BOOL "" FORCE)
+set (builtin_unuran OFF CACHE BOOL "" FORCE)
+set (builtin_vc OFF CACHE BOOL "" FORCE)
+set (builtin_xxhash OFF CACHE BOOL "" FORCE)
+set (builtin_xrootd OFF CACHE BOOL "" FORCE)
+set (builtin_zlib OFF CACHE BOOL "" FORCE)
+set (ccache ON CACHE BOOL "" FORCE)
+set (clad ON CACHE BOOL "" FORCE)
+set (cocoa OFF CACHE BOOL "" FORCE) # MacOS only
+set (cuda OFF CACHE BOOL "" FORCE)
+set (cudnn OFF CACHE BOOL "" FORCE)
+set (dataframe ON CACHE BOOL "" FORCE)
+set (davix OFF CACHE BOOL "" FORCE)
+set (dcache OFF CACHE BOOL "" FORCE)
+set (exceptions ON CACHE BOOL "" FORCE)
+set (fail-on-missing ON CACHE BOOL "" FORCE)
+set (fcgi ON CACHE BOOL "" FORCE)
+set (fftw3 ON CACHE BOOL "" FORCE)
+set (fitsio ON CACHE BOOL "" FORCE)
+set (fortran ON CACHE BOOL "" FORCE)
+set (gdml ON CACHE BOOL "" FORCE)
+set (genvector ON CACHE BOOL "" FORCE)
+set (gfal OFF CACHE BOOL "" FORCE)
+set (gl2ps ON CACHE BOOL "" FORCE)
+set (gminimal OFF CACHE BOOL "" FORCE)
+set (gnuinstall ON CACHE BOOL "" FORCE)
+set (gsl_shared ON CACHE BOOL "" FORCE)
+set (gviz ON CACHE BOOL "" FORCE)
+set (http ON CACHE BOOL "" FORCE)
+set (imt ON CACHE BOOL "" FORCE)
+set (jemalloc OFF CACHE BOOL "" FORCE)
+set (mathmore ON CACHE BOOL "" FORCE)
+set (minimal OFF CACHE BOOL "" FORCE)
+set (minuit2 ON CACHE BOOL "" FORCE)
+set (minuit2_mpi ON CACHE BOOL "" FORCE)
+set (minuit2_omp ON CACHE BOOL "" FORCE)
+set (mlp ON CACHE BOOL "" FORCE)
+set (monalisa OFF CACHE BOOL "" FORCE)
+set (mpi ON CACHE BOOL "" FORCE)
+set (mt ON CACHE BOOL "" FORCE)
+set (mysql ON CACHE BOOL "" FORCE)
+set (odbc ON CACHE BOOL "" FORCE)
+set (opengl ON CACHE BOOL "" FORCE)
+set (OpenGL_GL_PREFERENCE GLVND CACHE STRING "" FORCE) # use new policy since 3.11
+set (oracle OFF CACHE BOOL "" FORCE)
+set (pgsql ON CACHE BOOL "" FORCE)
+set (pythia6 OFF CACHE BOOL "" FORCE)
+set (pythia6_nolink OFF CACHE BOOL "" FORCE)
+set (pythia8 ON CACHE BOOL "" FORCE)
+set (pyroot ON CACHE BOOL "" FORCE)
+set (qt5web ON CACHE BOOL "" FORCE)
+set (roofit ON CACHE BOOL "" FORCE)
+set (root7 ON CACHE BOOL "" FORCE)
+set (roottest OFF CACHE BOOL "" FORCE)
+set (rpath OFF CACHE BOOL "" FORCE)
+set (runtime_cxxmodules OFF CACHE BOOL "" FORCE) # breaks python
+set (r OFF CACHE BOOL "" FORCE) # requires r-rcpp
+set (shadowpw ON CACHE BOOL "" FORCE)
+set (shared ON CACHE BOOL "" FORCE)
+set (soversion OFF CACHE BOOL "" FORCE)
+set (spectrum ON CACHE BOOL "" FORCE)
+set (sqlite ON CACHE BOOL "" FORCE)
+set (ssl ON CACHE BOOL "" FORCE)
+set (tbb ON CACHE BOOL "" FORCE)
+set (tcmalloc OFF CACHE BOOL "" FORCE)
+set (testing OFF CACHE BOOL "" FORCE)
+set (tmva ON CACHE BOOL "" FORCE)
+set (tmva-cpu ON CACHE BOOL "" FORCE)
+set (tmva-gpu OFF CACHE BOOL "" FORCE)
+set (tmva-pymva ON CACHE BOOL "" FORCE)
+set (unuran ON CACHE BOOL "" FORCE)
+set (vc ON CACHE BOOL "" FORCE)
+set (vdt ON CACHE BOOL "" FORCE)
+set (winrtdebug OFF CACHE BOOL "" FORCE) # windows only
+set (webgui ON CACHE BOOL "" FORCE)
+set (x11 ON CACHE BOOL "" FORCE)
+set (xml ON CACHE BOOL "" FORCE)
+set (xrootd ON CACHE BOOL "" FORCE)
Copied: root/repos/community-testing-x86_64/thisroot.fail (from rev 665090, root/trunk/thisroot.fail)
===================================================================
--- community-testing-x86_64/thisroot.fail (rev 0)
+++ community-testing-x86_64/thisroot.fail 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# thisroot.* scripts should not be used to
+# configure ROOT on Arch. Notify the user and
+# exit with an error.
+
+function fail {  # fail MESSAGE [STATUS]: print MESSAGE to stderr, then exit with STATUS
+ printf '%s\n' "$1" >&2
+ exit "${2:-1}"  # STATUS defaults to 1; the message text is not a valid numeric exit code
+}
+
+fail "ERROR: ${0##*/} should never be used!" 1
More information about the arch-commits
mailing list