Skip to content

Commit

Permalink
Rework how local toolkits are selected.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Aug 24, 2023
1 parent 5cc0ff2 commit 90fe32a
Show file tree
Hide file tree
Showing 14 changed files with 166 additions and 133 deletions.
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ steps:
queue: "juliagpu"
cuda: "*"
commands: |
echo -e "[CUDA_Runtime_jll]\nversion = \"local\"" >LocalPreferences.toml
echo -e "[CUDA_Runtime_jll]\nlocal = \"true\"" >LocalPreferences.toml
if: build.message !~ /\[skip tests\]/ &&
build.message !~ /\[skip julia\]/
timeout_in_minutes: 120
Expand Down
6 changes: 2 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@ ENV JULIA_DEPOT_PATH=/usr/local/share/julia
RUN julia -e 'using Pkg; Pkg.add("CUDA")'

# hard-code a CUDA toolkit version
RUN julia -e 'using CUDA; CUDA.set_runtime_version!(v"11.8")'
RUN julia -e 'using CUDA; CUDA.set_runtime_version!(v"12.2")'
# re-importing CUDA.jl below will trigger a download of the relevant artifacts

# generate the device runtime library for all known and supported devices.
# this is to avoid having to do this over and over at run time.
RUN julia -e 'using CUDA; CUDA.precompile_runtime()' && \
chmod 644 /usr/local/share/julia/compiled/v1.8/GPUCompiler/*/*.bc
# TODO: fix this in GPUCompiler.jl
RUN julia -e 'using CUDA; CUDA.precompile_runtime()'


# user environment
Expand Down
8 changes: 6 additions & 2 deletions LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
[CUDA_Runtime_jll]
# which CUDA runtime to use; can be set to a supported
# version number or to "local" for a local installation.
# which CUDA runtime to use. in normal cases, this will be auto-detected, but you need to
# set this preference if you want to precompile CUDA.jl in an environment without CUDA
#version = "11.8"

# whether to use a local CUDA installation. if CUDA isn't available during precompilation,
# you will also need to set the "version" preference, matching the local CUDA version.
#local = "false"

[CUDA]
# whether to use a nonblocking synchronization mechanism,
# making it possible to use cooperative multitasking.
Expand Down
18 changes: 9 additions & 9 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.8.5"
manifest_format = "2.0"
project_hash = "62b8680a215cbfae7c33bbe0a528c176464c0f65"
project_hash = "e8fd324aba09b72a771344d3910c4e1284ae62a0"

[[deps.AbstractFFTs]]
deps = ["ChainRulesCore", "LinearAlgebra", "Test"]
Expand Down Expand Up @@ -57,9 +57,9 @@ version = "0.2.2"

[[deps.CUDA_Runtime_jll]]
deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
git-tree-sha1 = "2d1a9bacfcde420c3b3c16e8f72ee037bf35ba1e"
git-tree-sha1 = "5b88716bfcb250bc036f9d4fa2915f72ef5bf17e"
uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
version = "0.8.0+0"
version = "0.9.0+0"

[[deps.ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
Expand Down Expand Up @@ -272,18 +272,18 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[deps.LogExpFunctions]]
deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "5ab83e1679320064c29e8973034357655743d22d"
git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.25"
version = "0.3.26"

[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[deps.MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2"
git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.10"
version = "0.5.11"

[[deps.Markdown]]
deps = ["Base64"]
Expand Down Expand Up @@ -360,9 +360,9 @@ version = "1.4.2"

[[deps.PrecompileTools]]
deps = ["Preferences"]
git-tree-sha1 = "9673d39decc5feece56ef3940e5dafba15ba0f81"
git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f"
uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
version = "1.1.2"
version = "1.2.0"

[[deps.Preferences]]
deps = ["TOML"]
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ BFloat16s = "0.2, 0.3, 0.4"
CEnum = "0.2, 0.3, 0.4"
CUDA_Driver_jll = "0.6"
CUDA_Runtime_Discovery = "0.2"
CUDA_Runtime_jll = "0.8"
CUDA_Runtime_jll = "0.9"
Crayons = "4"
DataFrames = "1"
ExprTools = "0.1"
Expand Down
81 changes: 30 additions & 51 deletions docs/src/installation/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ You can choose which version to (try to) download and use by calling
julia> using CUDA
julia> CUDA.set_runtime_version!(v"11.8")
┌ Warning: CUDA Runtime version set to 11.8, please re-start Julia for this to take effect.
└ @ CUDA /usr/local/share/julia/packages/CUDA/irdEw/lib/cudadrv/version.jl:54
[ Info: Set CUDA.jl toolkit preference to use CUDA 11.8.0 from artifact sources, please re-start Julia for this to take effect.
```

This generates the following `LocalPreferences.toml` file in your active environment:
Expand All @@ -134,7 +133,8 @@ only select artifacts that are compatible with the configured CUDA runtime.

### Using a local CUDA

To use a local installation, you can invoke the same API but set the version to `"local"`:
To use a local installation, pass the `local_toolkit` keyword argument to
`CUDA.set_runtime_version!`:

```
julia> using CUDA
Expand All @@ -143,9 +143,8 @@ julia> CUDA.versioninfo()
CUDA runtime 11.8, artifact installation
...
julia> CUDA.set_runtime_version!("local")
┌ Warning: CUDA Runtime version set to local, please re-start Julia for this to take effect.
└ @ CUDA ~/Julia/pkg/CUDA/lib/cudadrv/version.jl:73
julia> CUDA.set_runtime_version!(local_toolkit=true)
[ Info: Set CUDA.jl toolkit preference to use CUDA from the local system, please re-start Julia for this to take effect.
```

After re-launching Julia:
Expand All @@ -163,7 +162,7 @@ your active environment:

```
[CUDA_Runtime_jll]
version = "local"
local = "true"
```

This preference not only configures CUDA.jl to use a local toolkit, it also prevents
Expand All @@ -174,53 +173,33 @@ If CUDA.jl doesn't properly detect your local toolkit, it may be that certain li
binaries aren't on a globally-discoverable path. For more information, run Julia with the
`JULIA_DEBUG` environment variable set to `CUDA_Runtime_Discovery`.

Note that setting the version to `"local"` disables use of *any* CUDA-related JLL, not just
of `CUDA_Runtime_jll`. This is out of necessity: JLLs are baked in the precompilation image at
compile time, while local toolkit discovery happens at run time; this inconsistency makes it
impossible to select a compatible artifact for the JLLs. If you care about other JLLs, use
CUDA from artifacts.


## Containers

CUDA.jl is container friendly: You can install, precompile, and even import the package on a
system without a GPU:

```
$ docker run --rm -it julia # note how we're *not* using `--gpus=all` here,
# so we won't have access to a GPU (or its driver)
pkg> add CUDA
pkg> precompile
Precompiling project...
[ Info: Precompiling CUDA [052768ef-5323-5732-b1bb-66c8b64840ba]
```
Note that using a local toolkit instead of artifacts disables the use of *any*
CUDA-related JLL, not just of `CUDA_Runtime_jll`. Any package that depends on such a JLL
needs to inspect `CUDA.local_toolkit`, and if set, use `CUDA_Runtime_Discovery` to detect
libraries and binaries instead.

The above is common when building a container (`docker build` does not take a `--gpus`
argument). It does prevent CUDA.jl from downloading the toolkit artifacts that will be
required at run time, because it cannot query the driver for the CUDA compatibility level.

To avoid having to download the CUDA toolkit artifacts each time you restart your container,
it's possible to inform CUDA.jl which toolkit to use. This can be done by calling
`CUDA.set_runtime_version!` when building the container, after which a subsequent import
of CUDA.jl will download the necessary artifacts.
## Precompiling CUDA.jl without CUDA

At run time you obviously do need a CUDA-compatible GPU as well as the CUDA driver library
to interface with it. Typically, that library is imported from the host system, e.g., by
launching `docker` using the `--gpus=all` flag:
CUDA.jl can be precompiled and imported on systems without a GPU or CUDA installation. This
simplifies the situation where an application optionally uses CUDA. However, when CUDA.jl
is precompiled in such an environment, it *cannot* be used to run GPU code. This is a
result of artifacts being selected at precompile time.

```
$ docker run --rm -it --gpus=all julia
In some cases, e.g. with containers or HPC log-in nodes, you may want to precompile CUDA.jl
on a system without CUDA, yet still want to have it download the necessary artifacts and/or
produce a precompilation image that can be used on a system with CUDA. This can be achieved
by informing CUDA.jl which CUDA toolkit it will use at run time, by calling `CUDA.set_runtime_version!`.

julia> using CUDA
julia> CUDA.versioninfo()
CUDA runtime 11.8
CUDA driver 11.8
NVIDIA driver 520.56.6
...
```
When using artifacts, that's as simple as e.g. calling `CUDA.set_runtime_version!(v"11.8")`,
and afterwards re-starting Julia and re-importing CUDA.jl in order to trigger precompilation
again and download the necessary artifacts. If you want to use a local CUDA installation,
you also need to set the `local_toolkit` keyword argument, e.g., by calling
`CUDA.set_runtime_version!(v"11.8"; local_toolkit=true)`. Note that the version specified
here needs to match what will be available at run time. In both cases, i.e. when using
artifacts or a local toolkit, the chosen version needs to be compatible with the available
driver.

All of the above is demonstrated in the Dockerfile that's part of the CUDA.jl repository.
Finally, in such a scenario you may also want to call `CUDA.precompile_runtime()` to ensure
that the GPUCompiler runtime library is precompiled as well. This and all of the above is
demonstrated in the Dockerfile that's part of the CUDA.jl repository.
51 changes: 34 additions & 17 deletions lib/cudadrv/version.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,45 @@ function runtime_version()
end

"""
set_runtime_version!([version])
set_runtime_version!([version::VersionNumber]; local_toolkit=false)
Sets the CUDA Runtime version preference to `version`. This can be a version number, in
which case such a versioned artifact will be attempted to be used; or "local" for using a
runtime from the local system. Invoke this function without an argument to reset the
preference, in which case CUDA.jl will use the most recent compatible runtime available.
Configures CUDA.jl to use a specific CUDA toolkit version from a specific source.
If `local_toolkit` is set, the CUDA toolkit will be used from the local system, otherwise
it will be downloaded from an artifact source. In the case of a local toolkit, `version`
informs CUDA.jl which version that is (this may be useful if auto-detection fails). In
the case of artifact sources, `version` controls which version will be downloaded and used.
See also: [`reset_runtime_version!`](@ref).
"""
function set_runtime_version!(version::String)
Preferences.set_preferences!(CUDA_Runtime_jll, "version" => version; force=true)
@info "Set CUDA Runtime version preference to $version, please re-start Julia for this to take effect."
if VERSION <= v"1.6.5" || VERSION == v"1.7.0"
@warn """Due to a bug in Julia (until 1.6.5 and 1.7.1) your environment needs to directly include CUDA_Runtime_jll for this to work."""
# Persist CUDA.jl's toolkit-selection preferences (via Preferences.jl) on the
# CUDA_Runtime_jll package: a "version" preference selecting which toolkit
# version to use, and a "local" preference selecting a local installation over
# artifact sources. As the @info message notes, a Julia restart is required
# before either setting takes effect.
function set_runtime_version!(version::Union{Nothing,VersionNumber}=nothing;
local_toolkit::Bool=false)
if version !== nothing
# only the major.minor components of the version are stored
Preferences.set_preferences!(CUDA_Runtime_jll, "version" => "$(version.major).$(version.minor)"; force=true)
else
# no version requested: clear the preference so auto-selection applies again
Preferences.delete_preferences!(CUDA_Runtime_jll, "version"; force=true)
end
if local_toolkit
Preferences.set_preferences!(CUDA_Runtime_jll, "local" => "true"; force=true)
else
# the default is "false"
Preferences.delete_preferences!(CUDA_Runtime_jll, "local"; force=true)
end
@info "Set CUDA.jl toolkit preference to use $(version === nothing ? "CUDA" : "CUDA $version") from $(local_toolkit ? "the local system" : "artifact sources"), please re-start Julia for this to take effect."
end
set_runtime_version!(version::VersionNumber) =
set_runtime_version!("$(version.major).$(version.minor)")
function set_runtime_version!()

"""
reset_runtime_version!()
Resets the CUDA Runtime version preference to the default, which is to use the most recent
compatible runtime available from an artifact source.
See also: [`set_runtime_version!`](@ref).
"""
function reset_runtime_version!()
Preferences.delete_preferences!(CUDA_Runtime_jll, "version"; force=true)
@info "Reset CUDA Runtime version preference, please re-start Julia for this to take effect."
if VERSION <= v"1.6.5" || VERSION == v"1.7.0"
@warn """Due to a bug in Julia (until 1.6.5 and 1.7.1) your environment needs to directly include CUDA_Runtime_jll for this to work."""
end
Preferences.delete_preferences!(CUDA_Runtime_jll, "local"; force=true)
@info "Reset CUDA.jl toolkit preference, please re-start Julia for this to take effect."
end


Expand Down
26 changes: 15 additions & 11 deletions lib/cudnn/src/cuDNN.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ using CUDA
using CUDA.APIUtils
using CUDA: CUstream, libraryPropertyType
using CUDA: retry_reclaim, isdebug, initialize_context
using CUDA: CUDA_Runtime, CUDA_Runtime_jll

using CEnum: @cenum

import CUDNN_jll
# Select where the cuDNN library comes from: with a local CUDA toolkit it is
# discovered at run time via CUDA_Runtime_Discovery, otherwise the CUDNN_jll
# artifact provides it.
if CUDA.local_toolkit
using CUDA_Runtime_Discovery
else
import CUDNN_jll
end


export has_cudnn
Expand Down Expand Up @@ -154,21 +157,22 @@ function __init__()

CUDA.functional() || return

# find the library
global libcudnn
if CUDA_Runtime == CUDA_Runtime_jll
if !CUDNN_jll.is_available()
precompiling || @error "cuDNN is not available for your platform ($(Base.BinaryPlatforms.triplet(CUDNN_jll.host_platform)))"
return
end
libcudnn = CUDNN_jll.libcudnn
else
dirs = CUDA_Runtime.find_toolkit()
path = CUDA_Runtime.get_library(dirs, "cudnn"; optional=true)
if CUDA.local_toolkit
dirs = CUDA_Runtime_Discovery.find_toolkit()
path = CUDA_Runtime_Discovery.get_library(dirs, "cudnn"; optional=true)
if path === nothing
precompiling || @error "cuDNN is not available on your system (looked in $(join(dirs, ", ")))"
return
end
libcudnn = path
else
if !CUDNN_jll.is_available()
precompiling || @error "cuDNN is not available for your platform ($(Base.BinaryPlatforms.triplet(CUDNN_jll.host_platform)))"
return
end
libcudnn = CUDNN_jll.libcudnn
end

# register a log callback
Expand Down
22 changes: 13 additions & 9 deletions lib/custatevec/src/cuStateVec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ module cuStateVec
using CUDA
using CUDA: CUstream, cudaDataType, @checked, HandleCache, with_workspace, libraryPropertyType
using CUDA: unsafe_free!, retry_reclaim, initialize_context, isdebug
using CUDA: CUDA_Runtime, CUDA_Runtime_jll

using CEnum: @cenum

import cuQuantum_jll
# Select where the cuStateVec library comes from: with a local CUDA toolkit it
# is discovered at run time via CUDA_Runtime_Discovery, otherwise the
# cuQuantum_jll artifact provides it.
# Fix: the artifact package is `cuQuantum_jll` (as referenced by the rest of
# this module), not `cuQuantum_jl` — the previous name would fail to resolve.
if CUDA.local_toolkit
    using CUDA_Runtime_Discovery
else
    import cuQuantum_jll
end


export has_custatevec
Expand Down Expand Up @@ -106,21 +109,22 @@ function __init__()

CUDA.functional() || return

# find the library
global libcustatevec
if CUDA_Runtime == CUDA_Runtime_jll
if !cuQuantum_jll.is_available()
precompiling || @error "cuQuantum is not available for your platform ($(Base.BinaryPlatforms.triplet(cuQuantum_jll.host_platform)))"
return
end
libcustatevec = cuQuantum_jll.libcustatevec
else
if CUDA.local_toolkit
dirs = CUDA_Runtime.find_toolkit()
path = CUDA_Runtime.get_library(dirs, "custatevec"; optional=true)
if path === nothing
precompiling || @error "cuQuantum is not available on your system (looked for custatevec in $(join(dirs, ", ")))"
return
end
libcustatevec = path
else
if !cuQuantum_jll.is_available()
precompiling || @error "cuQuantum is not available for your platform ($(Base.BinaryPlatforms.triplet(cuQuantum_jll.host_platform)))"
return
end
libcustatevec = cuQuantum_jll.libcustatevec
end

# register a log callback
Expand Down
Loading

0 comments on commit 90fe32a

Please sign in to comment.