|
3 | 3 | # We don't use `using Foo` here.
|
4 | 4 | # We either use `using Foo: hello, world`, or we use `import Foo`.
|
5 | 5 | # https://github.com/JuliaLang/julia/pull/42080
|
6 |
| -using Distributed: addprocs, workers, nworkers, remotecall_fetch |
| 6 | +using Distributed: addprocs, workers, nworkers, remotecall_fetch, @everywhere |
7 | 7 | using SlurmClusterManager: SlurmManager
|
8 | 8 |
|
9 | 9 | addprocs(SlurmManager())
|
10 | 10 |
|
| 11 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 12 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 13 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 14 | + |
| 15 | +# To run tests outside of CI, set e.g. |
| 16 | +# `export JULIA_SLURMCLUSTERMANAGER_IS_CI=false` |
| 17 | +# in your Bash session before you launch the Slurm job. |
"""
    is_ci() -> Bool

Return whether this test run should behave as if it is running in CI.

The answer is read from the `JULIA_SLURMCLUSTERMANAGER_IS_CI` environment
variable. Surrounding whitespace is ignored and letter case does not matter,
so `true`, `True`, and ` FALSE ` are all accepted. When the variable is not
set, the result is `true`.

# Throws
- `ArgumentError`: if the variable is set to a value that cannot be parsed as
  a `Bool`. The message names the variable and shows the offending value.
"""
function is_ci()
    name = "JULIA_SLURMCLUSTERMANAGER_IS_CI"

    # We intentionally default to true.
    # This allows things to work in our CI (which is inside of Docker).
    default_value = "true"

    value_str = strip(get(ENV, name, default_value))

    # Normalize case so that spellings such as `True` or `FALSE` are accepted.
    # `tryparse` is used instead of `parse` so that a malformed value produces
    # an error that says which environment variable needs to be fixed.
    value_b = tryparse(Bool, lowercase(value_str))
    if value_b === nothing
        msg = "Environment variable $(name) could not be parsed as a Bool: $(repr(String(value_str)))"
        throw(ArgumentError(msg))
    end
    return value_b
end
| 29 | + |
| 30 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 31 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 32 | +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| 33 | + |
11 | 34 | # We intentionally do not use `@assert` here.
|
12 | 35 | # In a future minor release of Julia, `@assert`s may be disabled by default.
|
13 |
| -const SLURM_NTASKS = parse(Int, ENV["SLURM_NTASKS"]) |
# Task count taken from the `SLURM_NTASKS` environment variable; surrounding
# whitespace is stripped before the value is parsed as an integer.
const SLURM_NTASKS = parse(Int, strip(ENV["SLURM_NTASKS"]))

# Both ways of counting workers must agree with SLURM_NTASKS. Failures are
# raised with `error` rather than `@assert`.
nworkers() == SLURM_NTASKS ||
    error("Test failed: nworkers=$(nworkers()) does not match SLURM_NTASKS=$(SLURM_NTASKS)")

length(workers()) == SLURM_NTASKS ||
    error("Test failed: length(workers())=$(length(workers())) does not match SLURM_NTASKS=$(SLURM_NTASKS)")
18 | 45 |
|
# Ask each worker for the hostname of the machine it runs on. The names are
# sorted so the resulting list does not depend on the order of worker IDs.
const hosts = sort!([remotecall_fetch(gethostname, id) for id in workers()])
println("List of hosts: ", hosts)
|
24 | 51 |
|
25 |
| -# We don't use `@assert` here, for reason described above. |
26 |
| -if hosts != ["c1", "c1", "c2", "c2"] |
27 |
| - msg = "Test failed: observed_hosts=$(hosts) does not match expected_hosts=[c1, c1, c2, c2]" |
28 |
| - error(msg) |
# The expected host list below is only checked when `is_ci()` returns true;
# otherwise the comparison is skipped with a warning.
if !is_ci()
    @warn "This is not CI, so we will skip the hostname test"
else
    @info "This is CI, so we will perform the hostname test"

    # We don't use `@assert` here, because `@assert`s may be disabled by
    # default in a future minor release of Julia.
    hosts == ["c1", "c1", "c2", "c2"] ||
        error("Test failed: observed_hosts=$(hosts) does not match expected_hosts=[c1, c1, c2, c2]")
end
|
| 63 | + |
# `Distributed` has to be bound by name on every process, because the message
# below calls `Distributed.myid()` on whichever process evaluates it.
@everywhere import Distributed

# Each process prints its own Distributed process ID next to the hostname of
# the machine it is running on.
@everywhere println("Host $(Distributed.myid()): $(gethostname())")
0 commit comments