Update the Slurm CI image and change how the tests read the job state.

Per-file changes, as shown by the hunks below:
  * ci/Dockerfile: switch the base image to ghcr.io/kleinhenz/docker-slurm-cluster
    (pinned by digest), drop the MAINTAINER line, and bump the default
    JULIA_VERSION from 1.6.0 to 1.11.2.
  * ci/docker-compose.yml: drop the top-level "version" key and add a build
    section (context "..", dockerfile "ci/Dockerfile") to the slurmctld service.
  * test/runtests.jl: strip the sbatch output, read the job state from
    `scontrol show jobid=...` instead of `sacct`, and assert that the final
    state is COMPLETED.
  * test/script.jl: print the sorted host list before asserting on it.

NOTE(review): leading whitespace inside the hunks was reconstructed; confirm it
against the target files, or apply with `git apply --ignore-whitespace`.
NOTE(review): the `state === nothing && return false` line was changed from
`==` to `===` for consistency with getjobstate, so the post-image blob id on
the test/runtests.jl index line (307f1fe) no longer matches the result.

diff --git a/ci/Dockerfile b/ci/Dockerfile
index 94750e7..25a3586 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -1,7 +1,6 @@
-FROM jkleinh/slurm-cluster@sha256:afd20dafc831b0fa781460dc871232579ccf1b54955e434531394c331ce388e4 as base
-MAINTAINER Joseph Kleinhenz
+FROM ghcr.io/kleinhenz/docker-slurm-cluster@sha256:c62b169970eaab879898a7df5950f888b0417147e06f8c0e32fd15087b22d9f2
 
-ARG JULIA_VERSION=1.6.0
+ARG JULIA_VERSION=1.11.2
 
 RUN mkdir -p /home/docker/.local/opt/julia \
     && cd /home/docker/.local/opt/julia \
diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml
index e9b60e5..bb06d41 100644
--- a/ci/docker-compose.yml
+++ b/ci/docker-compose.yml
@@ -1,8 +1,9 @@
-version: "3.3"
-
 services:
   slurmctld:
     image: slurm-cluster-julia
+    build:
+      context: ..
+      dockerfile: ci/Dockerfile
     command: ["slurmctld"]
     container_name: slurmctld
     hostname: slurmctld
diff --git a/test/runtests.jl b/test/runtests.jl
index f03d53d..307f1fe 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,33 +10,33 @@ using Distributed, Test, SlurmClusterManager
 project_path = abspath(joinpath(@__DIR__, ".."))
 println("project_path = $project_path")
 jobid = withenv("JULIA_PROJECT"=>project_path) do
-  read(`sbatch --export=ALL --parsable -n 4 -o test.out script.jl`, String)
+  strip(read(`sbatch --export=ALL --parsable -n 4 -o test.out script.jl`, String))
 end
 println("jobid = $jobid")
 
 # get job state from jobid
-getjobstate = jobid -> read(`sacct -j $jobid --format=state --noheader`, String)
+getjobstate = jobid -> begin
+  cmd = Cmd(`scontrol show jobid=$jobid`, ignorestatus=true)
+  info = read(cmd, String)
+  state = match(r"JobState=(\S*)", info)
+  return state === nothing ? nothing : state.captures[1]
+end
 
 # wait for job to complete
 status = timedwait(60.0, pollint=1.0) do
   state = getjobstate(jobid)
-  state == "" && return false
-  state = first(split(state)) # don't care about jobsteps
+  state === nothing && return false
   println("jobstate = $state")
   return state == "COMPLETED" || state == "FAILED"
 end
 
+state = getjobstate(jobid)
+
 # check that job finished running within timelimit (either completed or failed)
 @test status == :ok
+@test state == "COMPLETED"
 
 # print job output
 output = read("test.out", String)
 println("script output:")
 println(output)
-
-state = getjobstate(jobid) |> split
-# length should be two because creating the workers creates a job step
-@test length(state) == 2
-
-# check that everything exited without errors
-@test all(state .== "COMPLETED")
diff --git a/test/script.jl b/test/script.jl
index 060760a..1da6c49 100644
--- a/test/script.jl
+++ b/test/script.jl
@@ -9,5 +9,6 @@ hosts = map(workers()) do id
   remotecall_fetch(() -> gethostname(), id)
 end
 sort!(hosts)
+println(hosts)
 
 @assert hosts == ["c1", "c1", "c2", "c2"]