Issue with running Julia batch task

Hello, I am trying to submit a batch task through JuliaHub. I am using the full path to read the dataset like the following:

using Pumas
using CSV
using DataFramesMeta
using DataFrames
using DataFramesMeta
using PumasPlots
using Distributed
using PumasUtilities
using Random
using AlgebraOfGraphics
using CairoMakie
using Serialization

#Pkg.add("Serialization") 

cd(@__DIR__)
println(pwd())

println("nworkers = ", Distributed.nworkers())

mychainscat(x...) = reduce(Pumas.chainscat, x[2:end], init = x[1])

df = CSV.read("/home/jrun/data/code/ECMO_UMMC/UFH_PEDS_PK.csv",DataFrame)

However, the task fails and I receive the following error message:

Error running user code
Timestamp: 23 Dec 2023, 4:06 PM-1703365605160
Worker: jr-qjpkexnwvi-6g58r, id: 1
Thread: 1
Module: Main.JuliaRunJob
Level: Error
Group: master_startup
Exception:
LoadError: ArgumentError: "/home/jrun/data/code/ECMO_UMMC/UFH_PEDS_PK.csv" is not a valid file or doesn't exist
Stacktrace:
  [1] CSV.Context(source::CSV.Arg, header::CSV.Arg, normalizenames::CSV.Arg, datarow::CSV.Arg, skipto::CSV.Arg, footerskip::CSV.Arg, transpose::CSV.Arg, comment::CSV.Arg, ignoreemptyrows::CSV.Arg, ignoreemptylines::CSV.Arg, select::CSV.Arg, drop::CSV.Arg, limit::CSV.Arg, buffer_in_memory::CSV.Arg, threaded::CSV.Arg, ntasks::CSV.Arg, tasks::CSV.Arg, rows_to_check::CSV.Arg, lines_to_check::CSV.Arg, missingstrings::CSV.Arg, missingstring::CSV.Arg, delim::CSV.Arg, ignorerepeated::CSV.Arg, quoted::CSV.Arg, quotechar::CSV.Arg, openquotechar::CSV.Arg, closequotechar::CSV.Arg, escapechar::CSV.Arg, dateformat::CSV.Arg, dateformats::CSV.Arg, decimal::CSV.Arg, truestrings::CSV.Arg, falsestrings::CSV.Arg, stripwhitespace::CSV.Arg, type::CSV.Arg, types::CSV.Arg, typemap::CSV.Arg, pool::CSV.Arg, downcast::CSV.Arg, lazystrings::CSV.Arg, stringtype::CSV.Arg, strict::CSV.Arg, silencewarnings::CSV.Arg, maxwarnings::CSV.Arg, debug::CSV.Arg, parsingdebug::CSV.Arg, validate::CSV.Arg, streaming::CSV.Arg)
    @ CSV /build/_work/PumasSystemImages/PumasSystemImages/julia_depot/packages/CSV/b8ebJ/src/context.jl:306
  [2] #File#28
    @ /build/_work/PumasSystemImages/PumasSystemImages/julia_depot/packages/CSV/b8ebJ/src/file.jl:221 [inlined]
  [3] File
    @ /build/_work/PumasSystemImages/PumasSystemImages/julia_depot/packages/CSV/b8ebJ/src/file.jl:162 [inlined]
  [4] read(source::String, sink::Type; copycols::Bool, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CSV /build/_work/PumasSystemImages/PumasSystemImages/julia_depot/packages/CSV/b8ebJ/src/CSV.jl:117
  [5] read(source::String, sink::Type)
    @ CSV /build/_work/PumasSystemImages/PumasSystemImages/julia_depot/packages/CSV/b8ebJ/src/CSV.jl:113
  [6] top-level scope
    @ /opt/juliahub/code.jl:23
  [7] include(fname::String)
    @ Base.MainInclude ./client.jl:476
  [8] top-level scope
    @ none:1
  [9] eval
    @ ./boot.jl:368 [inlined]
 [10] eval
    @ ./client.jl:478 [inlined]
 [11] eval_user_code(expression::Expr)
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:10
 [12] (::Main.JuliaRunJob.var"#71#80"{Main.JuliaRunJob.JobContext})()
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:274
 [13] user_code_env(f::Main.JuliaRunJob.var"#71#80"{Main.JuliaRunJob.JobContext}, ctx::Main.JuliaRunJob.JobContext, require::Set{String}, allow::Set{String}, out::Set{String})
    @ Main.JuliaRunJob /opt/juliahub/job_context.jl:410
 [14] (::Main.JuliaRunJob.var"#68#77"{IOBuffer, Main.JuliaRunJob.JobContext, LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}})()
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:252
 [15] macro expansion
    @ /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:22 [inlined]
 [16] macro expansion
    @ ./task.jl:454 [inlined]
 [17] _redirect_to_logger(f::Main.JuliaRunJob.var"#68#77"{IOBuffer, Main.JuliaRunJob.JobContext, LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}}, logger::LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}, level_for_logs::Base.CoreLogging.LogLevel, redirect_func::Base.RedirectStdStream, prev_stream::Base.PipeEndpoint, stream_name::Symbol)
    @ Logging2 /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:19
 [18] (::Base.RedirectStdStream)(f::Function, logger::LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}; level::Base.CoreLogging.LogLevel)
    @ Logging2 /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:63
 [19] RedirectStdStream
    @ /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:55 [inlined]
 [20] #67
    @ /opt/juliahub/master_startup.jl:191 [inlined]
 [21] macro expansion
    @ /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:22 [inlined]
 [22] macro expansion
    @ ./task.jl:454 [inlined]
 [23] _redirect_to_logger(f::Main.JuliaRunJob.var"#67#76"{IOBuffer, Main.JuliaRunJob.JobContext, LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}}, logger::LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}, level_for_logs::Base.CoreLogging.LogLevel, redirect_func::Base.RedirectStdStream, prev_stream::Base.PipeEndpoint, stream_name::Symbol)
    @ Logging2 /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:19
 [24] (::Base.RedirectStdStream)(f::Function, logger::LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}; level::Base.CoreLogging.LogLevel)
    @ Logging2 /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:63
 [25] RedirectStdStream
    @ /opt/juliahub/projects/internal_depot/packages/Logging2/EwyL4/src/Logging2.jl:55 [inlined]
 [26] (::Main.JuliaRunJob.var"#66#75"{IOBuffer, Main.JuliaRunJob.JobContext, LoggingExtras.TeeLogger{Tuple{JuliaRunLoggers.JuliaRunLogger}}})()
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:190
 [27] with_logstate(f::Function, logstate::Any)
    @ Base.CoreLogging ./logging.jl:511
 [28] with_logger
    @ ./logging.jl:623 [inlined]
 [29] execute_wrapped(config_io::IOBuffer, ctx::Main.JuliaRunJob.JobContext)
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:189
 [30] main
    @ /opt/juliahub/master_startup.jl:172 [inlined]
 [31] main(config_io::IOBuffer) (repeats 2 times)
    @ Main.JuliaRunJob /opt/juliahub/master_startup.jl:166
 [32] top-level scope
    @ /opt/juliahub/master_startup.jl:315
 [33] include(mod::Module, _path::String)
    @ Base ./Base.jl:419
 [34] exec_options(opts::Base.JLOptions)
    @ Base ./client.jl:303
 [35] _start()
    @ Base ./client.jl:522
in expression starting at /opt/juliahub/code.jl:23

It seems the dataset cannot be read although I can still run it if I am using the REPL. This code used to run previously. I am wondering if there is another way on how to read the datasets from code submitted as batch job. I attached my batch job specifications.

Hi Ahmed,

When running a batch job you shouldn’t rely on accessing your usual “data” folder. However, you are right that this has sometimes has been working but it was not intentional. The batch job script is send off to the cloud and executed on a completely different system so anything that the script needs to access would have to be made available either through the bundle mechanism (see more here) or as a DataSet.

If you go with the bundle solution then you’d typically select the root folder of your project as the bundle directory. However, be careful to exclude large unrelated folders by including them in a .juliabundleignore file. It works similarly to .gitignore and it should be located in the bundle directory. If you don’t exclude large folders then you will send a lot of unnecessary data around which will slow down the job and you might also exceed the bundle limit.

Alternative, you could upload your data as a JuliaHub DataSet and then access it with dataset function. See the documentation for details. I hope one of these solutions will resolve your issue.

Best
Andreas

1 Like

Thank you so much Andreas for the detailed answer. I tried you suggestion and it worked!