Increase or decrease the number of chunks in the disk.frame

rechunk(
  df,
  nchunks,
  outdir = attr(df, "path", exact = TRUE),
  shardby = NULL,
  overwrite = TRUE,
  shardby_function = "hash",
  sort_splits = NULL,
  desc_vars = NULL
)

Arguments

df

the disk.frame to rechunk

nchunks

number of chunks

outdir

the output directory; defaults to the path of df, so the rechunking is done in place

shardby

the shard keys, i.e. the name(s) of the column(s) to shard by (e.g. "speed")

overwrite

whether to overwrite the output directory

shardby_function

splitting of chunks: "hash" for hash function or "sort" for semi-sorted chunks

sort_splits

for the "sort" shardby function, a dataframe with the split values.

desc_vars

for the "sort" shardby function, the variables to sort descending.

Examples

# create a disk.frame with 2 chunks in tempdir()
cars.df = as.disk.frame(cars, nchunks = 2)
# re-chunking cars.df to 3 chunks, done "in-place" to the same folder as cars.df
rechunk(cars.df, 3)
#> files have been backed up to temporary dir C:\Users\RTX2080\AppData\Local\Temp\RtmpInritK\back_up_tmp_dir187c185718b5. You can recover there files until you restart your R session
#> path: "C:\Users\RTX2080\AppData\Local\Temp\RtmpInritK\file187c36421ee.df"
#> nchunks: 3
#> nrow (at source): 50
#> ncol (at source): 2
#> nrow (post operations): ???
#> ncol (post operations): ???
new_path = tempfile(fileext = ".df")
# re-chunking cars.df to 4 chunks, shard by speed, and done "out-of-place" to a new directory
cars2.df = rechunk(cars.df, 4, outdir=new_path, shardby = "speed")
#> Hashing...
#> Hashing...
#> Hashing...
#> Appending disk.frames:
# clean up cars.df
delete(cars.df)
delete(cars2.df)