EXPERIMENTAL

foverlaps.disk.frame(
  df1,
  df2,
  by.x = if (identical(shardkey(df1)$shardkey, "")) shardkey(df1)$shardkey else
    shardkey(df2)$shardkey,
  by.y = shardkey(df2)$shardkey,
  ...,
  outdir = tempfile("df_foverlaps_tmp", fileext = ".df"),
  merge_by_chunk_id = FALSE,
  compress = 50,
  overwrite = TRUE
)

Arguments

df1

A disk.frame

df2

A disk.frame or a data.frame

by.x

character/string vector. by.x used in foverlaps

by.y

character/string vector. by.x used in foverlaps

...

passed to data.table::foverlaps and disk.frame::cmap.disk.frame

outdir

The output directory of the disk.frame

merge_by_chunk_id

If TRUE then the merges will happen for chunks in df1 and df2 with the same chunk id which speed up processing. Otherwise every chunk of df1 is merged with every chunk of df2. Ignored with df2 is not a disk.frame

compress

The compression ratio for fst

overwrite

overwrite existing directory

Examples

library(data.table)

## simple example:
x = as.disk.frame(data.table(start=c(5,31,22,16), end=c(8,50,25,18), val2 = 7:10))
y = as.disk.frame(data.table(start=c(10, 20, 30), end=c(15, 35, 45), val1 = 1:3))
byxy = c("start", "end")
xy.df = foverlaps.disk.frame(
   x, y, by.x = byxy, by.y = byxy,
  merge_by_chunk_id = TRUE, overwrite = TRUE)
# clean up
delete(x)
delete(y)
delete(xy.df)