R/anti_join.r
, R/full_join.r
, R/inner_join.r
, and 2 more
join.Rd
Performs join/merge for disk.frames
# S3 method for disk.frame
anti_join(
x,
y,
by = NULL,
copy = FALSE,
...,
outdir = tempfile("tmp_disk_frame_anti_join"),
merge_by_chunk_id = FALSE,
overwrite = TRUE,
.progress = FALSE
)
# S3 method for disk.frame
full_join(
x,
y,
by = NULL,
copy = FALSE,
...,
outdir = tempfile("tmp_disk_frame_full_join"),
overwrite = TRUE,
merge_by_chunk_id,
.progress = FALSE
)
# S3 method for disk.frame
inner_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
outdir = tempfile("tmp_disk_frame_inner_join"),
merge_by_chunk_id = NULL,
overwrite = TRUE,
.progress = FALSE
)
# S3 method for disk.frame
left_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = FALSE,
outdir = tempfile("tmp_disk_frame_left_join"),
merge_by_chunk_id = FALSE,
overwrite = TRUE,
.progress = FALSE
)
# S3 method for disk.frame
semi_join(
x,
y,
by = NULL,
copy = FALSE,
...,
outdir = tempfile("tmp_disk_frame_semi_join"),
merge_by_chunk_id = FALSE,
overwrite = TRUE,
.progress = FALSE
)
a disk.frame
a data.frame or disk.frame. If it is a data.frame, the join is performed lazily; if it is a disk.frame, the join is performed eagerly and returns a disk.frame
the variables to join by
same as dplyr::anti_join
same as dplyr's joins
output directory for disk.frame
whether the merge is performed by chunk id
whether to overwrite the output directory
Show progress or not. Defaults to FALSE
see dplyr::XXX_join
see dplyr::XXX_join
disk.frame or data.frame/data.table
df.df = as.disk.frame(data.frame(x = 1:3, y = 4:6), overwrite = TRUE)
df2.df = as.disk.frame(data.frame(x = 1:2, z = 10:11), overwrite = TRUE)
anti_joined.df = anti_join(df.df, df2.df)
#> Warning: merge_by_chunk_id = FALSE. This will take significantly longer and the preparations needed are performed eagerly which may lead to poor performance. Consider making y a data.frame or set merge_by_chunk_id = TRUE for better performance.
#> Appending disk.frames:
#> Appending disk.frames:
anti_joined.df %>% collect
#> x y
#> 1: 3 6
anti_joined.data.frame = anti_join(df.df, data.frame(x = 1:2, z = 10:11))
# clean up
delete(df.df)
delete(df2.df)
delete(anti_joined.df)
cars.df = as.disk.frame(cars)
join.df = full_join(cars.df, cars.df, merge_by_chunk_id = TRUE)
# clean up cars.df and join.df
delete(cars.df)
delete(join.df)
cars.df = as.disk.frame(cars)
join.df = inner_join(cars.df, cars.df, merge_by_chunk_id = TRUE)
# clean up cars.df and join.df
delete(cars.df)
delete(join.df)
cars.df = as.disk.frame(cars)
join.df = left_join(cars.df, cars.df)
#> Warning: `merge_by_chunk_id = FALSE`. This will take significantly longer and the preparations needed are performed eagerly which may lead to poor performance. Consider making `y` a data.frame or set merge_by_chunk_id = TRUE for better performance.
#> Appending disk.frames:
#> Appending disk.frames:
# clean up cars.df and join.df
delete(cars.df)
delete(join.df)
cars.df = as.disk.frame(cars)
join.df = semi_join(cars.df, cars.df)
#> Warning: merge_by_chunk_id = FALSE. This will take significantly longer and the preparations needed are performed eagerly which may lead to poor performance. Consider making y a data.frame or set merge_by_chunk_id = TRUE for better performance.
#> Appending disk.frames:
#> Appending disk.frames:
# clean up cars.df and join.df
delete(cars.df)
delete(join.df)