Make and use bloomfilters to find the chunks likely to contain given values

Return the chunks that are likely to contain the given values

Use bloomfilter

make_bloomfilter(df, cols, ...)

bf_likely_in_chunks(df, cols, values)

use_bloom_filter(df, cols, values)

Arguments

df

a disk.frame

cols

columns to make bloomfilters with

...

not used

values

the values to look up using the bloomfilter

Examples

df = nycflights13::flights %>% as.disk.frame(shardby = c("carrier"))
#> Hashing...
values = "UA"
system.time(make_bloomfilter(df, "carrier"))
#> Error in make_bloomfilter(df, "carrier"): could not find function "make_bloomfilter"
#> Timing stopped at: 0 0 0
bf_likely_in_chunks(df, "carrier", values)
#> Error in bf_likely_in_chunks(df, "carrier", values): could not find function "bf_likely_in_chunks"
system.time(bf_likely_in_chunks(df, "carrier", "values"))
#> Error in bf_likely_in_chunks(df, "carrier", "values"): could not find function "bf_likely_in_chunks"
#> Timing stopped at: 0 0 0
system.time(d1 <- df %>% use_bloom_filter("carrier", "UA") %>% collect)
#> Error in use_bloom_filter(., "carrier", "UA"): could not find function "use_bloom_filter"
#> Timing stopped at: 0 0 0