Define a function that can be used to feed data into speedglm and biglm

make_glm_streaming_fn(data, verbose = FALSE)

Arguments

data

a disk.frame

verbose

Whether to print the status of data loading. Defaults to FALSE.

Value

Returns a function, fn, that can be used as the data argument in biglm::bigglm or speedglm::shglm.

See also

Other Machine Learning (ML): dfglm()

Examples

# Convert `cars` to a disk.frame and build the streaming function that is
# passed as the `data` argument of the model-fitting calls below.
cars.df <- as.disk.frame(cars)
streamacq <- make_glm_streaming_fn(cars.df, verbose = FALSE)

# Fit with biglm on R 3.6 or later; otherwise fall back to speedglm.
# getRversion() compares complete version numbers, so there is no need to
# parse version$major / version$minor by hand or to combine scalar tests
# with the elementwise operators `&` and `|`.
if (getRversion() >= "3.6.0") {
  m <- biglm::bigglm(dist ~ speed, data = streamacq)
  summary(m)
  # Predict on the first chunk only, then on the collected data.
  predict(m, get_chunk(cars.df, 1))
  predict(m, collect(cars.df, 1))
} else {
  m <- speedglm::shglm(dist ~ speed, data = streamacq)
}
#>         [,1]
#> 1  -1.849460
#> 2  -1.849460
#> 3   9.947766
#> 4   9.947766
#> 5  13.880175
#> 6  17.812584
#> 7  21.744993
#> 8  21.744993
#> 9  21.744993
#> 10 25.677401
#> 11 25.677401
#> 12 29.609810
#> 13 29.609810
#> 14 29.609810
#> 15 29.609810
#> 16 33.542219
#> 17 33.542219
#> 18 33.542219
#> 19 33.542219
#> 20 37.474628
#> 21 37.474628
#> 22 37.474628
#> 23 37.474628
#> 24 41.407036
#> 25 41.407036
#> 26 41.407036
#> 27 45.339445
#> 28 45.339445
#> 29 49.271854
#> 30 49.271854
#> 31 49.271854
#> 32 53.204263
#> 33 53.204263
#> 34 53.204263
#> 35 53.204263
#> 36 57.136672
#> 37 57.136672
#> 38 57.136672
#> 39 61.069080
#> 40 61.069080
#> 41 61.069080
#> 42 61.069080
#> 43 61.069080
#> 44 68.933898
#> 45 72.866307
#> 46 76.798715
#> 47 76.798715
#> 48 76.798715
#> 49 76.798715
#> 50 80.731124