Define a function that can be used to feed data into speedglm and biglm
make_glm_streaming_fn(data, verbose = FALSE)
data: A disk.frame.
verbose: Whether to print the status of data loading. Defaults to FALSE.
Returns a function, fn, that can be used as the data argument in biglm::bigglm or speedglm::shglm.
Other Machine Learning (ML):
dfglm()
# Example: fit a GLM of `dist` on `speed` from a disk.frame.

# Convert the built-in `cars` data set into a disk.frame.
cars.df = as.disk.frame(cars)

# Build the function that is handed to the model fitters as their `data`
# argument.
streamacq = make_glm_streaming_fn(cars.df, verbose = FALSE)

# biglm::bigglm is used on R >= 3.6; older R falls back to speedglm::shglm.
# getRversion() returns a comparable version object, so no manual parsing of
# version$major / version$minor is needed, and the condition is a single
# scalar as `if` requires.
if (getRversion() >= "3.6.0") {
  m = biglm::bigglm(dist ~ speed, data = streamacq)
  summary(m)
  # Predict on a single chunk (presumably chunk 1 of the disk.frame).
  predict(m, get_chunk(cars.df, 1))
  # Predict on the collected data. NOTE(review): the extra `1` passed to
  # collect() looks unused -- the printed result has 50 rows, i.e. all of
  # `cars`; confirm against the collect() method for disk.frame.
  predict(m, collect(cars.df, 1))
} else {
  m = speedglm::shglm(dist ~ speed, data = streamacq)
}
#> [,1]
#> 1 -1.849460
#> 2 -1.849460
#> 3 9.947766
#> 4 9.947766
#> 5 13.880175
#> 6 17.812584
#> 7 21.744993
#> 8 21.744993
#> 9 21.744993
#> 10 25.677401
#> 11 25.677401
#> 12 29.609810
#> 13 29.609810
#> 14 29.609810
#> 15 29.609810
#> 16 33.542219
#> 17 33.542219
#> 18 33.542219
#> 19 33.542219
#> 20 37.474628
#> 21 37.474628
#> 22 37.474628
#> 23 37.474628
#> 24 41.407036
#> 25 41.407036
#> 26 41.407036
#> 27 45.339445
#> 28 45.339445
#> 29 49.271854
#> 30 49.271854
#> 31 49.271854
#> 32 53.204263
#> 33 53.204263
#> 34 53.204263
#> 35 53.204263
#> 36 57.136672
#> 37 57.136672
#> 38 57.136672
#> 39 61.069080
#> 40 61.069080
#> 41 61.069080
#> 42 61.069080
#> 43 61.069080
#> 44 68.933898
#> 45 72.866307
#> 46 76.798715
#> 47 76.798715
#> 48 76.798715
#> 49 76.798715
#> 50 80.731124