Define a function that can be used to feed data into speedglm and biglm

make_glm_streaming_fn(data, verbose = FALSE)

Arguments

data

a disk.frame

verbose

Whether to print the status of data loading. Defaults to FALSE.

Value

Returns a function, fn, that can be used as the data argument in biglm::bigglm or speedglm::shglm.

See also

Other Machine Learning (ML): dfglm()

Examples

# Convert the built-in `cars` data set to a disk.frame and wrap it in a
# streaming function that can be passed as the `data` argument of a GLM fitter.
cars.df = as.disk.frame(cars)
streamacq = make_glm_streaming_fn(cars.df, verbose = FALSE)

# Use biglm::bigglm on R >= 3.6 and fall back to speedglm::shglm on older R.
# getRversion() compares version components numerically, so no manual parsing
# of version$major / version$minor is needed.
if (getRversion() >= "3.6.0") {
  m = biglm::bigglm(dist ~ speed, data = streamacq)
  summary(m)
  predict(m, get_chunk(cars.df, 1))
  # Only the value of the last expression inside the braces is auto-printed.
  predict(m, collect(cars.df, 1))
} else {
  m = speedglm::shglm(dist ~ speed, data = streamacq)
}
#>         [,1]
#> 1  -1.849460
#> 2  -1.849460
#> 3   9.947766
#> 4   9.947766
#> 5  13.880175
#> 6  17.812584
#> 7  21.744993
#> 8  21.744993
#> 9  21.744993
#> 10 25.677401
#> 11 25.677401
#> 12 29.609810
#> 13 29.609810
#> 14 29.609810
#> 15 29.609810
#> 16 33.542219
#> 17 33.542219
#> 18 33.542219
#> 19 33.542219
#> 20 37.474628
#> 21 37.474628
#> 22 37.474628
#> 23 37.474628
#> 24 41.407036
#> 25 41.407036
#> 26 41.407036
#> 27 45.339445
#> 28 45.339445
#> 29 49.271854
#> 30 49.271854
#> 31 49.271854
#> 32 53.204263
#> 33 53.204263
#> 34 53.204263
#> 35 53.204263
#> 36 57.136672
#> 37 57.136672
#> 38 57.136672
#> 39 61.069080
#> 40 61.069080
#> 41 61.069080
#> 42 61.069080
#> 43 61.069080
#> 44 68.933898
#> 45 72.866307
#> 46 76.798715
#> 47 76.798715
#> 48 76.798715
#> 49 76.798715
#> 50 80.731124