Read Large Text Files

Read large text files by splitting them into smaller files. Package bigreadr also provides some convenient wrappers around data.table::fread() and data.table::fwrite().
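A minimal sketch of those wrappers, as used later on this page: fwrite2() writes a data frame and returns the file path, and fread2() reads it back as a plain data.frame (rather than a data.table).

```r
library(bigreadr)

# Write iris to a temporary CSV; fwrite2() returns the file path
tmp <- fwrite2(iris, tempfile(fileext = ".csv"))

# Read it back as a plain data.frame
df <- fread2(tmp)
str(df)
```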


Read large text files based on splitting + data.table::fread


# devtools::install_github("privefl/bigreadr")
library(bigreadr)
# Create a temporary file of ~141 MB (just as an example)
csv <- fwrite2(iris[rep(seq_len(nrow(iris)), 1e4), rep(1:5, 4)], tempfile())
format(file.size(csv), big.mark = ",")
## Splitting lines (1)
# Read (by parts) all data -> using `fread` would be faster
nlines(csv)  ## 1.5M lines -> split every 500,000
big_iris1 <- big_fread1(csv, every_nlines = 5e5)
# Read and subset (by parts)
big_iris1_setosa <- big_fread1(csv, every_nlines = 5e5, .transform = function(df) {
  dplyr::filter(df, Species == "setosa")
})

## Splitting columns (2)
big_iris2 <- big_fread2(csv, nb_parts = 3)
# Read and subset (by parts)
species_setosa <- (fread2(csv, select = 5)[[1]] == "setosa")
big_iris2_setosa <- big_fread2(csv, nb_parts = 3, .transform = function(df) {
  dplyr::filter(df, species_setosa)
})

## Verification
identical(big_iris1_setosa, dplyr::filter(big_iris1, Species == "setosa"))
identical(big_iris2, big_iris1)
identical(big_iris2_setosa, big_iris1_setosa)
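If you want to manage the file parts yourself instead of using big_fread1(), the package also exposes lower-level splitting helpers. A sketch below, assuming the exported helpers split_file(), get_split_files(), and rbind_df() with a repeat_header argument (check the reference manual for the exact signatures):

```r
library(bigreadr)

# Small example file (helper names and arguments assumed; see the manual)
csv <- fwrite2(iris, tempfile(fileext = ".csv"))

# Split into parts of 50 lines each, repeating the header in every part
infos <- split_file(csv, every_nlines = 50, repeat_header = TRUE)
parts <- get_split_files(infos)  # paths to the part files

# Read each part and stack the rows back together
df <- rbind_df(lapply(parts, fread2))
nrow(df)
```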




0.1.9 by Florian Privé, 10 days ago

Report a bug at https://github.com/privefl/bigreadr/issues

Browse source code at https://github.com/privefl/bigreadr

Authors: Florian Privé [aut, cre]

Documentation: PDF Manual

GPL-3 license

Imports data.table, Rcpp, parallel, utils

Suggests spelling, testthat, covr, RSQLite

Linking to Rcpp

Imported by bigstatsr, disk.frame.

See at CRAN