Futures

Using Henrik Bengtsson’s future package, it is possible to evaluate modules asynchronously using various resources available to the user. For instance, evaluation of modules can be sequential, lazy, and/or parallelized (on multiple cores and/or on a cluster of machines).

# How many cores are available?
availableCores()
#> system 
#>      4

"foo" %provides% {
  future({
    Sys.sleep(1L)
    "Hello"
  }) %plan% multicore
}
#> [2018-12-02T16:14:14 UTC] Defining 'foo' ... OK

"bar" %provides% {
  future({
    Sys.sleep(1L)
    "World"
  }) %plan% multicore
}
#> [2018-12-02T16:14:14 UTC] Defining 'bar' ... OK

"foobar" %requires% list(
  f = "foo",
  b = "bar"
) %provides% {
  paste0(value(f), " ", value(b), "!")
}
#> [2018-12-02T16:14:14 UTC] Defining 'foobar' ... OK

system.time(print(make("foobar")))
#> [2018-12-02T16:14:14 UTC] Making 'foobar' ...
#> [2018-12-02T16:14:14 UTC] * Visiting and defining dependencies ...
#> [2018-12-02T16:14:14 UTC] * Constructing dependency graph ... OK
#> [2018-12-02T16:14:14 UTC] * Sorting 2 dependencies with 2 relations ... on 1 layer, OK
#> [2018-12-02T16:14:14 UTC] * Evaluating new and outdated dependencies ...
#> [2018-12-02T16:14:14 UTC] ** Evaluating #1/2 (layer #1/1): 'bar' ...
#> [2018-12-02T16:14:14 UTC] ** Evaluating #2/2 (layer #1/1): 'foo' ...
#> [2018-12-02T16:14:15 UTC] DONE ('foobar' in 1.1 secs)
#> [1] "Hello World!"
#>    user  system elapsed 
#>   0.059   0.014   1.073

It is often useful to parallelize an already existing module. modulr comes with the handy futurize function for this purpose. The following two examples illustrate the difference between sequential and lazy orchestrations of parallelized modules; in each case, compare the elapsed times reported by system.time for the call to make and for the subsequent call to value.

"foo" %provides% { Sys.sleep(1L); "Hello" }
#> [2018-12-02T16:14:15 UTC] Defining 'foo' ... OK
"bar" %provides% { Sys.sleep(1L); "World" }
#> [2018-12-02T16:14:15 UTC] Defining 'bar' ... OK
"foobar" %requires% list(
  f = "foo", 
  b = "bar"
) %provides% {
  paste0(f, " ", b, "!")
}
#> [2018-12-02T16:14:15 UTC] Defining 'foobar' ... OK

futurize("foo", strategy = multicore)
#> [2018-12-02T16:14:15 UTC] Defining 'foo/future' ... OK
futurize("bar", strategy = multicore)
#> [2018-12-02T16:14:15 UTC] Defining 'bar/future' ... OK

futurize(
  "foobar", 
  name = "foobar/sequential", 
  dependencies = list(f = "foo/future", b = "bar/future"),
  strategy = sequential
)
#> [2018-12-02T16:14:15 UTC] Defining 'foobar/sequential' ... OK

system.time(fb_sequential <- make("foobar/sequential"))
#> [2018-12-02T16:14:16 UTC] Making 'foobar/sequential' ...
#> [2018-12-02T16:14:16 UTC] * Visiting and defining dependencies ...
#> [2018-12-02T16:14:16 UTC] * Constructing dependency graph ... OK
#> [2018-12-02T16:14:16 UTC] * Sorting 2 dependencies with 2 relations ... on 1 layer, OK
#> [2018-12-02T16:14:16 UTC] * Evaluating new and outdated dependencies ...
#> [2018-12-02T16:14:16 UTC] ** Evaluating #1/2 (layer #1/1): 'bar/future' ...
#> [2018-12-02T16:14:16 UTC] ** Evaluating #2/2 (layer #1/1): 'foo/future' ...
#> [2018-12-02T16:14:17 UTC] DONE ('foobar/sequential' in 1.1 secs)
#>    user  system elapsed 
#>   0.073   0.023   1.073
system.time(print(value(fb_sequential)))
#> [1] "Hello World!"
#>    user  system elapsed 
#>   0.001   0.000   0.001
touch("foo/future")
#> [2018-12-02T16:14:17 UTC] Touching 'foo/future' ... OK
touch("bar/future")
#> [2018-12-02T16:14:17 UTC] Touching 'bar/future' ... OK
futurize(
  "foobar", 
  name = "foobar/lazy", 
  dependencies = list(f = "foo/future", b = "bar/future"),
  strategy = sequential,
  lazy = TRUE
)
#> [2018-12-02T16:14:17 UTC] Defining 'foobar/lazy' ... OK

system.time(fb_lazy <- make("foobar/lazy"))
#> [2018-12-02T16:14:17 UTC] Making 'foobar/lazy' ...
#> [2018-12-02T16:14:17 UTC] * Visiting and defining dependencies ...
#> [2018-12-02T16:14:17 UTC] * Constructing dependency graph ... OK
#> [2018-12-02T16:14:17 UTC] * Sorting 2 dependencies with 2 relations ... on 1 layer, OK
#> [2018-12-02T16:14:17 UTC] * Evaluating new and outdated dependencies ...
#> [2018-12-02T16:14:17 UTC] ** Evaluating #1/2 (layer #1/1): 'bar/future' ...
#> [2018-12-02T16:14:17 UTC] ** Evaluating #2/2 (layer #1/1): 'foo/future' ...
#> [2018-12-02T16:14:17 UTC] DONE ('foobar/lazy' in 0.074 secs)
#>    user  system elapsed 
#>   0.062   0.013   0.077
Sys.sleep(0.5)
system.time(print(value(fb_lazy)))
#> [1] "Hello World!"
#>    user  system elapsed 
#>   0.011   0.009   0.420

Package isolation

Let us consider the following situation: most of your code relies on an old version of dplyr, say 0.4.1, and you want to migrate progressively to the latest version available on CRAN, say 0.7.0. How can two different versions of dplyr coexist in your ecosystem? Package isolation is an experimental feature that allows you to isolate a portion of code, for example a module, from already loaded and attached packages.

Using other packages

Let us assume that dplyr_0.4.1 is installed in the current library paths:

library(dplyr)  # version 0.4.1, installed in the library paths
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
print(sessionInfo())
#> R version 3.2.5 (2016-04-14)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Red Hat Enterprise Linux
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dplyr_0.4.1        memoise_1.1.0      future_1.10.0     
#> [4] RColorBrewer_1.1-2 chorddiag_0.1.1    networkD3_0.2.13  
#> [7] modulr_0.1.7.9208 
#> 
#> loaded via a namespace (and not attached):
#>  [1] igraph_1.0.1          Rcpp_0.12.19          rstudioapi_0.8.0.9000
#>  [4] knitr_1.19            magrittr_1.5          pooh_0.3-2           
#>  [7] stringr_1.3.1         globals_0.12.4        tools_3.2.5          
#> [10] parallel_3.2.5        DBI_0.6-1             withr_2.1.2          
#> [13] htmltools_0.3.6       yaml_2.1.16           assertthat_0.2.0     
#> [16] rprojroot_1.3-2       digest_0.6.18         htmlwidgets_0.8      
#> [19] codetools_0.2-14      evaluate_0.10.1       rmarkdown_1.8        
#> [22] stringi_1.2.4         backports_1.1.2       listenv_0.7.0

This is the current code using dplyr_0.4.1, where arrange sorts within each group by default:

cars %>% 
  group_by(speed) %>% 
  arrange(desc(dist)) %>% 
  ungroup %>% 
  head
#> Source: local data frame [6 x 2]
#> 
#>   speed dist
#> 1     4   10
#> 2     4    2
#> 3     7   22
#> 4     7    4
#> 5     8   16
#> 6     9   10

Let us isolate the session from all loaded and attached packages (except the so-called base packages and a few others, such as modulr itself, obviously):

isolate_from_packages()
#> [2018-12-02T16:14:18 UTC] Isolating from loaded and attached packages ...
#> [2018-12-02T16:14:18 UTC] * Ignoring base packages and 14 packages: 'assertthat', 'curl', 'devtools', 'digest', 'httr', 'jsonlite', 'knitr', 'memoise', 'modulr', 'pooh', 'rmarkdown', 'rstudioapi', 'stringi', 'stringr'.
#> [2018-12-02T16:14:18 UTC] * Forgetting 'dplyr' ... OK
#> [2018-12-02T16:14:19 UTC] * Forgetting 'future' ... OK
#> [2018-12-02T16:14:19 UTC] * Forgetting 'RColorBrewer' ... OK
#> [2018-12-02T16:14:19 UTC] * Forgetting 'chorddiag' ... OK
#> [2018-12-02T16:14:19 UTC] * Forgetting 'networkD3' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'igraph' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'Rcpp' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'magrittr' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'globals' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'DBI' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'withr' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'htmltools' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'lazyeval' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'yaml' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'rprojroot' ... OK
#> [2018-12-02T16:14:19 UTC] * Unloading 'htmlwidgets' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'codetools' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'evaluate' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'backports' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'listenv' ... OK
#> [2018-12-02T16:14:20 UTC] Isolated from loaded and attached packages.

This function returns a package manifest containing all the information necessary to restore the previous state. The last manifest produced by isolate_from_packages is available in the .Last.packages_manifest variable and is used by restore_packages (see below).

Let us now suppose that dplyr_0.7.0 and its dependencies have been installed in the dedicated “./new_lib” directory. We can then change the library path to point to ./new_lib, and attach the packages as usual:

.libPaths("./new_lib")
library(dplyr)  # version 0.7.0, installed in "./new_lib"
print(sessionInfo())
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> R version 3.2.5 (2016-04-14)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Red Hat Enterprise Linux
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dplyr_0.7.0       memoise_1.1.0     modulr_0.1.7.9208
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_0.12.11          rstudioapi_0.8.0.9000 magrittr_1.5         
#>  [4] knitr_1.19            devtools_1.13.2       R6_2.2.1             
#>  [7] rlang_0.1.1           pooh_0.3-2            stringr_1.3.1        
#> [10] tools_3.2.5           parallel_3.2.5        assertthat_0.2.0     
#> [13] digest_0.6.18         tibble_1.3.3          glue_1.1.0           
#> [16] evaluate_0.10         rmarkdown_1.8         stringi_1.2.4

Thus, the following code uses dplyr_0.7.0: a tibble is returned instead of a data frame, and the verb arrange requires an explicit argument (.by_group = TRUE) to sort within each group:

cars %>% 
  group_by(speed) %>% 
  arrange(desc(dist), .by_group = TRUE) %>% 
  ungroup %>% 
  head
#> # A tibble: 6 x 2
#>   speed  dist
#>   <dbl> <dbl>
#> 1     4    10
#> 2     4     2
#> 3     7    22
#> 4     7     4
#> 5     8    16
#> 6     9    10

Finally, we can restore the previously attached and loaded packages. By default, the restoration is based on the last manifest, and the previous library paths are restored as well:

restore_packages()
#> [2018-12-02T16:14:20 UTC] Restoring previously attached and loaded packages ...
#> [2018-12-02T16:14:20 UTC] * Restoring library paths ... OK
#> [2018-12-02T16:14:20 UTC] * Forgetting 'dplyr' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'Rcpp' ... OK
#> [2018-12-02T16:14:20 UTC] * Unloading 'magrittr' ... OK
#> [2018-12-02T16:14:21 UTC] * Unloading 'R6' ... OK
#> [2018-12-02T16:14:21 UTC] * Unloading 'rlang' ... OK
#> [2018-12-02T16:14:21 UTC] * Unloading 'tibble' ... OK
#> [2018-12-02T16:14:21 UTC] * Unloading 'glue' ... OK
#> [2018-12-02T16:14:21 UTC] * Unloading 'evaluate' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'listenv' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'backports' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'evaluate' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'codetools' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'htmlwidgets' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'rprojroot' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'yaml' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'lazyeval' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'htmltools' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'withr' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'DBI' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'globals' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'magrittr' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'Rcpp' ... OK
#> [2018-12-02T16:14:21 UTC] * Loading 'igraph' ... OK
#> [2018-12-02T16:14:21 UTC] * Attaching 'dplyr' at position 2 ... OK
#> [2018-12-02T16:14:21 UTC] * Attaching 'future' at position 4 ... OK
#> [2018-12-02T16:14:21 UTC] * Attaching 'RColorBrewer' at position 5 ... OK
#> [2018-12-02T16:14:21 UTC] * Attaching 'chorddiag' at position 6 ... OK
#> [2018-12-02T16:14:21 UTC] * Attaching 'networkD3' at position 7 ... OK
#> [2018-12-02T16:14:21 UTC] Previously attached and loaded packages restored.
print(sessionInfo())
#> R version 3.2.5 (2016-04-14)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Red Hat Enterprise Linux
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dplyr_0.4.1        memoise_1.1.0      future_1.10.0     
#> [4] RColorBrewer_1.1-2 chorddiag_0.1.1    networkD3_0.2.13  
#> [7] modulr_0.1.7.9208 
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_0.12.19          pillar_1.1.0          tools_3.2.5          
#>  [4] digest_0.6.18         evaluate_0.10.1       rlang_0.3.0.1        
#>  [7] igraph_1.0.1          DBI_0.6-1             rstudioapi_0.8.0.9000
#> [10] yaml_2.1.16           parallel_3.2.5        withr_2.1.2          
#> [13] stringr_1.3.1         knitr_1.19            globals_0.12.4       
#> [16] htmlwidgets_0.8       devtools_1.13.2       rprojroot_1.3-2      
#> [19] listenv_0.7.0         pooh_0.3-2            rmarkdown_1.8        
#> [22] magrittr_1.5          backports_1.1.2       codetools_0.2-14     
#> [25] htmltools_0.3.6       assertthat_0.2.0      stringi_1.2.4        
#> [28] lazyeval_0.2.0

Temporarily using other packages

The with_packages function allows you to temporarily isolate a portion of code, so that the previous example simply becomes:

with_verbosity(1L, with_packages("./new_lib", {
  library(dplyr)
  cars %>% 
    group_by(speed) %>% 
    arrange(desc(dist), .by_group = TRUE) %>% 
    ungroup %>% 
    as.data.frame(stringsAsFactors = FALSE) %>% 
    head
}))
#> [2018-12-02T16:14:21 UTC] Isolating from loaded and attached packages ...
#> [2018-12-02T16:14:24 UTC] Isolated from loaded and attached packages.
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> [2018-12-02T16:14:24 UTC] Restoring previously attached and loaded packages ...
#> [2018-12-02T16:14:25 UTC] Previously attached and loaded packages restored.
#>   speed dist
#> 1     4   10
#> 2     4    2
#> 3     7   22
#> 4     7    4
#> 5     8   16
#> 6     9   10

When working with modules, it is sometimes useful to associate a specific library with an on-disk module using with_module_packages, or with a namespace of modules using with_namespace_packages.

In the following example, with_module_packages temporarily sets the library path to the sub-directory lib/x86_64-pc-linux-gnu-library/3.2 of the directory experimental/arranged_cars.

# File: ./modules/experimental/arranged_cars.R

library(modulr)

"experimental/arranged_cars" %provides% {
  with_verbosity(0L, with_module_packages({
    if (!"dplyr" %in% rownames(installed.packages()))
      install.packages("dplyr")
    library(dplyr)
    cars %>%
      group_by(speed) %>%
      arrange(desc(dist), .by_group = TRUE) %>%
      ungroup %>%
      as.data.frame(stringsAsFactors = FALSE) %>%
      head
  }))
}
make("experimental/arranged_cars")
#> [2018-12-02T16:14:25 UTC] Making 'experimental/arranged_cars' ...
#> [2018-12-02T16:14:25 UTC] * Visiting and defining dependencies ...
#> [2018-12-02T16:14:25 UTC] ** Defining 'experimental/arranged_cars' ... OK
#> [2018-12-02T16:14:25 UTC] * Constructing dependency graph ... OK
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> [2018-12-02T16:14:30 UTC] DONE ('experimental/arranged_cars' in 4.5 secs)
#>   speed dist
#> 1     4   10
#> 2     4    2
#> 3     7   22
#> 4     7    4
#> 5     8   16
#> 6     9   10