read.so

Read tables from Stack Overflow questions into R

Edward Visel R

For further information including complete documentation, see read.so’s full website.

read.so

Travis-CI Build Status AppVeyor Build status Coverage Status

Installation

read.so is not on CRAN, but you can install it with

# install.packages("devtools")
devtools::install_github("alistaire47/read.so")

Read Tables from Stack Overflow Questions into R

Sometimes you see a really interesting question on Stack Overflow, but the asker only provides the data as a presentation-style table instead of as runnable R code. Fear no more! read.so will read even heinous tables into a data frame in a trice.

Read data frame print output back into a data frame

For instance, should you want to bring output copied from the R console back into your own session, use read.so for a data.frame and read_so for a tibble. Pass in a file path, a raw string of text, or a vector of lines—or, if the data is on the clipboard, nothing at all—and the functions will grab it for you:

library(read.so)

iris_lines <- capture.output(head(iris))

iris_lines
#> [1] "  Sepal.Length Sepal.Width Petal.Length Petal.Width Species"
#> [2] "1          5.1         3.5          1.4         0.2  setosa"
#> [3] "2          4.9         3.0          1.4         0.2  setosa"
#> [4] "3          4.7         3.2          1.3         0.2  setosa"
#> [5] "4          4.6         3.1          1.5         0.2  setosa"
#> [6] "5          5.0         3.6          1.4         0.2  setosa"
#> [7] "6          5.4         3.9          1.7         0.4  setosa"

read.so(iris_lines)
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.1         3.5          1.4         0.2  setosa
#> 2          4.9         3.0          1.4         0.2  setosa
#> 3          4.7         3.2          1.3         0.2  setosa
#> 4          4.6         3.1          1.5         0.2  setosa
#> 5          5.0         3.6          1.4         0.2  setosa
#> 6          5.4         3.9          1.7         0.4  setosa

read_so(iris_lines)
#> # A tibble: 6 x 5
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>          <dbl>       <dbl>        <dbl>       <dbl> <chr>  
#> 1         5.10        3.50         1.40       0.200 setosa 
#> 2         4.90        3.00         1.40       0.200 setosa 
#> 3         4.70        3.20         1.30       0.200 setosa 
#> 4         4.60        3.10         1.50       0.200 setosa 
#> 5         5.00        3.60         1.40       0.200 setosa 
#> 6         5.40        3.90         1.70       0.400 setosa

clipr::write_clip(head(iris))

read.so()
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.1         3.5          1.4         0.2  setosa
#> 2          4.9         3.0          1.4         0.2  setosa
#> 3          4.7         3.2          1.3         0.2  setosa
#> 4          4.6         3.1          1.5         0.2  setosa
#> 5          5.0         3.6          1.4         0.2  setosa
#> 6          5.4         3.9          1.7         0.4  setosa

Further, read_so will attempt to read in the results of printing a tibble:

mtcars_lines <- capture.output(tibble::as_tibble(mtcars))

mtcars_lines
#>  [1] "# A tibble: 32 x 11"                                                 
#>  [2] "     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb"
#>  [3] " * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>"
#>  [4] " 1  21.0    6.  160.  110.  3.90  2.62  16.5    0.    1.    4.    4."
#>  [5] " 2  21.0    6.  160.  110.  3.90  2.88  17.0    0.    1.    4.    4."
#>  [6] " 3  22.8    4.  108.   93.  3.85  2.32  18.6    1.    1.    4.    1."
#>  [7] " 4  21.4    6.  258.  110.  3.08  3.22  19.4    1.    0.    3.    1."
#>  [8] " 5  18.7    8.  360.  175.  3.15  3.44  17.0    0.    0.    3.    2."
#>  [9] " 6  18.1    6.  225.  105.  2.76  3.46  20.2    1.    0.    3.    1."
#> [10] " 7  14.3    8.  360.  245.  3.21  3.57  15.8    0.    0.    3.    4."
#> [11] " 8  24.4    4.  147.   62.  3.69  3.19  20.0    1.    0.    4.    2."
#> [12] " 9  22.8    4.  141.   95.  3.92  3.15  22.9    1.    0.    4.    2."
#> [13] "10  19.2    6.  168.  123.  3.92  3.44  18.3    1.    0.    4.    4."
#> [14] "# ... with 22 more rows"

read_so(mtcars_lines)
#> # A tibble: 10 x 11
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21.0    6.  160.  110.  3.90  2.62  16.5    0.    1.    4.    4.
#>  2  21.0    6.  160.  110.  3.90  2.88  17.0    0.    1.    4.    4.
#>  3  22.8    4.  108.   93.  3.85  2.32  18.6    1.    1.    4.    1.
#>  4  21.4    6.  258.  110.  3.08  3.22  19.4    1.    0.    3.    1.
#>  5  18.7    8.  360.  175.  3.15  3.44  17.0    0.    0.    3.    2.
#>  6  18.1    6.  225.  105.  2.76  3.46  20.2    1.    0.    3.    1.
#>  7  14.3    8.  360.  245.  3.21  3.57  15.8    0.    0.    3.    4.
#>  8  24.4    4.  147.   62.  3.69  3.19  20.0    1.    0.    4.    2.
#>  9  22.8    4.  141.   95.  3.92  3.15  22.9    1.    0.    4.    2.
#> 10  19.2    6.  168.  123.  3.92  3.44  18.3    1.    0.    4.    4.

Read Markdown tables into data frames

When you need to read Markdown tables into R, read.so has you covered with read.md and read_md:

chick_lines <- capture.output(
    knitr::kable(head(ChickWeight), format = "markdown")
)

cat(chick_lines, sep = "\n")
#> 
#> 
#> | weight| Time|Chick |Diet |
#> |------:|----:|:-----|:----|
#> |     42|    0|1     |1    |
#> |     51|    2|1     |1    |
#> |     59|    4|1     |1    |
#> |     64|    6|1     |1    |
#> |     76|    8|1     |1    |
#> |     93|   10|1     |1    |

read.md(chick_lines)
#>   weight Time Chick Diet
#> 1     42    0     1    1
#> 2     51    2     1    1
#> 3     59    4     1    1
#> 4     64    6     1    1
#> 5     76    8     1    1
#> 6     93   10     1    1

read_md(chick_lines)
#> # A tibble: 6 x 4
#>   weight  Time Chick  Diet
#>    <dbl> <dbl> <dbl> <dbl>
#> 1    42.    0.    1.    1.
#> 2    51.    2.    1.    1.
#> 3    59.    4.    1.    1.
#> 4    64.    6.    1.    1.
#> 5    76.    8.    1.    1.
#> 6    93.   10.    1.    1.

They can handle a number of formats, including tables with delimiter rows composed of “-”, “=”, “+”, and whitespace.

Read str results back into a data frame

If all you have is the results of calling str on a data frame, read.str will read as many complete rows as possible into a new data frame of the same class as the original:

warp_lines <- capture.output(str(warpbreaks))

warp_lines
#> [1] "'data.frame':\t54 obs. of  3 variables:"                                  
#> [2] " $ breaks : num  26 30 54 25 70 52 51 26 67 18 ..."                       
#> [3] " $ wool   : Factor w/ 2 levels \"A\",\"B\": 1 1 1 1 1 1 1 1 1 1 ..."      
#> [4] " $ tension: Factor w/ 3 levels \"L\",\"M\",\"H\": 1 1 1 1 1 1 1 1 1 2 ..."

read.str(warp_lines)
#>    breaks wool tension
#> 1      26    A       L
#> 2      30    A       L
#> 3      54    A       L
#> 4      25    A       L
#> 5      70    A       L
#> 6      52    A       L
#> 7      51    A       L
#> 8      26    A       L
#> 9      67    A       L
#> 10     18    A       M

Read tibble::glimpse results back into a data frame

Similarly, if the data was printed by tibble::glimpse, try read.glimpse or read_glimpse:

states <- data.frame(state.name, state.abb, state.region, state.division, 
                     state.area, center = state.center, state.x77)

states_lines <- capture.output(tibble::glimpse(states))

states_lines
#>  [1] "Observations: 50"                                                         
#>  [2] "Variables: 15"                                                            
#>  [3] "$ state.name     <fct> Alabama, Alaska, Arizona, Arkansas, California,..."
#>  [4] "$ state.abb      <fct> AL, AK, AZ, AR, CA, CO, CT, DE, FL, GA, HI, ID,..."
#>  [5] "$ state.region   <fct> South, West, West, South, West, West, Northeast..."
#>  [6] "$ state.division <fct> East South Central, Pacific, Mountain, West Sou..."
#>  [7] "$ state.area     <dbl> 51609, 589757, 113909, 53104, 158693, 104247, 5..."
#>  [8] "$ center.x       <dbl> -86.7509, -127.2500, -111.6250, -92.2992, -119...."
#>  [9] "$ center.y       <dbl> 32.5901, 49.2500, 34.2192, 34.7336, 36.5341, 38..."
#> [10] "$ Population     <dbl> 3615, 365, 2212, 2110, 21198, 2541, 3100, 579, ..."
#> [11] "$ Income         <dbl> 3624, 6315, 4530, 3378, 5114, 4884, 5348, 4809,..."
#> [12] "$ Illiteracy     <dbl> 2.1, 1.5, 1.8, 1.9, 1.1, 0.7, 1.1, 0.9, 1.3, 2...."
#> [13] "$ Life.Exp       <dbl> 69.05, 69.31, 70.55, 70.66, 71.71, 72.06, 72.48..."
#> [14] "$ Murder         <dbl> 15.1, 11.3, 7.8, 10.1, 10.3, 6.8, 3.1, 6.2, 10...."
#> [15] "$ HS.Grad        <dbl> 41.3, 66.7, 58.1, 39.9, 62.6, 63.9, 56.0, 54.6,..."
#> [16] "$ Frost          <dbl> 20, 152, 15, 65, 20, 166, 139, 103, 11, 60, 0, ..."
#> [17] "$ Area           <dbl> 50708, 566432, 113417, 51945, 156361, 103766, 4..."

read_glimpse(states_lines)
#> # A tibble: 4 x 15
#>   state.name state.abb state.region state.division     state.area center.x
#> * <fct>      <fct>     <fct>        <fct>                   <dbl>    <dbl>
#> 1 Alabama    AL        South        East South Central     51609.    -86.8
#> 2 Alaska     AK        West         Pacific               589757.   -127. 
#> 3 Arizona    AZ        West         Mountain              113909.   -112. 
#> 4 Arkansas   AR        South        West Sou...            53104.    -92.3
#> # ... with 9 more variables: center.y <dbl>, Population <dbl>,
#> #   Income <dbl>, Illiteracy <dbl>, Life.Exp <dbl>, Murder <dbl>,
#> #   HS.Grad <dbl>, Frost <dbl>, Area <dbl>