Science is often not reproducible or repeatable, even within the same lab group over time.
Source: Wolkovich et al. GCB 2012.
Source: http://bit.ly/Tprrx8
+
Open Science
# Fetch the list of hadley's GitHub repositories as JSON and parse it into an
# R list.
library(RCurl)
library(RJSONIO)
dat <- fromJSON(getURL("https://api.github.com/users/hadley/repos"))

# Keep four fields per repository, bind them into one data frame, then melt
# it to long format.
library(plyr)
library(reshape2)
dat_df <- ldply(dat, function(x) {
  as.data.frame(x[names(x) %in% c("name", "watchers_count", "forks", "open_issues")])
})
dat_melt <- melt(dat_df)

# Fit a linear model of the melted values against the variable they came from.
lm(value ~ variable, data = dat_melt)

# Plot each repository's values, coloured by variable, with flipped axes.
library(ggplot2)
ggplot(dat_melt, aes(name, value, colour = variable)) +
  geom_point() +
  coord_flip()
API keys can be stored in a user's .Rprofile
file
.Rprofile
files
# Register API keys as session options (one call per key); they can then be
# read back with getOption("<name>").
options(MendeleyKey = "uf5daib7wyil7ag5buc")
options(MendeleyPrivateKey = "faj2os5dyd7jop2fok6")
options(PlosApiKey = "ef3vip9yak7od3hud4g")
options(SpringerMetdataKey = "ri9hi7woc6jax4vaf8w")
# Call the github json data within R using the RCurl pkg
library(RCurl)
getURL("https://api.github.com/repos/hadley/ggplot2") # or getForm()/postForm()
[1] "{\"has_downloads\":true,\"full_name\":\"hadley/ggplot2\",\"owner\":{\"gravatar_id\":\"7ba164f40a50bc23dbb2aa825fb7bc16\",\"login\":\"hadley\",\"avatar_url\":\"https://secure.gravatar.com/avatar/7ba164f40a50bc23dbb2aa825fb7bc16?d=https://a248.e.akamai.net/assets.github.com%2Fimages%2Fgravatars%2Fgravatar-140.png\",\"url\":\"https://api.github.com/users/hadley\",\"id\":4196},\"forks_count\":53,\"homepage\":\"http://had.co.nz/ggplot2\",\"svn_url\":\"https://github.com/hadley/ggplot2\",\"mirror_url\":null,\"git_url\":\"git://github.com/hadley/ggplot2.git\",\"pushed_at\":\"2012-08-17T20:49:44Z\",\"network_count\":53,\"forks\":53,\"has_wiki\":true,\"language\":\"R\",\"created_at\":\"2008-05-25T01:21:32Z\",\"watchers\":392,\"watchers_count\":392,\"description\":\"An implementation of the Grammar of Graphics in R\",\"html_url\":\"https://github.com/hadley/ggplot2\",\"clone_url\":\"https://github.com/hadley/ggplot2.git\",\"open_issues\":106,\"open_issues_count\":106,\"has_issues\":true,\"size\":1722,\"fork\":false,\"updated_at\":\"2012-08-21T16:29:13Z\",\"ssh_url\":\"git@github.com:hadley/ggplot2.git\",\"name\":\"ggplot2\",\"url\":\"https://api.github.com/repos/hadley/ggplot2\",\"private\":false,\"id\":19438,\"master_branch\":\"master\"}"
# And parse the results to more R friendly list
library(RJSONIO)
fromJSON(getURL("https://api.github.com/repos/hadley/ggplot2"))
$has_downloads [1] TRUE $full_name [1] "hadley/ggplot2" ....
# Or use httr package by Hadley Wickham
library(httr)
tt <- GET("https://api.github.com/repos/hadley/ggplot2")
content(tt) # content auto-detects data type, and parses
$has_downloads [1] TRUE $full_name [1] "hadley/ggplot2" ....
httr
#' Count GBIF occurrence records matching a query
#'
#' Sends a GET request to `url` with the non-NULL search parameters as the
#' query string, then reads the `totalMatched` attribute of the first
#' `gbif:summary` node in the parsed response.
#'
#' @param scientificname Value for the `scientificname` query parameter, or
#'   `NULL` to omit it.
#' @param coordinatestatus Value for the `coordinatestatus` query parameter,
#'   or `NULL` to omit it.
#' @param url Endpoint to query.
#' @param curl Not used by the body; kept so existing callers that pass it
#'   keep working.
#' @return The `totalMatched` attribute converted with `as.numeric()`.
occurrencecount <- function(scientificname = NULL, coordinatestatus = NULL,
                            url = "http://data.gbif.org/ws/rest/occurrence/count",
                            curl = getCurlHandle()) {
  # The compact fxn is a great way to gather parameters - removes all NULL
  querystr <- compact(list(
    scientificname = scientificname,
    coordinatestatus = coordinatestatus
  ))
  temp <- GET(url, query = querystr)
  # Drill into the parsed content down to the gbifResponse element.
  out <- content(temp)$doc$children$gbifResponse
  as.numeric(xmlGetAttr(getNodeSet(out, "//gbif:summary")[[1]], "totalMatched"))
}
httr
$doc $file [1] "" $version [1] "1.0" </gbif:statements> <gbif:stylesheet>http://data.gbif.org/ws/rest/occurrence/stylesheet</gbif:stylesheet> <gbif:parameter name="request" value="count"/> <gbif:parameter name="service" value="occurrence"/> <gbif:parameter name="scientificname" value="Abies concolor"/> <gbif:parameter name="coordinatestatus" value="true"/> <gbif:summary totalMatched="597"/> </gbif:header> </gbif:gbifResponse>
httr
# Load the packages used by occurrencecount(), then count records for
# "Abies concolor" with coordinatestatus = TRUE.
library(XML)
library(httr)
library(plyr)
occurrencecount(scientificname = "Abies concolor", coordinatestatus = TRUE)
[1] 597
rplos
# Load rplos and call plot_throughtime() for the term "reproducible science".
# NOTE(review): the meaning of the second argument (500) is not visible here;
# confirm against the rplos documentation.
library(rplos)
plot_throughtime(list("reproducible science"), 500)
rplos
!rplos
!RMendeley
# Look up one document in a group. `mc` is not defined in this excerpt;
# presumably an RMendeley credentials/connection object, and the two numbers
# presumably the group id and document id -- TODO confirm.
groupDocInfo(mc, 530031, 4344945792)
$abstract [1] "SUMMARY: Modern biological experiments create vast amounts of data which are geographically distributed. These datasets consist of petabytes of raw data and billions of documents. Yet to the best of our knowledge, a search engine technology that searches and cross-links all different data types in life sciences does not exist..... $authors $authors[[1]] forename surname "Dominic S" "L\xfctjohann" # ....
rdryad
# Get URL for a specific dataset
dryaddat <- download_url("10255/dryad.1759")
# Download the file from the Dryad servers
file <- dryad_getfile(dryaddat)
# Just first four columns
head(file[, 1:4])
year nest.identity season clutch.size 1 2001 1 0 6 2 2001 1 0 6 3 2001 1 0 6 4 2001 1 0 6 5 2001 1 0 6 6 2001 1 0 6
raltmet
# Three example calls shown under the raltmet heading: one keyed by a GitHub
# org/repo pair, one by a Dryad DOI, and one by a numeric id.
# NOTE(review): what each call returns is not visible here -- confirm against
# the package documentation.
GitHub(userorg = "ropensci", repo = "rmendeley")
totimp(id = "10.5061/dryad.8671")
stackexchange(ids = 16632)
rgbif
# Request an occurrence list for "Danaus plexippus" and keep the result in
# `distribution`.
distribution <- occurrencelist(
  sciname = "Danaus plexippus",
  coordinatestatus = TRUE,
  maxresults = 1000,
  latlongdf = TRUE
)
(rfigshare)
R
directly to one's figshare account.
> figshare(data) # code isn't ready yet, but once it is, it will return a persistent identifier
knitr
+ Markdown + GitHub
GitHub automatically renders Markdown and even provides syntax highlighting
knitr
+ Markdown + GitHub = executable paper
knitr
+ Markdown + GitHub = pre-publication review
knitcitations
citet(c(Halpern2006 = "10.1111/j.1461-0248.2005.00827.x"))
# then cite in your markdown file
citet("Halpern2006")
# or read citations from a bibtex file which can be automatically generated and updated from services like Mendeley
bib <- read.bibtex("example.bib")
# then cite inline
citet(bib[["knitr"]])
knitcitations
by Carl Boettiger @ GitHub/
#