## Packages (install)
# Names of every package already installed in the local library.
pkgs <- rownames(installed.packages())

# Install only the required packages that are not present yet.
required_pkgs <- c("tidyverse", "QBMS", "here")
missing_pkgs <- setdiff(required_pkgs, pkgs)
if (length(missing_pkgs) > 0) install.packages(missing_pkgs)
Download data from Cassava Base
Data Download from Cassava Base using QBMS Package
This documentation outlines the process of downloading data from Cassava Base through the use of the QBMS (Query Breeding Management System) package. The QBMS package facilitates the interaction with breeding databases such as BMS (Breeding Management System), BreedBase, and GIGWA through standardized BrAPI calls, enabling the efficient retrieval of both phenotypic and genotypic data.
Getting Started:
To begin downloading data from Cassava Base, refer to the attached code snippet. This R package is designed to be user-friendly, catering to the specific needs of those involved in the genetic improvement and analysis of crops.
For more information on how to use the QBMS package, visit the GitHub repository: QBMS on GitHub (https://github.com/icarda-git/QBMS).
Load packages
This is where we check to see if all the necessary packages are installed on your computer.
The code will detect if certain packages are missing from your package library, and if so, will install them automatically.
Now we need to load the packages:
library(tidyverse)
library(QBMS)
library(here)
Historical Data from Cassava Base
The Cassava program at CIAT has a rich history of conducting various trials. The code snippet provided below offers a straightforward method for downloading this historical data, facilitating easy access to valuable research information.
Connection to the Cassava BreedBase server
# Point QBMS at the Cassava Base BrAPI endpoint.
# NOTE(review): time_out is presumably in seconds and page_size the number of
# records requested per call -- confirm against the QBMS documentation.
set_qbms_config(
  "https://cassavabase.org/brapi/v1/calls/",
  engine = "breedbase",
  path = "",
  no_auth = TRUE,
  time_out = 300,
  page_size = 10000
)
List the crops supported by the current BreedBase server
# Ask the configured server which crops it offers; the result is printed.
list_crops()
[1] "Cassava"
Select a crop by name
# "Cassava" is the only crop reported by list_crops() above.
set_crop("Cassava")
List all breeding programs in the selected crop
# Prints one programName row per breeding program of the selected crop.
list_programs()
programName
1 5CP
2 BTI
3 CARI
4 CH
5 CIAT
6 CIP-genebank
7 CNRA
8 Cornell
9 CSIR
10 Embrapa
11 IDIAF
12 IITA
13 INERA_IITA_DRC
14 ISABU
15 ITC
16 KALRO
17 KU
18 NaCRRI
19 NRCRI
20 Rayong
21 SLARI
22 TARI
23 UAC
24 UH
25 UNILA-Indonesia
26 ZARI
Select the desired breeding program by name
# "CIAT" is row 5 of the programName listing above.
set_program("CIAT")
List all trials (grouped by year) in the selected program
# Fetch the trials of the selected program as a plain vector.
# pull() with no column argument extracts the last column of the result
# (dplyr's default); presumably list_trials() returns a single column.
trials <- list_trials() %>% pull()

# Print the trial names.
trials
[1] "Malawi" "Africa" "Vietnam_2018" "Vietnam_2019" "Vietnam_2020"
[6] "Vietnam_2021" "Vietnam_2022" "Vietnam_2023" "Vietnam_2024" "Asia"
[11] "CIAT_1979" "CIAT_1980" "CIAT_1981" "CIAT_1982" "CIAT_1983"
[16] "CIAT_1984" "CIAT_1985" "CIAT_1986" "CIAT_1987" "CIAT_1988"
[21] "CIAT_1989" "CIAT_1990" "CIAT_1991" "CIAT_1992" "CIAT_1993"
[26] "CIAT_1994" "CIAT_1995" "CIAT_1996" "CIAT_1997" "CIAT_1998"
[31] "CIAT_1999" "CIAT_2000" "CIAT_2001" "CIAT_2002" "CIAT_2003"
[36] "CIAT_2004" "CIAT_2005" "CIAT_2006" "CIAT_2007" "CIAT_2008"
[41] "CIAT_2009" "CIAT_2010" "CIAT_2011" "CIAT_2012" "CIAT_2013"
[46] "CIAT_2014" "CIAT_2015" "CIAT_2016" "CIAT_2017" "CIAT_2018"
[51] "CIAT_2019" "CIAT_2020" "CIAT_2021" "CIAT_2022" "CIAT_2023"
[56] "CIAT_2024" "CIAT"
Above you can see all the trials carried out by the Cassava program, grouped by year. For this example, we will use the years from 1979 to 2022.
# Keep only the trials whose name starts with "CIAT".
trials <- trials[str_starts(trials, "CIAT")]

# Print the remaining trial names.
trials
[1] "CIAT_1979" "CIAT_1980" "CIAT_1981" "CIAT_1982" "CIAT_1983" "CIAT_1984"
[7] "CIAT_1985" "CIAT_1986" "CIAT_1987" "CIAT_1988" "CIAT_1989" "CIAT_1990"
[13] "CIAT_1991" "CIAT_1992" "CIAT_1993" "CIAT_1994" "CIAT_1995" "CIAT_1996"
[19] "CIAT_1997" "CIAT_1998" "CIAT_1999" "CIAT_2000" "CIAT_2001" "CIAT_2002"
[25] "CIAT_2003" "CIAT_2004" "CIAT_2005" "CIAT_2006" "CIAT_2007" "CIAT_2008"
[31] "CIAT_2009" "CIAT_2010" "CIAT_2011" "CIAT_2012" "CIAT_2013" "CIAT_2014"
[37] "CIAT_2015" "CIAT_2016" "CIAT_2017" "CIAT_2018" "CIAT_2019" "CIAT_2020"
[43] "CIAT_2021" "CIAT_2022" "CIAT_2023" "CIAT_2024" "CIAT"
# Keep the yearly trials from 1979 to 2022. Selecting by name instead of by
# position (the original dropped elements 45 to 47) stays correct if the
# server later lists additional trials.
trials <- trials[trials %in% paste0("CIAT_", 1979:2022)]

# Print the final selection.
trials
[1] "CIAT_1979" "CIAT_1980" "CIAT_1981" "CIAT_1982" "CIAT_1983" "CIAT_1984"
[7] "CIAT_1985" "CIAT_1986" "CIAT_1987" "CIAT_1988" "CIAT_1989" "CIAT_1990"
[13] "CIAT_1991" "CIAT_1992" "CIAT_1993" "CIAT_1994" "CIAT_1995" "CIAT_1996"
[19] "CIAT_1997" "CIAT_1998" "CIAT_1999" "CIAT_2000" "CIAT_2001" "CIAT_2002"
[25] "CIAT_2003" "CIAT_2004" "CIAT_2005" "CIAT_2006" "CIAT_2007" "CIAT_2008"
[31] "CIAT_2009" "CIAT_2010" "CIAT_2011" "CIAT_2012" "CIAT_2013" "CIAT_2014"
[37] "CIAT_2015" "CIAT_2016" "CIAT_2017" "CIAT_2018" "CIAT_2019" "CIAT_2020"
[43] "CIAT_2021" "CIAT_2022"
Downloading process
The download process may take a short or long time depending on the number of trials to be downloaded.
# Download the data of every study that belongs to one trial.
#
# Arguments:
#   trial  Trial name; passed unchanged to set_trial().
#
# Returns a list with one element per study of the trial, named by study
# name. Each element is whatever get_study_data() returns for that study.
process_trial <- function(trial) {
  # Make `trial` the active trial. The QBMS calls below take no trial
  # argument, so they presumably operate on this selection.
  set_trial(trial)

  # NOTE(review): the ontology is fetched but never used in this function;
  # confirm whether this call can be dropped.
  ontology <- get_trial_obs_ontology()

  studies <- list_studies()
  complete_studies <- studies %>% pull(studyName)

  # Visit each study name in turn: select the study, then fetch its data.
  # set_names() makes the resulting list carry the study names.
  study_data <- map(set_names(complete_studies), function(study_name) {
    set_study(study_name)
    get_study_data()
  })

  study_data
}
# Run process_trial() on every selected trial: one list of studies per trial.
raw_data <- map(trials, process_trial)

# Flatten one level so raw_data becomes a single list of studies.
raw_data <- flatten(raw_data)
Convert list into a data.frame
# Stack all studies into one table. fill = TRUE lets studies with different
# columns be combined, filling the missing ones with NA. Only rows observed
# at plot level are kept.
all_raw <- data.table::rbindlist(raw_data, fill = TRUE) %>%
  as_tibble() %>%
  filter(observationLevel == "plot")

# Dimensions (rows, columns) of the entire data set
dim(all_raw)
Save the data as an .rds file
# Label that identifies this download inside the output file name.
trial_interest <- "hist_raw_data_"

# File name: "01_" + label + today's date + ".rds".
meta_file_name.r <- paste0("01_", trial_interest, Sys.Date(), ".rds")

# Write the combined table to the "data" folder, resolved by here::here().
saveRDS(all_raw, file = here::here("data", meta_file_name.r))