vignettes/work_with_mass_dataset.Rmd
work_with_mass_dataset.Rmd
library(massdataset)
library(tidyverse)
library(metid)
# Read the example MS1 peak table that ships with the metid package.
ms1_data <- readr::read_csv(
  file.path(system.file("ms1_peak", package = "metid"), "ms1.peak.table.csv")
)
# Append two constant columns, sample1 and sample2, which act as the
# per-sample values of this toy data set.
ms1_data <- data.frame(ms1_data, sample1 = 1, sample2 = 2)
# Every column outside the name..rt range is kept as expression values.
expression_data <- dplyr::select(ms1_data, -c(name:rt))
# The name..rt columns describe each variable; `name` is renamed to
# `variable_id`.
variable_info <- dplyr::rename(
  dplyr::select(ms1_data, name:rt),
  variable_id = name
)
# One metadata row per column of expression_data.
sample_info <- data.frame(
  sample_id = colnames(expression_data),
  injection.order = c(1, 2),
  class = rep("Subject", 2),
  group = rep("Subject", 2)
)
# Label the expression rows with the variable identifiers.
rownames(expression_data) <- variable_info[["variable_id"]]
# Combine the three tables into a single object, then print it.
object <- massdataset::create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)
object
#> --------------------
#> massdataset version: 1.0.18
#> --------------------
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 1 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25
# Add MS2 data to the mass_dataset object
# Stage the example MS2 file in a local working directory.
path <- "./example"
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create(path, showWarnings = FALSE)
ms2_data <- system.file("ms2_data", package = "metid")
# NOTE(review): the recorded result of this copy is FALSE, i.e. the file was
# not copied in that run; confirm "QC1_MSMS_NCE25.mgf" ships with metid.
file.copy(
  from = file.path(ms2_data, "QC1_MSMS_NCE25.mgf"),
  to = path,
  overwrite = TRUE,
  recursive = TRUE
)
#> [1] FALSE
# Attach MS2 spectra to the MS1 features; the two tolerances bound the
# MS1-to-MS2 match in m/z and in retention time.
# NOTE(review): no `path` argument is passed, so the "./example" directory
# is not referenced here; confirm where mutate_ms2() looks for MS2 files.
object <- massdataset::mutate_ms2(
  object = object,
  column = "rp",
  polarity = "positive",
  ms1.ms2.match.mz.tol = 10,
  ms1.ms2.match.rt.tol = 30
)
object
#> --------------------
#> massdataset version: 1.0.18
#> --------------------
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-09-19 17:14:40
# Inspect the ms2_data slot of the object directly.
slot(object, "ms2_data")
#> $`Mix_A_NCE25.mzXML;Mix_A_NCE25.mzXML;QC1_MSMS_NCE25_2.mgf;QC1_MSMS_NCE25.mgf`
#> --------------------
#> column: rp
#> polarity: positive
#> mz_tol: 10
#> rt_tol (second): 30
#> --------------------
#> 25 variables:
#> pRPLC_603 pRPLC_722 pRPLC_778 pRPLC_1046 pRPLC_1112...
#> 24 MS2 spectra.
#> mz162.112442157672rt37.9743312 mz181.072050304971rt226.14144 mz289.227264404297rt284.711172 mz181.072050673093rt196.800648 mz209.092155077047rt58.3735608...
#>
# Load the snyder_database_rplc0.0.3 data set from metid.
data("snyder_database_rplc0.0.3", package = "metid")
# Work on a copy with the spectra emptied and every RT set to NA;
# presumably this restricts the annotation to m/z matching (TODO confirm).
data_base <- snyder_database_rplc0.0.3
data_base@spectra.data <- list()
data_base@spectra.info[["RT"]] <- NA
object1 <- metid::annotate_metabolites_mass_dataset(
  object = object,
  database = data_base
)
object1
#> --------------------
#> massdataset version: 1.0.18
#> --------------------
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> --------------------
#> Processing information (extract_process_info())
#> 3 processings in total
#> create_mass_dataset ----------
#> Package Function.used Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25
#> mutate_ms2 ----------
#> Package Function.used Time
#> 1 massdataset mutate_ms2() 2022-09-19 17:14:40
#> annotate_metabolites_mass_dataset ----------
#> Package Function.used Time
#> 1 metid annotate_metabolites_mass_dataset() 2022-09-19 17:15:04
# Annotate object1 against snyder_database_rplc0.0.3 and keep the result
# as object2.
data("snyder_database_rplc0.0.3", package = "metid")
object2 <- annotate_metabolites_mass_dataset(
  object = object1,
  database = snyder_database_rplc0.0.3
)
#> |======================================================================| 100%
# Preview the first rows of the annotation table.
utils::head(object2@annotation_table, n = 6L)
#> # A tibble: 6 × 18
#>   variable_id ms2_files_id  ms2_spectrum_id Compound.name CAS.ID HMDB.ID KEGG.ID
#>   <chr>       <chr>         <chr>           <chr>         <chr>  <chr>   <chr>
#> 1 pRPLC_10319 NA            NA              (+)-Catechin… NA     NA      NA
#> 2 pRPLC_10319 NA            NA              (-)Epicatech… NA     NA      NA
#> 3 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Theophylline  611-5… HMDB01… C07130
#> 4 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Paraxanthine  611-5… HMDB01… C13747
#> 5 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Theophylline  NA     HMDB00… NA
#> 6 pRPLC_10514 NA            NA              CORTISONE     NA     NA      NA
#> # … with 11 more variables: Lab.ID <chr>, Adduct <chr>, mz.error <dbl>,
#> #   mz.match.score <dbl>, RT.error <dbl>, RT.match.score <dbl>, CE <chr>,
#> #   SS <dbl>, Total.score <dbl>, Database <chr>, Level <dbl>
# Preview the variable information of object2.
utils::head(extract_variable_info(object = object2), n = 6L)
#> variable_id mz rt Compound.name
#> 1 pRPLC_376 472.3032 772.906 Chenodeoxycholic acid glycine conjugate
#> 2 pRPLC_391 466.3292 746.577 C18:0 AC (Stearoylcarnitine)
#> 3 pRPLC_603 162.1125 33.746 L-Carnitine
#> 4 pRPLC_629 181.0720 36.360 THEOBROMINE
#> 5 pRPLC_685 230.0701 158.205 Pyridoxic acid
#> 6 pRPLC_722 181.0721 228.305 Theophylline
#> CAS.ID HMDB.ID KEGG.ID Lab.ID Adduct mz.error mz.match.score
#> 1 640-79-9 HMDB00637 C05466 RPLC_871 (M+Na)+ 0.2398883 0.9999540
#> 2 1976-27-8 HMDB00848 0 RPLC_692 (M+K)+ 3.8309850 0.9883275
#> 3 541-15-1 HMDB00062 C00318 RPLC_406 (M+H)+ 1.6678942 0.9977770
#> 4 <NA> <NA> <NA> RPLC_313 (M+H)+ 0.0265000 0.9999994
#> 5 82-82-6 HMDB00017 C00847 RPLC_469 (M+HCOO+2H)+ 9.1145000 0.9357010
#> 6 <NA> HMDB0001889 <NA> RPLC_443 (M+H)+ 1.6882624 0.9977224
#> RT.error RT.match.score CE SS Total.score Database Level
#> 1 NA NA <NA> NA 0.9999540 MS_0.0.2 3
#> 2 NA NA <NA> NA 0.9883275 MS_0.0.2 3
#> 3 1.974331 0.9978368 NCE25 0.6048288 0.8013178 MS_0.0.2 1
#> 4 NA NA <NA> NA 0.9999994 MS_0.0.2 3
#> 5 NA NA <NA> NA 0.9357010 MS_0.0.2 3
#> 6 17.615671 0.8416462 NCE25 0.6071017 0.7633930 MS_0.0.2 1
# Annotate object2 against orbitrap_database0.0.3 and keep the result
# as object3.
data("orbitrap_database0.0.3", package = "metid")
object3 <- annotate_metabolites_mass_dataset(
  object = object2,
  database = orbitrap_database0.0.3
)
#> |======================================================================| 100%
# Preview the variable information of object3.
utils::head(extract_variable_info(object = object3), n = 6L)
#> variable_id mz rt Compound.name
#> 1 pRPLC_376 472.3032 772.906 Chenodeoxycholic acid glycine conjugate
#> 2 pRPLC_391 466.3292 746.577 C18:0 AC (Stearoylcarnitine)
#> 3 pRPLC_603 162.1125 33.746 L-Carnitine
#> 4 pRPLC_629 181.0720 36.360 THEOBROMINE
#> 5 pRPLC_685 230.0701 158.205 Pyridoxic acid
#> 6 pRPLC_722 181.0721 228.305 Theophylline
#> CAS.ID HMDB.ID KEGG.ID Lab.ID Adduct mz.error mz.match.score
#> 1 640-79-9 HMDB00637 C05466 RPLC_871 (M+Na)+ 0.2398883 0.9999540
#> 2 1976-27-8 HMDB00848 0 RPLC_692 (M+K)+ 3.8309850 0.9883275
#> 3 541-15-1 HMDB00062 C00318 RPLC_406 (M+H)+ 1.6678942 0.9977770
#> 4 <NA> <NA> <NA> RPLC_313 (M+H)+ 0.0265000 0.9999994
#> 5 82-82-6 HMDB00017 C00847 RPLC_469 (M+HCOO+2H)+ 9.1145000 0.9357010
#> 6 <NA> HMDB0001889 <NA> RPLC_443 (M+H)+ 1.6882624 0.9977224
#> RT.error RT.match.score CE SS Total.score Database Level
#> 1 NA NA <NA> NA 0.9999540 MS_0.0.2 3
#> 2 NA NA <NA> NA 0.9883275 MS_0.0.2 3
#> 3 1.974331 0.9978368 NCE25 0.6048288 0.8013178 MS_0.0.2 1
#> 4 NA NA <NA> NA 0.9999994 MS_0.0.2 3
#> 5 NA NA <NA> NA 0.9357010 MS_0.0.2 3
#> 6 17.615671 0.8416462 NCE25 0.6071017 0.7633930 MS_0.0.2 1
# Print the R version and the packages in use for this run.
utils::sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] MSnbase_2.22.0 ProtGenerics_1.28.0 S4Vectors_0.34.0
#> [4] mzR_2.30.0 Rcpp_1.0.8.3 Biobase_2.56.0
#> [7] BiocGenerics_0.42.0 metid_1.2.24 forcats_0.5.1.9000
#> [10] stringr_1.4.1 purrr_0.3.4 readr_2.1.2
#> [13] tidyr_1.2.0 tibble_3.1.7 tidyverse_1.3.1
#> [16] tinytools_0.9.1 dplyr_1.0.9 ggplot2_3.3.6
#> [19] magrittr_2.0.3 masstools_1.0.8 massdataset_1.0.18
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.4.0 backports_1.4.1
#> [3] circlize_0.4.15 systemfonts_1.0.4
#> [5] plyr_1.8.7 lazyeval_0.2.2
#> [7] listenv_0.8.0 BiocParallel_1.30.3
#> [9] GenomeInfoDb_1.32.2 Rdisop_1.56.0
#> [11] digest_0.6.29 foreach_1.5.2
#> [13] yulab.utils_0.0.5 htmltools_0.5.2
#> [15] fansi_1.0.3 memoise_2.0.1
#> [17] cluster_2.1.3 doParallel_1.0.17
#> [19] tzdb_0.3.0 openxlsx_4.2.5
#> [21] limma_3.52.2 globals_0.15.1
#> [23] ComplexHeatmap_2.12.1 modelr_0.1.8
#> [25] matrixStats_0.62.0 vroom_1.5.7
#> [27] pkgdown_2.0.6 prettyunits_1.1.1
#> [29] colorspace_2.0-3 rvest_1.0.2
#> [31] textshaping_0.3.6 haven_2.5.0
#> [33] xfun_0.31 crayon_1.5.1
#> [35] RCurl_1.98-1.7 jsonlite_1.8.0
#> [37] impute_1.70.0 iterators_1.0.14
#> [39] glue_1.6.2 gtable_0.3.0
#> [41] zlibbioc_1.42.0 XVector_0.36.0
#> [43] GetoptLong_1.0.5 DelayedArray_0.22.0
#> [45] shape_1.4.6 scales_1.2.0
#> [47] vsn_3.64.0 DBI_1.1.3
#> [49] progress_1.2.2 viridisLite_0.4.0
#> [51] clue_0.3-61 gridGraphics_0.5-1
#> [53] bit_4.0.4 preprocessCore_1.58.0
#> [55] MsCoreUtils_1.8.0 htmlwidgets_1.5.4
#> [57] httr_1.4.3 RColorBrewer_1.1-3
#> [59] ellipsis_0.3.2 pkgconfig_2.0.3
#> [61] XML_3.99-0.10 sass_0.4.1
#> [63] dbplyr_2.2.1 utf8_1.2.2
#> [65] ggplotify_0.1.0 tidyselect_1.1.2
#> [67] rlang_1.0.5 munsell_0.5.0
#> [69] cellranger_1.1.0 tools_4.2.1
#> [71] cachem_1.0.6 cli_3.3.0
#> [73] generics_0.1.3 broom_1.0.0
#> [75] evaluate_0.15 fastmap_1.1.0
#> [77] mzID_1.34.0 yaml_2.3.5
#> [79] ragg_1.2.2 bit64_4.0.5
#> [81] knitr_1.39 fs_1.5.2
#> [83] zip_2.2.0 ncdf4_1.19
#> [85] future_1.26.1 pbapply_1.5-0
#> [87] xml2_1.3.3 compiler_4.2.1
#> [89] rstudioapi_0.14 plotly_4.10.0
#> [91] png_0.1-7 affyio_1.66.0
#> [93] reprex_2.0.1 bslib_0.3.1
#> [95] stringi_1.7.8 desc_1.4.1
#> [97] lattice_0.20-45 Matrix_1.4-1
#> [99] ggsci_2.9 vctrs_0.4.1
#> [101] furrr_0.3.0 pillar_1.7.0
#> [103] lifecycle_1.0.1 BiocManager_1.30.18
#> [105] jquerylib_0.1.4 MALDIquant_1.21
#> [107] GlobalOptions_0.1.2 data.table_1.14.2
#> [109] bitops_1.0-7 GenomicRanges_1.48.0
#> [111] R6_2.5.1 pcaMethods_1.88.0
#> [113] affy_1.74.0 parallelly_1.32.0
#> [115] IRanges_2.30.0 codetools_0.2-18
#> [117] MASS_7.3-57 assertthat_0.2.1
#> [119] SummarizedExperiment_1.26.1 rprojroot_2.0.3
#> [121] rjson_0.2.21 withr_2.5.0
#> [123] GenomeInfoDbData_1.2.8 parallel_4.2.1
#> [125] hms_1.1.1 grid_4.2.1
#> [127] rmarkdown_2.14 MatrixGenerics_1.8.1
#> [129] lubridate_1.8.0