Data preparation

library(massdataset)
library(tidyverse)
library(metid)

# Read the example MS1 peak table bundled with the metid package and append
# two constant-valued sample columns (sample1 = 1, sample2 = 2) to it.
ms1_data <-
  system.file("ms1_peak", package = "metid") %>%
  file.path("ms1.peak.table.csv") %>%
  readr::read_csv() %>%
  data.frame(sample1 = 1, sample2 = 2)

# Variable annotation: the columns from `name` through `rt`, with the
# identifier column renamed to `variable_id`.
variable_info <-
  dplyr::rename(dplyr::select(ms1_data, name:rt), variable_id = name)

# Expression table: every remaining column (everything outside name:rt).
expression_data <- dplyr::select(ms1_data, -c(name:rt))

# Label the rows of the expression table with the variable identifiers.
rownames(expression_data) <- variable_info$variable_id

# One row of sample metadata per column of the expression table.
sample_info <- data.frame(
  sample_id = colnames(expression_data),
  injection.order = c(1, 2),
  class = rep("Subject", 2),
  group = rep("Subject", 2)
)

# Bundle the expression table, sample metadata and variable metadata into a
# single mass_dataset object.
object <- massdataset::create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)

# Print a summary of the new object.
object
#> -------------------- 
#> massdataset version: 1.0.18 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 1 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25

Add MS2 to mass_dataset object

# Directory that will hold the example MS2 file.
path <- "./example"
# Create it quietly: dir.create() otherwise warns when the directory already
# exists, which happens every time the example is re-run.
dir.create(path, showWarnings = FALSE, recursive = TRUE)

# Locate the example MS2 file shipped with the metid package.
ms2_data <- system.file("ms2_data", package = "metid")
ms2_file <- file.path(ms2_data, "QC1_MSMS_NCE25.mgf")
if (!file.exists(ms2_file)) {
  # file.copy() only returns FALSE in this case; state the reason explicitly
  # so a failed copy is not silently ignored.
  warning("MS2 example file not found: ", ms2_file, call. = FALSE)
}

# Copy the file into `path`; the logical result is printed (TRUE on success).
file.copy(
  from = ms2_file,
  to = path,
  overwrite = TRUE,
  recursive = TRUE
)
#> [1] FALSE

# Attach MS2 spectra to the MS1 features of `object`, matching them within
# 10 (m/z tolerance) and 30 (retention-time tolerance, reported in seconds).
#
# `path` is passed explicitly so that only the MS2 files placed in the example
# directory are read. Without it the call uses its default search location
# (the working directory per the massdataset documentation -- TODO confirm),
# which can pull in unrelated MS2 files that happen to live there.
object <-
  massdataset::mutate_ms2(
    object = object,
    column = "rp",
    polarity = "positive",
    ms1.ms2.match.mz.tol = 10,
    ms1.ms2.match.rt.tol = 30,
    path = path
  )

# Print a summary of the updated object.
object
#> -------------------- 
#> massdataset version: 1.0.18 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 2 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2022-09-19 17:14:40

# Show the contents of the object's ms2_data slot.
methods::slot(object, "ms2_data")
#> $`Mix_A_NCE25.mzXML;Mix_A_NCE25.mzXML;QC1_MSMS_NCE25_2.mgf;QC1_MSMS_NCE25.mgf`
#> -------------------- 
#> column: rp 
#> polarity: positive 
#> mz_tol: 10 
#> rt_tol (second): 30 
#> -------------------- 
#> 25 variables:
#> pRPLC_603 pRPLC_722 pRPLC_778 pRPLC_1046 pRPLC_1112...
#> 24 MS2 spectra.
#> mz162.112442157672rt37.9743312 mz181.072050304971rt226.14144 mz289.227264404297rt284.711172 mz181.072050673093rt196.800648 mz209.092155077047rt58.3735608...
#> 

Identify metabolites according to MS1

# Load the RPLC database shipped with metid and work on a copy of it.
data("snyder_database_rplc0.0.3", package = "metid")
data_base <- snyder_database_rplc0.0.3

# Blank out the retention times and drop all stored spectra in the copy --
# presumably so that annotation relies on MS1 information only, as the
# section heading states.
data_base@spectra.info$RT <- NA
data_base@spectra.data <- list()

# Annotate the dataset against the stripped-down database.
object1 <- annotate_metabolites_mass_dataset(
  object = object,
  database = data_base
)

# Print a summary of the annotated object.
object1
#> -------------------- 
#> massdataset version: 1.0.18 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> 3 processings in total
#> create_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2022-09-19 17:13:25
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2022-09-19 17:14:40
#> annotate_metabolites_mass_dataset ---------- 
#>   Package                       Function.used                Time
#> 1   metid annotate_metabolites_mass_dataset() 2022-09-19 17:15:04

Identify metabolites according to MS2

# Reload the unmodified RPLC database from metid.
data("snyder_database_rplc0.0.3", package = "metid")

# Annotate again, starting from the previously annotated object1 and using
# the full database this time.
object2 <- annotate_metabolites_mass_dataset(
  object = object1,
  database = snyder_database_rplc0.0.3
)
#> 
#>   |======================================================================| 100%
# Preview the first rows of the annotation table stored in object2.
object2@annotation_table %>% head()
#> # A tibble: 6 × 18
#>   variable_id ms2_files_id  ms2_spectrum_id Compound.name CAS.ID HMDB.ID KEGG.ID
#>   <chr>       <chr>         <chr>           <chr>         <chr>  <chr>   <chr>  
#> 1 pRPLC_10319 NA            NA              (+)-Catechin… NA     NA      NA     
#> 2 pRPLC_10319 NA            NA              (-)Epicatech… NA     NA      NA     
#> 3 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Theophylline  611-5… HMDB01… C07130 
#> 4 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Paraxanthine  611-5… HMDB01… C13747 
#> 5 pRPLC_1046  Mix_A_NCE25.… mz181.07205067… Theophylline  NA     HMDB00… NA     
#> 6 pRPLC_10514 NA            NA              CORTISONE     NA     NA      NA     
#> # … with 11 more variables: Lab.ID <chr>, Adduct <chr>, mz.error <dbl>,
#> #   mz.match.score <dbl>, RT.error <dbl>, RT.match.score <dbl>, CE <chr>,
#> #   SS <dbl>, Total.score <dbl>, Database <chr>, Level <dbl>
# Preview the first rows of the variable information extracted from object2.
extract_variable_info(object = object2) %>% head()
#>   variable_id       mz      rt                           Compound.name
#> 1   pRPLC_376 472.3032 772.906 Chenodeoxycholic acid glycine conjugate
#> 2   pRPLC_391 466.3292 746.577            C18:0 AC (Stearoylcarnitine)
#> 3   pRPLC_603 162.1125  33.746                             L-Carnitine
#> 4   pRPLC_629 181.0720  36.360                             THEOBROMINE
#> 5   pRPLC_685 230.0701 158.205                          Pyridoxic acid
#> 6   pRPLC_722 181.0721 228.305                            Theophylline
#>      CAS.ID     HMDB.ID KEGG.ID   Lab.ID       Adduct  mz.error mz.match.score
#> 1  640-79-9   HMDB00637  C05466 RPLC_871      (M+Na)+ 0.2398883      0.9999540
#> 2 1976-27-8   HMDB00848       0 RPLC_692       (M+K)+ 3.8309850      0.9883275
#> 3  541-15-1   HMDB00062  C00318 RPLC_406       (M+H)+ 1.6678942      0.9977770
#> 4      <NA>        <NA>    <NA> RPLC_313       (M+H)+ 0.0265000      0.9999994
#> 5   82-82-6   HMDB00017  C00847 RPLC_469 (M+HCOO+2H)+ 9.1145000      0.9357010
#> 6      <NA> HMDB0001889    <NA> RPLC_443       (M+H)+ 1.6882624      0.9977224
#>    RT.error RT.match.score    CE        SS Total.score Database Level
#> 1        NA             NA  <NA>        NA   0.9999540 MS_0.0.2     3
#> 2        NA             NA  <NA>        NA   0.9883275 MS_0.0.2     3
#> 3  1.974331      0.9978368 NCE25 0.6048288   0.8013178 MS_0.0.2     1
#> 4        NA             NA  <NA>        NA   0.9999994 MS_0.0.2     3
#> 5        NA             NA  <NA>        NA   0.9357010 MS_0.0.2     3
#> 6 17.615671      0.8416462 NCE25 0.6071017   0.7633930 MS_0.0.2     1

Identify metabolites according to another database

# Load the Orbitrap database shipped with metid.
data("orbitrap_database0.0.3", package = "metid")

# Run a further annotation pass on object2 against this second database.
object3 <- annotate_metabolites_mass_dataset(
  object = object2,
  database = orbitrap_database0.0.3
)
#> 
#>   |======================================================================| 100%
# Preview the first rows of the variable information extracted from object3.
extract_variable_info(object = object3) %>% head()
#>   variable_id       mz      rt                           Compound.name
#> 1   pRPLC_376 472.3032 772.906 Chenodeoxycholic acid glycine conjugate
#> 2   pRPLC_391 466.3292 746.577            C18:0 AC (Stearoylcarnitine)
#> 3   pRPLC_603 162.1125  33.746                             L-Carnitine
#> 4   pRPLC_629 181.0720  36.360                             THEOBROMINE
#> 5   pRPLC_685 230.0701 158.205                          Pyridoxic acid
#> 6   pRPLC_722 181.0721 228.305                            Theophylline
#>      CAS.ID     HMDB.ID KEGG.ID   Lab.ID       Adduct  mz.error mz.match.score
#> 1  640-79-9   HMDB00637  C05466 RPLC_871      (M+Na)+ 0.2398883      0.9999540
#> 2 1976-27-8   HMDB00848       0 RPLC_692       (M+K)+ 3.8309850      0.9883275
#> 3  541-15-1   HMDB00062  C00318 RPLC_406       (M+H)+ 1.6678942      0.9977770
#> 4      <NA>        <NA>    <NA> RPLC_313       (M+H)+ 0.0265000      0.9999994
#> 5   82-82-6   HMDB00017  C00847 RPLC_469 (M+HCOO+2H)+ 9.1145000      0.9357010
#> 6      <NA> HMDB0001889    <NA> RPLC_443       (M+H)+ 1.6882624      0.9977224
#>    RT.error RT.match.score    CE        SS Total.score Database Level
#> 1        NA             NA  <NA>        NA   0.9999540 MS_0.0.2     3
#> 2        NA             NA  <NA>        NA   0.9883275 MS_0.0.2     3
#> 3  1.974331      0.9978368 NCE25 0.6048288   0.8013178 MS_0.0.2     1
#> 4        NA             NA  <NA>        NA   0.9999994 MS_0.0.2     3
#> 5        NA             NA  <NA>        NA   0.9357010 MS_0.0.2     3
#> 6 17.615671      0.8416462 NCE25 0.6071017   0.7633930 MS_0.0.2     1

Session information

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] MSnbase_2.22.0      ProtGenerics_1.28.0 S4Vectors_0.34.0   
#>  [4] mzR_2.30.0          Rcpp_1.0.8.3        Biobase_2.56.0     
#>  [7] BiocGenerics_0.42.0 metid_1.2.24        forcats_0.5.1.9000 
#> [10] stringr_1.4.1       purrr_0.3.4         readr_2.1.2        
#> [13] tidyr_1.2.0         tibble_3.1.7        tidyverse_1.3.1    
#> [16] tinytools_0.9.1     dplyr_1.0.9         ggplot2_3.3.6      
#> [19] magrittr_2.0.3      masstools_1.0.8     massdataset_1.0.18 
#> 
#> loaded via a namespace (and not attached):
#>   [1] readxl_1.4.0                backports_1.4.1            
#>   [3] circlize_0.4.15             systemfonts_1.0.4          
#>   [5] plyr_1.8.7                  lazyeval_0.2.2             
#>   [7] listenv_0.8.0               BiocParallel_1.30.3        
#>   [9] GenomeInfoDb_1.32.2         Rdisop_1.56.0              
#>  [11] digest_0.6.29               foreach_1.5.2              
#>  [13] yulab.utils_0.0.5           htmltools_0.5.2            
#>  [15] fansi_1.0.3                 memoise_2.0.1              
#>  [17] cluster_2.1.3               doParallel_1.0.17          
#>  [19] tzdb_0.3.0                  openxlsx_4.2.5             
#>  [21] limma_3.52.2                globals_0.15.1             
#>  [23] ComplexHeatmap_2.12.1       modelr_0.1.8               
#>  [25] matrixStats_0.62.0          vroom_1.5.7                
#>  [27] pkgdown_2.0.6               prettyunits_1.1.1          
#>  [29] colorspace_2.0-3            rvest_1.0.2                
#>  [31] textshaping_0.3.6           haven_2.5.0                
#>  [33] xfun_0.31                   crayon_1.5.1               
#>  [35] RCurl_1.98-1.7              jsonlite_1.8.0             
#>  [37] impute_1.70.0               iterators_1.0.14           
#>  [39] glue_1.6.2                  gtable_0.3.0               
#>  [41] zlibbioc_1.42.0             XVector_0.36.0             
#>  [43] GetoptLong_1.0.5            DelayedArray_0.22.0        
#>  [45] shape_1.4.6                 scales_1.2.0               
#>  [47] vsn_3.64.0                  DBI_1.1.3                  
#>  [49] progress_1.2.2              viridisLite_0.4.0          
#>  [51] clue_0.3-61                 gridGraphics_0.5-1         
#>  [53] bit_4.0.4                   preprocessCore_1.58.0      
#>  [55] MsCoreUtils_1.8.0           htmlwidgets_1.5.4          
#>  [57] httr_1.4.3                  RColorBrewer_1.1-3         
#>  [59] ellipsis_0.3.2              pkgconfig_2.0.3            
#>  [61] XML_3.99-0.10               sass_0.4.1                 
#>  [63] dbplyr_2.2.1                utf8_1.2.2                 
#>  [65] ggplotify_0.1.0             tidyselect_1.1.2           
#>  [67] rlang_1.0.5                 munsell_0.5.0              
#>  [69] cellranger_1.1.0            tools_4.2.1                
#>  [71] cachem_1.0.6                cli_3.3.0                  
#>  [73] generics_0.1.3              broom_1.0.0                
#>  [75] evaluate_0.15               fastmap_1.1.0              
#>  [77] mzID_1.34.0                 yaml_2.3.5                 
#>  [79] ragg_1.2.2                  bit64_4.0.5                
#>  [81] knitr_1.39                  fs_1.5.2                   
#>  [83] zip_2.2.0                   ncdf4_1.19                 
#>  [85] future_1.26.1               pbapply_1.5-0              
#>  [87] xml2_1.3.3                  compiler_4.2.1             
#>  [89] rstudioapi_0.14             plotly_4.10.0              
#>  [91] png_0.1-7                   affyio_1.66.0              
#>  [93] reprex_2.0.1                bslib_0.3.1                
#>  [95] stringi_1.7.8               desc_1.4.1                 
#>  [97] lattice_0.20-45             Matrix_1.4-1               
#>  [99] ggsci_2.9                   vctrs_0.4.1                
#> [101] furrr_0.3.0                 pillar_1.7.0               
#> [103] lifecycle_1.0.1             BiocManager_1.30.18        
#> [105] jquerylib_0.1.4             MALDIquant_1.21            
#> [107] GlobalOptions_0.1.2         data.table_1.14.2          
#> [109] bitops_1.0-7                GenomicRanges_1.48.0       
#> [111] R6_2.5.1                    pcaMethods_1.88.0          
#> [113] affy_1.74.0                 parallelly_1.32.0          
#> [115] IRanges_2.30.0              codetools_0.2-18           
#> [117] MASS_7.3-57                 assertthat_0.2.1           
#> [119] SummarizedExperiment_1.26.1 rprojroot_2.0.3            
#> [121] rjson_0.2.21                withr_2.5.0                
#> [123] GenomeInfoDbData_1.2.8      parallel_4.2.1             
#> [125] hms_1.1.1                   grid_4.2.1                 
#> [127] rmarkdown_2.14              MatrixGenerics_1.8.1       
#> [129] lubridate_1.8.0