Data preparation

library(massdataset)
library(tidyverse)
library(metid)

ms1_data =
  readr::read_csv(file.path(
    system.file("ms1_peak", package = "metid"),
    "ms1.peak.table.csv"
  ))

ms1_data = data.frame(ms1_data, sample1 = 1, sample2 = 2)

expression_data = ms1_data %>%
  dplyr::select(-c(name:rt))

variable_info =
  ms1_data %>%
  dplyr::select(name:rt) %>%
  dplyr::rename(variable_id = name)

sample_info =
  data.frame(
    sample_id = colnames(expression_data),
    injection.order = c(1, 2),
    class = c("Subject", "Subject"),
    group = c("Subject", "Subject")
  )
rownames(expression_data) = variable_info$variable_id

object = create_mass_dataset(
  expression_data = expression_data,
  sample_info = sample_info,
  variable_info = variable_info
)

object
#> -------------------- 
#> massdataset version: 0.01 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 0 variables x 0 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> Creation ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2021-12-27 12:09:32

Add MS2 to mass_dataset object

path = "./example"
dir.create(path)

ms2_data <- system.file("ms2_data", package = "metid")
file.copy(
  from = file.path(ms2_data, "QC1_MSMS_NCE25.mgf"),
  to = path,
  overwrite = TRUE,
  recursive = TRUE
)
#> [1] TRUE

object =
  massdataset::mutate_ms2(
    object = object,
    column = "rp",
    polarity = "positive",
    ms1.ms2.match.mz.tol = 10,
    ms1.ms2.match.rt.tol = 30
  )
#> Reading mgf data...
#> 25 out of 100 variable have MS2 spectra.
#> Selecting the most intense MS2 spectrum for each peak...

object
#> -------------------- 
#> massdataset version: 0.01 
#> -------------------- 
#> 1.expression_data:[ 100 x 2 data.frame]
#> 2.sample_info:[ 2 x 4 data.frame]
#> 3.variable_info:[ 100 x 3 data.frame]
#> 4.sample_info_note:[ 4 x 2 data.frame]
#> 5.variable_info_note:[ 3 x 2 data.frame]
#> 6.ms2_data:[ 25 variables x 24 MS2 spectra]
#> -------------------- 
#> Processing information (extract_process_info())
#> Creation ---------- 
#>       Package         Function.used                Time
#> 1 massdataset create_mass_dataset() 2021-12-27 12:09:32
#> update_mass_dataset ---------- 
#>       Package         Function.used                Time
#> 1 massdataset update_mass_dataset() 2021-12-27 12:09:32
#> mutate_ms2 ---------- 
#>       Package Function.used                Time
#> 1 massdataset  mutate_ms2() 2021-12-27 12:09:36

object@ms2_data
#> $QC1_MSMS_NCE25.mgf
#> -------------------- 
#> column: rp 
#> polarity: positive 
#> mz_tol: 10 
#> rt_tol (second): 30 
#> -------------------- 
#> 25 variables:
#> pRPLC_603 pRPLC_722 pRPLC_778 pRPLC_1046 pRPLC_1112...
#> 24 MS2 spectra.
#> mz162.112442157672rt37.9743312 mz181.072050304971rt226.14144 mz289.227264404297rt284.711172 mz181.072050673093rt196.800648 mz209.092155077047rt58.3735608...
#> 

Identify metabolites according to MS2

data("snyder_database_rplc0.0.3", package = "metid")

object1 =
  annotate_metabolites_mass_dataset(object = object,
                                    database = snyder_database_rplc0.0.3)
#> 
[33mQC1_MSMS_NCE25.mgf file:
#> 
[39m
[32m25 MS2 spectra.
#> 
[39m
[33mUse all CE values.
#> 
[39m
#> 
[32mIdentifing metabolites with MS/MS database...
#> 
[39m
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |======================================================================| 100%
#> 
#> 
[41mAll done.
#> 
[49m
head(object1@annotation_table)
#>   variable_id       ms2_files_id                ms2_spectrum_id Compound.name
#> 1  pRPLC_1046 QC1_MSMS_NCE25.mgf mz181.072050673093rt196.800648  Theophylline
#> 2  pRPLC_1046 QC1_MSMS_NCE25.mgf mz181.072050673093rt196.800648  Paraxanthine
#> 3  pRPLC_1046 QC1_MSMS_NCE25.mgf mz181.072050673093rt196.800648  Theophylline
#> 4  pRPLC_1112 QC1_MSMS_NCE25.mgf mz209.092155077047rt58.3735608  L-KYNURENINE
#> 5  pRPLC_1307 QC1_MSMS_NCE25.mgf mz314.232707486156rt400.268664      C10:1 AC
#> 6  pRPLC_1860 QC1_MSMS_NCE25.mgf   mz249.185015539689rt579.6807      C16:4 FA
#>     CAS.ID     HMDB.ID KEGG.ID   Lab.ID Adduct  mz.error mz.match.score
#> 1 611-59-6   HMDB01889  C07130 RPLC_732 (M+H)+ 2.4391827      0.9952516
#> 2 611-59-6   HMDB01860  C13747 RPLC_456 (M+H)+ 1.9391827      0.9969962
#> 3     <NA> HMDB0001889    <NA> RPLC_443 (M+H)+ 1.6891827      0.9977199
#> 4     <NA>        <NA>    <NA> RPLC_252 (M+H)+ 0.1626926      0.9999788
#> 5     <NA>   HMDB13205    <NA> RPLC_385 (M+H)+ 1.0812154      0.9990652
#> 6     <NA>        <NA>    <NA> RPLC_529 (M+H)+ 1.0236508      0.9991621
#>     RT.error RT.match.score    CE        SS Total.score Database Level
#> 1  4.8006480      0.9872782 NCE25 0.7523276   0.8717963 MS_0.0.2     1
#> 2  6.4283218      0.9773041 NCE25 0.7523276   0.8697389 MS_0.0.2     1
#> 3 11.7251207      0.9264669 NCE25 0.7523276   0.8572105 MS_0.0.2     1
#> 4  0.4264392      0.9998990 NCE25 0.6469087   0.8234238 MS_0.0.2     1
#> 5 29.0070282      0.6265989 NCE25 0.7931137   0.8029729 MS_0.0.2     1
#> 6 15.6807000      0.8723170 NCE25 0.7315045   0.8336220 MS_0.0.2     1
head(extract_variable_info(object = object1))
#>   variable_id       mz      rt Compound.name   CAS.ID     HMDB.ID KEGG.ID
#> 1   pRPLC_376 472.3032 772.906          <NA>     <NA>        <NA>    <NA>
#> 2   pRPLC_391 466.3292 746.577          <NA>     <NA>        <NA>    <NA>
#> 3   pRPLC_603 162.1125  33.746   L-Carnitine 541-15-1   HMDB00062  C00318
#> 4   pRPLC_629 181.0720  36.360          <NA>     <NA>        <NA>    <NA>
#> 5   pRPLC_685 230.0701 158.205          <NA>     <NA>        <NA>    <NA>
#> 6   pRPLC_722 181.0721 228.305  Theophylline     <NA> HMDB0001889    <NA>
#>     Lab.ID Adduct mz.error mz.match.score  RT.error RT.match.score    CE
#> 1     <NA>   <NA>       NA             NA        NA             NA  <NA>
#> 2     <NA>   <NA>       NA             NA        NA             NA  <NA>
#> 3 RPLC_406 (M+H)+ 1.667894      0.9977770  1.974331      0.9978368 NCE25
#> 4     <NA>   <NA>       NA             NA        NA             NA  <NA>
#> 5     <NA>   <NA>       NA             NA        NA             NA  <NA>
#> 6 RPLC_443 (M+H)+ 1.688262      0.9977224 17.615671      0.8416462 NCE25
#>          SS Total.score Database Level
#> 1        NA          NA     <NA>    NA
#> 2        NA          NA     <NA>    NA
#> 3 0.6048288   0.8013178 MS_0.0.2     1
#> 4        NA          NA     <NA>    NA
#> 5        NA          NA     <NA>    NA
#> 6 0.6071017   0.7633930 MS_0.0.2     1

MS2 plot

ms2_plot_mass_dataset(object = object1, variable_index = 1, database = snyder_database_rplc0.0.3)
#> pRPLC_376 has no annotation with MS2.
#> NULL
ms2_plot_mass_dataset(object = object1, variable_index = 3, database = snyder_database_rplc0.0.3)
#> $pRPLC_603_1

Session information

sessionInfo()
#> R version 4.1.0 (2021-05-18)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] metid_1.2.1        forcats_0.5.1      stringr_1.4.0      dplyr_1.0.7       
#>  [5] purrr_0.3.4        readr_2.0.0        tidyr_1.1.3        tibble_3.1.3      
#>  [9] ggplot2_3.3.5      tidyverse_1.3.1    magrittr_2.0.1     tinytools_0.9.1   
#> [13] massdataset_0.99.1
#> 
#> loaded via a namespace (and not attached):
#>   [1] colorspace_2.0-2      rjson_0.2.20          ellipsis_0.3.2       
#>   [4] leaflet_2.0.4.1       rprojroot_2.0.2       circlize_0.4.14      
#>   [7] GlobalOptions_0.1.2   fs_1.5.0              clue_0.3-59          
#>  [10] rstudioapi_0.13       listenv_0.8.0         furrr_0.2.3          
#>  [13] mzR_2.26.1            affyio_1.62.0         fansi_0.5.0          
#>  [16] lubridate_1.7.10      xml2_1.3.2            codetools_0.2-18     
#>  [19] ncdf4_1.17            doParallel_1.0.16     cachem_1.0.5         
#>  [22] impute_1.66.0         knitr_1.33            jsonlite_1.7.2       
#>  [25] Cairo_1.5-12.2        broom_0.7.9           vsn_3.60.0           
#>  [28] cluster_2.1.2         dbplyr_2.1.1          png_0.1-7            
#>  [31] BiocManager_1.30.16   compiler_4.1.0        httr_1.4.2           
#>  [34] rvcheck_0.1.8         backports_1.2.1       assertthat_0.2.1     
#>  [37] fastmap_1.1.0         lazyeval_0.2.2        limma_3.48.1         
#>  [40] cli_3.0.1             htmltools_0.5.2       tools_4.1.0          
#>  [43] affy_1.70.0           gtable_0.3.0          glue_1.4.2           
#>  [46] MALDIquant_1.20       Rcpp_1.0.7            Biobase_2.52.0       
#>  [49] cellranger_1.1.0      jquerylib_0.1.4       pkgdown_2.0.1        
#>  [52] vctrs_0.3.8           preprocessCore_1.54.0 iterators_1.0.13     
#>  [55] crosstalk_1.1.1       xfun_0.24             globals_0.14.0       
#>  [58] openxlsx_4.2.4        rvest_1.0.1           lifecycle_1.0.0      
#>  [61] XML_3.99-0.6          future_1.21.0         zlibbioc_1.38.0      
#>  [64] MASS_7.3-54           scales_1.1.1          MSnbase_2.18.0       
#>  [67] pcaMethods_1.84.0     ragg_1.1.3            hms_1.1.0            
#>  [70] ProtGenerics_1.24.0   parallel_4.1.0        RColorBrewer_1.1-2   
#>  [73] ComplexHeatmap_2.8.0  yaml_2.2.1            memoise_2.0.0        
#>  [76] pbapply_1.4-3         sass_0.4.0            stringi_1.7.3        
#>  [79] S4Vectors_0.30.0      desc_1.3.0            foreach_1.5.1        
#>  [82] BiocGenerics_0.38.0   zip_2.2.0             BiocParallel_1.26.1  
#>  [85] shape_1.4.6           rlang_0.4.11          pkgconfig_2.0.3      
#>  [88] systemfonts_1.0.2     matrixStats_0.60.0    mzID_1.30.0          
#>  [91] evaluate_0.14         lattice_0.20-44       htmlwidgets_1.5.3    
#>  [94] tidyselect_1.1.1      parallelly_1.27.0     ggsci_2.9            
#>  [97] plyr_1.8.6            R6_2.5.0              IRanges_2.26.0       
#> [100] generics_0.1.0        DBI_1.1.1             pillar_1.6.2         
#> [103] haven_2.4.1           withr_2.4.2           MsCoreUtils_1.4.0    
#> [106] modelr_0.1.8          crayon_1.4.1          utf8_1.2.2           
#> [109] plotly_4.9.4.1        tzdb_0.1.2            rmarkdown_2.9        
#> [112] GetoptLong_1.0.5      grid_4.1.0            readxl_1.3.1         
#> [115] data.table_1.14.0     reprex_2.0.0          digest_0.6.27        
#> [118] gridGraphics_0.5-1    textshaping_0.3.6     stats4_4.1.0         
#> [121] munsell_0.5.0         viridisLite_0.4.0     ggplotify_0.0.8      
#> [124] bslib_0.3.1