Daniel Vaulot
2021-02-05
List
Matrix
Factors
Data frames
df <- data.frame(label = letters[1:6], id = 1:6, value = rnorm(6, mean = 0, sd = 1), flag=c(TRUE, FALSE), # recycling rule stringsAsFactors = FALSE) df
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUE4 d 4 -0.8592721 FALSE5 e 5 1.1239631 TRUE6 f 6 -1.1016689 FALSEstringsAsFactors = FALSE dim(df) # returns the dimensions of data frame
[1] 6 4nrow(df) # number of rows
[1] 6ncol(df) # number of columns
[1] 4str(df) # structure of data frame - name, type and preview of data in each column
'data.frame': 6 obs. of 4 variables: $ label: chr "a" "b" "c" "d" ... $ id : int 1 2 3 4 5 6 $ value: num 1.395 0.444 -0.527 -0.859 1.124 ... $ flag : logi TRUE FALSE TRUE FALSE TRUE FALSEcolnames(df) # columns names
[1] "label" "id" "value" "flag"df[i,j] notation, first index corresponds to row, second index to columndf[5, 3]
[1] 1.123963df[i,j] notation, first index corresponds to row, second index to columndf[5, 3]
[1] 1.123963df[5, "value"]
[1] 1.123963df[i,j] notationdf[, 3]
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689df[, "value"]
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689$notationdf$value
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689$notationdf$value
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689$ for the column, [i] for the rowdf$value[5]
[1] 1.123963df[i,j] notationdf[1, ]
label id value flag1 a 1 1.395291 TRUE df[df$id <= 3,]
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUESelect lines for which the label is c
df[df$id <= 3,]
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUESelect lines for which the label is c
df[df$label == "c", ]
label id value flag3 c 3 -0.5267217 TRUEThis syntax is complicated - tidyverse packages make it much more easy to manipulate and remember
R-session-04.zipR-session-04-data_wrangling.RR for data science: (Chapter 5)
Cheat sheets


library("readxl") # Import the data from Excel filelibrary("readr") # Import the data from Excel filelibrary("dplyr") # filter and reformat data frameslibrary("tidyr") # make data tidylibrary("stringr") # manipulate stringslibrary("lubridate") # manipulate date library("ggplot2") # graphics






samples <- readr::read_tsv("data/CARBOM data.txt")samples <- readr::read_tsv("data/CARBOM data.txt")
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 13/11/2013 | 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 13/11/2013 | 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 18/11/2013 | 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 18/11/2013 | 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
| 122 | 2 | 18/11/2013 | 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
| 125 | 2 | 98 | 18/11/2013 | 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 18/11/2013 | 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
| 127 | 2 | 18/11/2013 | 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
| 13 | 1 | 86 | 13/11/2013 | 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 18/11/2013 | 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- readr::read_tsv("data/CARBOM data.txt")
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 13/11/2013 | 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 13/11/2013 | 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 18/11/2013 | 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 18/11/2013 | 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
| 122 | 2 | 18/11/2013 | 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
| 125 | 2 | 98 | 18/11/2013 | 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 18/11/2013 | 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
| 127 | 2 | 18/11/2013 | 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
| 13 | 1 | 86 | 13/11/2013 | 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 18/11/2013 | 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
readr::read_csv() : read comma delimited files
readr::write_tsv() : write tab delimited files

samples <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_boat")samples <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_boat")
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
| 122 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
| 127 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
| 122 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
| 127 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- tidyr::fill(samples, station)samples <- tidyr::fill(samples, station)
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
readr::write_tsv(samples, "data/CARBOM data fixed.tsv")readr::write_tsv(samples, "data/CARBOM data fixed.tsv")
openxlsx::write.xlsx : write tab delimited files
Many options: specific sheet, formatting etc...
openxlsx::write.xlsx(samples, "data/CARBOM data fixed.xlsx")
@allison_horst
colnames(samples)
[1] "sample number" "transect" "station" "date" [5] "time" "depth" "level" "latitude" [9] "longitude" "picoeuks" "nanoeuks" "phosphates" [13] "nitrates" "temperature" "salinity"summary(samples$depth)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 5.0 5.0 50.0 56.6 100.0 140.0 3samples_select <- dplyr::select(samples, transect, `sample number`, station, depth, latitude, longitude, picoeuks, nanoeuks)samples_select <- dplyr::select(samples, transect, `sample number`, station, depth, latitude, longitude, picoeuks, nanoeuks)
| transect | sample number | station | depth | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|
| 1 | 10 | 81 | 140 | -27.42 | -44.72 | 3278 | 1232 |
| 1 | 11 | 85 | 110 | -26.80 | -45.30 | 16312 | 1615 |
| 2 | 120 | 96 | 5 | -27.39 | -47.82 | 1150 | 75 |
| 2 | 121 | 96 | 30 | -27.39 | -47.82 | 1737 | 218 |
| 2 | 122 | 96 | 50 | -27.39 | -47.82 | 853 | 234 |
| 2 | 125 | 98 | 5 | -27.59 | -47.39 | 3086 | 1300 |
| 2 | 126 | 98 | 50 | -27.59 | -47.39 | 1217 | 782 |
| 2 | 127 | 98 | 85 | -27.59 | -47.39 | 3420 | 226 |
| 1 | 13 | 86 | 105 | -26.33 | -45.41 | 6366 | 1007 |
| 2 | 140 | 101 | 5 | -27.79 | -46.96 | 500 | 366 |
samples_select <- dplyr::select(samples, transect:nanoeuks)samples_select <- dplyr::select(samples, transect:nanoeuks)
| transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
| 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
samples_select <- dplyr::select(samples, -nitrates, -phosphates)samples_select <- dplyr::select(samples, -nitrates, -phosphates)
| sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 17.3 | 35.9 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 21.3 | 36.5 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 23.1 | 33.5 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 22.6 | 33.7 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 20.3 | 35.9 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 23.1 | 35.7 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 23.7 | 37.2 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 22.9 | 37.0 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 20.9 | 36.3 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 23.5 | 36.5 |
samples_select <- samples %>% dplyr::select(transect:nanoeuks)samples_select <- samples %>% dplyr::select(transect:nanoeuks)
| transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
| 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
| 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
| 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
samples_select <- samples %>% dplyr::select(transect:nanoeuks)samples <- samples %>% dplyr::rename(sample_number = `sample number`)samples <- samples %>% dplyr::rename(sample_number = `sample number`)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- samples %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100) samples <- samples %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | 88.84910 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | 78.47286 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | 60.88044 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | 93.80143 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
samples_select <- samples %>% dplyr::select(sample_number:nanoeuks, level) %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100) samples_select <- samples %>% dplyr::select(sample_number:nanoeuks, level) %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 72.68293 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 90.99124 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 93.87755 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 88.84910 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 78.47286 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 70.36024 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 60.88044 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 93.80143 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 86.34206 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 57.73672 |
samples <- samples %>% dplyr::mutate(sample_label = str_c("TR",transect,"St",station, sep="_")) samples <- samples %>% dplyr::mutate(sample_label = str_c("TR",transect,"St",station, sep="_"))
| sample_number | transect | station | date | time | sample_label |
|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | TR_1_St_81 |
| 11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | TR_1_St_85 |
| 120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
| 121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
| 122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
| 125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
| 126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
| 127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
| 13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | TR_1_St_86 |
| 140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | TR_2_St_101 |
samples <- samples %>% dplyr::mutate(time = str_c(lubridate::hour(time), lubridate::minute(time), sep=":")) samples <- samples %>% dplyr::mutate(time = str_c(lubridate::hour(time), lubridate::minute(time), sep=":"))
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
| 121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | 88.84910 |
| 122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | 78.47286 |
| 125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
| 126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | 60.88044 |
| 127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | 93.80143 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
| 140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
lubridate package to manipulate datessamples <- samples %>% dplyr::arrange(transect, station)samples <- samples %>% dplyr::arrange(transect, station)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 | 0.47 | 1.07 | 19.7 | 36.3 | 61.22759 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 | 0.47 | 1.07 | 19.7 | 36.3 | 69.26705 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 | 0.20 | 0.26 | 17.3 | 35.9 | 72.03351 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 | 0.47 | 1.51 | 19.5 | 36.1 | 90.86771 |
samples <- samples %>% dplyr::mutate(station = as.numeric(station)) %>% dplyr::arrange(transect, station) samples <- samples %>% dplyr::mutate(station = as.numeric(station)) %>% dplyr::arrange(transect, station)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 | 0.47 | 1.07 | 19.7 | 36.3 | 61.22759 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 | 0.47 | 1.07 | 19.7 | 36.3 | 69.26705 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 | 0.20 | 0.26 | 17.3 | 35.9 | 72.03351 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 | 0.47 | 1.51 | 19.5 | 36.1 | 90.86771 |
stations_count <- samples %>% dplyr::count(transect) stations_count <- samples %>% dplyr::count(transect)
| transect | n |
|---|---|
| 0 | 5 |
| 1 | 5 |
| 2 | 18 |
samples_mean <- samples %>% dplyr::group_by(transect, station) %>% dplyr::summarise(n_samples = n(), mean_pico_percent = mean(pico_pct, na.rm=TRUE)) samples_mean <- samples %>% dplyr::group_by(transect, station) %>% dplyr::summarise(n_samples = n(), mean_pico_percent = mean(pico_pct, na.rm=TRUE))
| transect | station | n_samples | mean_pico_percent |
|---|---|---|---|
| 0 | 6 | 2 | 65.24732 |
| 0 | 19 | 1 | 52.81135 |
| 0 | 21 | 1 | 54.57674 |
| 0 | 26 | 1 | 51.44640 |
| 1 | 81 | 2 | 72.35822 |
| 1 | 85 | 1 | 90.99124 |
| 1 | 86 | 1 | 86.34206 |
| 1 | 87 | 1 | 90.86771 |
| 2 | 96 | 3 | 87.06651 |
| 2 | 98 | 3 | 75.01403 |
samples_surf <- samples %>% dplyr::filter(level == "Surf")samples_surf <- samples %>% dplyr::filter(level == "Surf")
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
| 125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
| 140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
| 155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 | 0.25 | 0.37 | 23.0 | 36.9 | 95.17426 |
| 165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 | 0.29 | 0.28 | 22.4 | 36.4 | 76.31027 |
| Trichod.1 | 2 | Surf | -27.80 | -47.10 | 1002 | 194 | 83.77926 | ||||||||
| Trichod.2 | 2 | Surf | -27.80 | -47.10 | 744 | 206 | 78.31579 |

Very often you have tables that contain a common field and that you need to join together.
Very often you have tables that contain a common field and that you need to join together.
A common example in oceanography. After a cruise you have many tables
Tables :
You want to know the Longitude and Latitude of a given biological sample.
Very often you have tables that contain a common field and that you need to join together.
A common example in oceanography. After a cruise you have many tables
Tables :
You want to know the Longitude and Latitude of a given biological sample.
Analysis # -> Sample # -> Bottle # -> Cast # -> Station # -> Long, Lat
In order to join 2 tables, they must have a common field. It is called the KEY.
For example it can be Bottle #, Station #
sequences <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_sequencing")
| sample | sample_number | fraction | n_sequences |
|---|---|---|---|
| X10n | 10 | Nano | 53230 |
| X10p | 10 | Pico | 47390 |
| X11n | 11 | Nano | 24007 |
| X11p | 11 | Pico | 31899 |
| X120n | 120 | Nano | 70455 |
sequences <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_sequencing")
| sample | sample_number | fraction | n_sequences |
|---|---|---|---|
| X10n | 10 | Nano | 53230 |
| X10p | 10 | Pico | 47390 |
| X11n | 11 | Nano | 24007 |
| X11p | 11 | Pico | 31899 |
| X120n | 120 | Nano | 70455 |
samples_select <- samples %>% dplyr::select(sample_number:longitude)Table sequences
| sample | sample_number | fraction | n_sequences |
|---|---|---|---|
| X10n | 10 | Nano | 53230 |
| X10p | 10 | Pico | 47390 |
| X11n | 11 | Nano | 24007 |
| X11p | 11 | Pico | 31899 |
| X120n | 120 | Nano | 70455 |
Table samples_select
| sample_number | transect | station | date | time | depth | level | latitude | longitude |
|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 |
sequences_join <- left_join(sequences, samples_select)sequences_join <- left_join(sequences, samples_select)
| sample | sample_number | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
|---|---|---|---|---|---|---|---|---|---|---|---|
| X10n | 10 | Nano | 53230 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
| X10p | 10 | Pico | 47390 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
| X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
| X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
| X121n | 121 | Nano | 52401 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
| X121p | 121 | Pico | 71785 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
| X122n | 122 | Nano | 78740 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
| X122p | 122 | Pico | 37364 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
sequences <- sequences %>% rename(sample_code = sample_number) sequences <- sequences %>% rename(sample_code = sample_number)
| sample | sample_code | fraction | n_sequences |
|---|---|---|---|
| X10n | 10 | Nano | 53230 |
| X10p | 10 | Pico | 47390 |
| X11n | 11 | Nano | 24007 |
| X11p | 11 | Pico | 31899 |
| X120n | 120 | Nano | 70455 |
| X120p | 120 | Pico | 76182 |
| X121n | 121 | Nano | 52401 |
| X121p | 121 | Pico | 71785 |
| X122n | 122 | Nano | 78740 |
| X122p | 122 | Pico | 37364 |
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number")) sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
| sample | sample_code | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
|---|---|---|---|---|---|---|---|---|---|---|---|
| X10n | 10 | Nano | 53230 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
| X10p | 10 | Pico | 47390 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
| X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
| X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
| X121n | 121 | Nano | 52401 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
| X121p | 121 | Pico | 71785 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
| X122n | 122 | Nano | 78740 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
| X122p | 122 | Pico | 37364 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
samples_select <- samples_select %>% filter(sample_number != "10") samples_select <- samples_select %>% filter(sample_number != "10")
| sample_number | transect | station | date | time | depth | level | latitude | longitude |
|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number")) sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
| sample | sample_code | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
|---|---|---|---|---|---|---|---|---|---|---|---|
| X10n | 10 | Nano | 53230 | ||||||||
| X10p | 10 | Pico | 47390 | ||||||||
| X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
| X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
| X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |

@allison_horst

We want to have a column with type of phytoplankton and one column with abundance
samples_select <- samples %>% dplyr::select(sample_number:nanoeuks)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
samples_long <- samples_select %>% tidyr::pivot_longer(picoeuks:nanoeuks, names_to="population", values_to="cell_ml" ) samples_long <- samples_select %>% tidyr::pivot_longer(picoeuks:nanoeuks, names_to="population", values_to="cell_ml" )
| sample_number | transect | station | date | time | depth | level | latitude | longitude | population | cell_ml |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | picoeuks | 7651 |
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | nanoeuks | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | picoeuks | 7343 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | nanoeuks | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | picoeuks | 1005 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | nanoeuks | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | picoeuks | 793 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | nanoeuks | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | picoeuks | 907 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | nanoeuks | 856 |

samples_wide <- samples_long %>% tidyr::pivot_wider(names_from = "population", values_from="cell_ml") samples_wide <- samples_long %>% tidyr::pivot_wider(names_from = "population", values_from="cell_ml")
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
samples_wide
# A tibble: 28 x 11 sample_number transect station date time depth level latitude <chr> <dbl> <dbl> <dttm> <chr> <dbl> <chr> <dbl> 1 1 0 6 2013-10-31 00:00:00 5:20 45 Deep -23.6 2 2 0 6 2013-10-31 00:00:00 5:20 45 Deep -23.6 3 3 0 19 2013-11-02 00:00:00 13:30 5 Surf -25.8 4 5 0 21 2013-11-02 00:00:00 0:0 5 Surf -26.2 5 7 0 26 2013-11-03 00:00:00 19:30 5 Surf -27.3 6 10 1 81 2013-11-13 00:00:00 1:0 140 Deep -27.4 7 9 1 81 2013-11-13 00:00:00 1:0 140 Deep -27.4 8 11 1 85 2013-11-13 00:00:00 13:30 110 Deep -26.8 9 13 1 86 2013-11-13 00:00:00 17:0 105 Deep -26.310 15 1 87 2013-11-13 00:00:00 19:30 105 Deep -26.2# ... with 18 more rows, and 3 more variables: longitude <dbl>, picoeuks <dbl>,# nanoeuks <dbl>library(kableExtra)kbl(samples_wide) %>% kable_styling(font_size = 9)
| sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
| 121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
| 122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
| 125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
| 126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
| 127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
| 140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
| 141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
| 142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
| 155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
| 156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
| 157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
| 165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
| 166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
| 167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
| Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
| Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
| Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
kbl(samples_wide, col.names = c("sample", "transect", "station", "date", "time", "depth", "level", "lat", "long", "pico", "nano")) %>% kable_styling(font_size = 9)
| sample | transect | station | date | time | depth | level | lat | long | pico | nano |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
| 121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
| 122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
| 125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
| 126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
| 127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
| 140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
| 141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
| 142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
| 155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
| 156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
| 157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
| 165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
| 166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
| 167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
| Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
| Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
| Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
kbl(samples_wide, linesep = "", col.names = c("sample", "transect", "station", "date", "time", "depth", "level", "lat", "long", "pico", "nano")) %>% kable_styling(font_size = 9, bootstrap_options = c("condensed") ) %>% add_header_above(header = c(" " = 9, "cell/ml"=2)) %>% scroll_box(width = "600px", height = "300px")
cell/ml |
||||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| sample | transect | station | date | time | depth | level | lat | long | pico | nano |
| 1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
| 2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
| 3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
| 5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
| 7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
| 10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
| 9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
| 11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
| 13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
| 15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
| 120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
| 121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
| 122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
| 125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
| 126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
| 127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
| 140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
| 141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
| 142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
| 155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
| 156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
| 157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
| 165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
| 166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
| 167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
| Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
| Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
| Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
Import and Export data
Select and create columns
Summarize data
Joining
Long vs. Wide format
Displaying tables

Keyboard shortcuts
| ↑, ←, Pg Up, k | Go to previous slide |
| ↓, →, Pg Dn, Space, j | Go to next slide |
| Home | Go to first slide |
| End | Go to last slide |
| Number + Return | Go to specific slide |
| b / m / f | Toggle blackout / mirrored / fullscreen mode |
| c | Clone slideshow |
| p | Toggle presenter mode |
| t | Restart the presentation timer |
| ?, h | Toggle this help |
| Esc | Back to slideshow |