Daniel Vaulot
2021-02-05
List
Matrix
Factors
Data frames
df <- data.frame(label = letters[1:6], id = 1:6, value = rnorm(6, mean = 0, sd = 1), flag=c(TRUE, FALSE), # recycling rule stringsAsFactors = FALSE) df
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUE4 d 4 -0.8592721 FALSE5 e 5 1.1239631 TRUE6 f 6 -1.1016689 FALSE
stringsAsFactors = FALSE
dim(df) # returns the dimensions of data frame
[1] 6 4
nrow(df) # number of rows
[1] 6
ncol(df) # number of columns
[1] 4
str(df) # structure of data frame - name, type and preview of data in each column
'data.frame': 6 obs. of 4 variables: $ label: chr "a" "b" "c" "d" ... $ id : int 1 2 3 4 5 6 $ value: num 1.395 0.444 -0.527 -0.859 1.124 ... $ flag : logi TRUE FALSE TRUE FALSE TRUE FALSE
colnames(df) # columns names
[1] "label" "id" "value" "flag"
df[i,j]
notation, first index corresponds to row, second index to columndf[5, 3]
[1] 1.123963
df[i,j]
notation, first index corresponds to row, second index to columndf[5, 3]
[1] 1.123963
df[5, "value"]
[1] 1.123963
df[i,j]
notationdf[, 3]
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689
df[, "value"]
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689
$
notationdf$value
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689
$
notationdf$value
[1] 1.3952912 0.4437818 -0.5267217 -0.8592721 1.1239631 -1.1016689
$
for the column, [i]
for the rowdf$value[5]
[1] 1.123963
df[i,j]
notationdf[1, ]
label id value flag1 a 1 1.395291 TRUE
df[df$id <= 3,]
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUE
Select lines for which the label is c
df[df$id <= 3,]
label id value flag1 a 1 1.3952912 TRUE2 b 2 0.4437818 FALSE3 c 3 -0.5267217 TRUE
Select lines for which the label is c
df[df$label == "c", ]
label id value flag3 c 3 -0.5267217 TRUE
This syntax is complicated - tidyverse packages make it much more easy to manipulate and remember
R-session-04.zip
R-session-04-data_wrangling.R
R for data science: (Chapter 5)
Cheat sheets
library("readxl") # Import the data from Excel filelibrary("readr") # Import the data from Excel filelibrary("dplyr") # filter and reformat data frameslibrary("tidyr") # make data tidylibrary("stringr") # manipulate stringslibrary("lubridate") # manipulate date library("ggplot2") # graphics
samples <- readr::read_tsv("data/CARBOM data.txt")
samples <- readr::read_tsv("data/CARBOM data.txt")
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 13/11/2013 | 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 13/11/2013 | 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 18/11/2013 | 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 18/11/2013 | 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
122 | 2 | 18/11/2013 | 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
125 | 2 | 98 | 18/11/2013 | 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 18/11/2013 | 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
127 | 2 | 18/11/2013 | 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
13 | 1 | 86 | 13/11/2013 | 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 18/11/2013 | 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- readr::read_tsv("data/CARBOM data.txt")
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 13/11/2013 | 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 13/11/2013 | 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 18/11/2013 | 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 18/11/2013 | 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
122 | 2 | 18/11/2013 | 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
125 | 2 | 98 | 18/11/2013 | 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 18/11/2013 | 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
127 | 2 | 18/11/2013 | 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
13 | 1 | 86 | 13/11/2013 | 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 18/11/2013 | 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
readr::read_csv() : read comma delimited files
readr::write_tsv() : write tab delimited files
samples <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_boat")
samples <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_boat")
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
122 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
127 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | |
122 | 2 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | |
127 | 2 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- tidyr::fill(samples, station)
samples <- tidyr::fill(samples, station)
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
readr::write_tsv(samples, "data/CARBOM data fixed.tsv")
readr::write_tsv(samples, "data/CARBOM data fixed.tsv")
openxlsx::write.xlsx : write tab delimited files
Many options: specific sheet, formatting etc...
openxlsx::write.xlsx(samples, "data/CARBOM data fixed.xlsx")
@allison_horst
colnames(samples)
[1] "sample number" "transect" "station" "date" [5] "time" "depth" "level" "latitude" [9] "longitude" "picoeuks" "nanoeuks" "phosphates" [13] "nitrates" "temperature" "salinity"
summary(samples$depth)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 5.0 5.0 50.0 56.6 100.0 140.0 3
samples_select <- dplyr::select(samples, transect, `sample number`, station, depth, latitude, longitude, picoeuks, nanoeuks)
samples_select <- dplyr::select(samples, transect, `sample number`, station, depth, latitude, longitude, picoeuks, nanoeuks)
transect | sample number | station | depth | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|
1 | 10 | 81 | 140 | -27.42 | -44.72 | 3278 | 1232 |
1 | 11 | 85 | 110 | -26.80 | -45.30 | 16312 | 1615 |
2 | 120 | 96 | 5 | -27.39 | -47.82 | 1150 | 75 |
2 | 121 | 96 | 30 | -27.39 | -47.82 | 1737 | 218 |
2 | 122 | 96 | 50 | -27.39 | -47.82 | 853 | 234 |
2 | 125 | 98 | 5 | -27.59 | -47.39 | 3086 | 1300 |
2 | 126 | 98 | 50 | -27.59 | -47.39 | 1217 | 782 |
2 | 127 | 98 | 85 | -27.59 | -47.39 | 3420 | 226 |
1 | 13 | 86 | 105 | -26.33 | -45.41 | 6366 | 1007 |
2 | 140 | 101 | 5 | -27.79 | -46.96 | 500 | 366 |
samples_select <- dplyr::select(samples, transect:nanoeuks)
samples_select <- dplyr::select(samples, transect:nanoeuks)
transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|---|---|
1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
samples_select <- dplyr::select(samples, -nitrates, -phosphates)
samples_select <- dplyr::select(samples, -nitrates, -phosphates)
sample number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 17.3 | 35.9 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 21.3 | 36.5 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 23.1 | 33.5 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 22.6 | 33.7 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 20.3 | 35.9 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 23.1 | 35.7 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 23.7 | 37.2 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 22.9 | 37.0 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 20.9 | 36.3 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 23.5 | 36.5 |
samples_select <- samples %>% dplyr::select(transect:nanoeuks)
samples_select <- samples %>% dplyr::select(transect:nanoeuks)
transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|---|---|
1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
samples_select <- samples %>% dplyr::select(transect:nanoeuks)
samples <- samples %>% dplyr::rename(sample_number = `sample number`)
samples <- samples %>% dplyr::rename(sample_number = `sample number`)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 |
samples <- samples %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
samples <- samples %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | 88.84910 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | 78.47286 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | 60.88044 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | 93.80143 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
samples_select <- samples %>% dplyr::select(sample_number:nanoeuks, level) %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
samples_select <- samples %>% dplyr::select(sample_number:nanoeuks, level) %>% dplyr::mutate(pico_pct = picoeuks/(picoeuks+nanoeuks)*100)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 72.68293 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 90.99124 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 93.87755 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 88.84910 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 78.47286 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 70.36024 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 60.88044 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 93.80143 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 86.34206 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 57.73672 |
samples <- samples %>% dplyr::mutate(sample_label = str_c("TR",transect,"St",station, sep="_"))
samples <- samples %>% dplyr::mutate(sample_label = str_c("TR",transect,"St",station, sep="_"))
sample_number | transect | station | date | time | sample_label |
---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1899-12-31 01:00:00 | TR_1_St_81 |
11 | 1 | 85 | 2013-11-13 | 1899-12-31 13:30:00 | TR_1_St_85 |
120 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
121 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
122 | 2 | 96 | 2013-11-18 | 1899-12-31 23:50:00 | TR_2_St_96 |
125 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
126 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
127 | 2 | 98 | 2013-11-18 | 1899-12-31 05:00:00 | TR_2_St_98 |
13 | 1 | 86 | 2013-11-13 | 1899-12-31 17:00:00 | TR_1_St_86 |
140 | 2 | 101 | 2013-11-18 | 1899-12-31 12:00:00 | TR_2_St_101 |
samples <- samples %>% dplyr::mutate(time = str_c(lubridate::hour(time), lubridate::minute(time), sep=":"))
samples <- samples %>% dplyr::mutate(time = str_c(lubridate::hour(time), lubridate::minute(time), sep=":"))
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 | 0.43 | 0.23 | 22.6 | 33.7 | 88.84910 |
122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 | 0.56 | 0.21 | 20.3 | 35.9 | 78.47286 |
125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 | 0.25 | 0.20 | 23.7 | 37.2 | 60.88044 |
127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 | 0.25 | 0.47 | 22.9 | 37.0 | 93.80143 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
lubridate
package to manipulate datessamples <- samples %>% dplyr::arrange(transect, station)
samples <- samples %>% dplyr::arrange(transect, station)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 | 0.47 | 1.07 | 19.7 | 36.3 | 61.22759 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 | 0.47 | 1.07 | 19.7 | 36.3 | 69.26705 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 | 0.20 | 0.26 | 17.3 | 35.9 | 72.03351 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 | 0.47 | 1.51 | 19.5 | 36.1 | 90.86771 |
samples <- samples %>% dplyr::mutate(station = as.numeric(station)) %>% dplyr::arrange(transect, station)
samples <- samples %>% dplyr::mutate(station = as.numeric(station)) %>% dplyr::arrange(transect, station)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 | 0.47 | 1.07 | 19.7 | 36.3 | 61.22759 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 | 0.47 | 1.07 | 19.7 | 36.3 | 69.26705 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 | 0.20 | 0.26 | 17.3 | 35.9 | 72.68293 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 | 0.20 | 0.26 | 17.3 | 35.9 | 72.03351 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 | 0.29 | 0.22 | 21.3 | 36.5 | 90.99124 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 | 0.34 | 0.15 | 20.9 | 36.3 | 86.34206 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 | 0.47 | 1.51 | 19.5 | 36.1 | 90.86771 |
stations_count <- samples %>% dplyr::count(transect)
stations_count <- samples %>% dplyr::count(transect)
transect | n |
---|---|
0 | 5 |
1 | 5 |
2 | 18 |
samples_mean <- samples %>% dplyr::group_by(transect, station) %>% dplyr::summarise(n_samples = n(), mean_pico_percent = mean(pico_pct, na.rm=TRUE))
samples_mean <- samples %>% dplyr::group_by(transect, station) %>% dplyr::summarise(n_samples = n(), mean_pico_percent = mean(pico_pct, na.rm=TRUE))
transect | station | n_samples | mean_pico_percent |
---|---|---|---|
0 | 6 | 2 | 65.24732 |
0 | 19 | 1 | 52.81135 |
0 | 21 | 1 | 54.57674 |
0 | 26 | 1 | 51.44640 |
1 | 81 | 2 | 72.35822 |
1 | 85 | 1 | 90.99124 |
1 | 86 | 1 | 86.34206 |
1 | 87 | 1 | 90.86771 |
2 | 96 | 3 | 87.06651 |
2 | 98 | 3 | 75.01403 |
samples_surf <- samples %>% dplyr::filter(level == "Surf")
samples_surf <- samples %>% dplyr::filter(level == "Surf")
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks | phosphates | nitrates | temperature | salinity | pico_pct |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 | 0.29 | 0.48 | 22.7 | 36.9 | 52.81135 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 | 0.16 | 0.90 | 22.8 | 36.9 | 54.57674 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 | 0.20 | 0.50 | 21.2 | 36.4 | 51.44640 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 | 0.43 | 0.19 | 23.1 | 33.5 | 93.87755 |
125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 | 0.29 | 0.25 | 23.1 | 35.7 | 70.36024 |
140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 | 0.29 | 0.14 | 23.5 | 36.5 | 57.73672 |
155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 | 0.25 | 0.37 | 23.0 | 36.9 | 95.17426 |
165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 | 0.29 | 0.28 | 22.4 | 36.4 | 76.31027 |
Trichod.1 | 2 | Surf | -27.80 | -47.10 | 1002 | 194 | 83.77926 | ||||||||
Trichod.2 | 2 | Surf | -27.80 | -47.10 | 744 | 206 | 78.31579 |
Very often you have tables that contain a common field and that you need to join together.
Very often you have tables that contain a common field and that you need to join together.
A common example in oceanography. After a cruise you have many tables
Tables :
You want to know the Longitude and Latitude of a given biological sample.
Very often you have tables that contain a common field and that you need to join together.
A common example in oceanography. After a cruise you have many tables
Tables :
You want to know the Longitude and Latitude of a given biological sample.
Analysis # -> Sample # -> Bottle # -> Cast # -> Station # -> Long, Lat
In order to join 2 tables, they must have a common field. It is called the KEY.
For example it can be Bottle #, Station #
sequences <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_sequencing")
sample | sample_number | fraction | n_sequences |
---|---|---|---|
X10n | 10 | Nano | 53230 |
X10p | 10 | Pico | 47390 |
X11n | 11 | Nano | 24007 |
X11p | 11 | Pico | 31899 |
X120n | 120 | Nano | 70455 |
sequences <- readxl::read_excel("data/CARBOM data.xlsx", sheet = "Samples_sequencing")
sample | sample_number | fraction | n_sequences |
---|---|---|---|
X10n | 10 | Nano | 53230 |
X10p | 10 | Pico | 47390 |
X11n | 11 | Nano | 24007 |
X11p | 11 | Pico | 31899 |
X120n | 120 | Nano | 70455 |
samples_select <- samples %>% dplyr::select(sample_number:longitude)
Table sequences
sample | sample_number | fraction | n_sequences |
---|---|---|---|
X10n | 10 | Nano | 53230 |
X10p | 10 | Pico | 47390 |
X11n | 11 | Nano | 24007 |
X11p | 11 | Pico | 31899 |
X120n | 120 | Nano | 70455 |
Table samples_select
sample_number | transect | station | date | time | depth | level | latitude | longitude |
---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 |
sequences_join <- left_join(sequences, samples_select)
sequences_join <- left_join(sequences, samples_select)
sample | sample_number | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
---|---|---|---|---|---|---|---|---|---|---|---|
X10n | 10 | Nano | 53230 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
X10p | 10 | Pico | 47390 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
X121n | 121 | Nano | 52401 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
X121p | 121 | Pico | 71785 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
X122n | 122 | Nano | 78740 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
X122p | 122 | Pico | 37364 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
sequences <- sequences %>% rename(sample_code = sample_number)
sequences <- sequences %>% rename(sample_code = sample_number)
sample | sample_code | fraction | n_sequences |
---|---|---|---|
X10n | 10 | Nano | 53230 |
X10p | 10 | Pico | 47390 |
X11n | 11 | Nano | 24007 |
X11p | 11 | Pico | 31899 |
X120n | 120 | Nano | 70455 |
X120p | 120 | Pico | 76182 |
X121n | 121 | Nano | 52401 |
X121p | 121 | Pico | 71785 |
X122n | 122 | Nano | 78740 |
X122p | 122 | Pico | 37364 |
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
sample | sample_code | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
---|---|---|---|---|---|---|---|---|---|---|---|
X10n | 10 | Nano | 53230 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
X10p | 10 | Pico | 47390 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
X121n | 121 | Nano | 52401 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
X121p | 121 | Pico | 71785 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 |
X122n | 122 | Nano | 78740 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
X122p | 122 | Pico | 37364 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 |
samples_select <- samples_select %>% filter(sample_number != "10")
samples_select <- samples_select %>% filter(sample_number != "10")
sample_number | transect | station | date | time | depth | level | latitude | longitude |
---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
sequences_join <- left_join(sequences, samples_select, by= c("sample_code" = "sample_number"))
sample | sample_code | fraction | n_sequences | transect | station | date | time | depth | level | latitude | longitude |
---|---|---|---|---|---|---|---|---|---|---|---|
X10n | 10 | Nano | 53230 | ||||||||
X10p | 10 | Pico | 47390 | ||||||||
X11n | 11 | Nano | 24007 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X11p | 11 | Pico | 31899 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 |
X120n | 120 | Nano | 70455 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
X120p | 120 | Pico | 76182 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 |
@allison_horst
We want to have a column with type of phytoplankton and one column with abundance
samples_select <- samples %>% dplyr::select(sample_number:nanoeuks)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
samples_long <- samples_select %>% tidyr::pivot_longer(picoeuks:nanoeuks, names_to="population", values_to="cell_ml" )
samples_long <- samples_select %>% tidyr::pivot_longer(picoeuks:nanoeuks, names_to="population", values_to="cell_ml" )
sample_number | transect | station | date | time | depth | level | latitude | longitude | population | cell_ml |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | picoeuks | 7651 |
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | nanoeuks | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | picoeuks | 7343 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | nanoeuks | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | picoeuks | 1005 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | nanoeuks | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | picoeuks | 793 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | nanoeuks | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | picoeuks | 907 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | nanoeuks | 856 |
samples_wide <- samples_long %>% tidyr::pivot_wider(names_from = "population", values_from="cell_ml")
samples_wide <- samples_long %>% tidyr::pivot_wider(names_from = "population", values_from="cell_ml")
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
samples_wide
# A tibble: 28 x 11 sample_number transect station date time depth level latitude <chr> <dbl> <dbl> <dttm> <chr> <dbl> <chr> <dbl> 1 1 0 6 2013-10-31 00:00:00 5:20 45 Deep -23.6 2 2 0 6 2013-10-31 00:00:00 5:20 45 Deep -23.6 3 3 0 19 2013-11-02 00:00:00 13:30 5 Surf -25.8 4 5 0 21 2013-11-02 00:00:00 0:0 5 Surf -26.2 5 7 0 26 2013-11-03 00:00:00 19:30 5 Surf -27.3 6 10 1 81 2013-11-13 00:00:00 1:0 140 Deep -27.4 7 9 1 81 2013-11-13 00:00:00 1:0 140 Deep -27.4 8 11 1 85 2013-11-13 00:00:00 13:30 110 Deep -26.8 9 13 1 86 2013-11-13 00:00:00 17:0 105 Deep -26.310 15 1 87 2013-11-13 00:00:00 19:30 105 Deep -26.2# ... with 18 more rows, and 3 more variables: longitude <dbl>, picoeuks <dbl>,# nanoeuks <dbl>
library(kableExtra)kbl(samples_wide) %>% kable_styling(font_size = 9)
sample_number | transect | station | date | time | depth | level | latitude | longitude | picoeuks | nanoeuks |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
kbl(samples_wide, col.names = c("sample", "transect", "station", "date", "time", "depth", "level", "lat", "long", "pico", "nano")) %>% kable_styling(font_size = 9)
sample | transect | station | date | time | depth | level | lat | long | pico | nano |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
kbl(samples_wide, linesep = "", col.names = c("sample", "transect", "station", "date", "time", "depth", "level", "lat", "long", "pico", "nano")) %>% kable_styling(font_size = 9, bootstrap_options = c("condensed") ) %>% add_header_above(header = c(" " = 9, "cell/ml"=2)) %>% scroll_box(width = "600px", height = "300px")
cell/ml |
||||||||||
---|---|---|---|---|---|---|---|---|---|---|
sample | transect | station | date | time | depth | level | lat | long | pico | nano |
1 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7651 | 4845 |
2 | 0 | 6 | 2013-10-31 | 5:20 | 45 | Deep | -23.58 | -41.78 | 7343 | 3258 |
3 | 0 | 19 | 2013-11-02 | 13:30 | 5 | Surf | -25.79 | -40.36 | 1005 | 898 |
5 | 0 | 21 | 2013-11-02 | 0:0 | 5 | Surf | -26.23 | -40.09 | 793 | 660 |
7 | 0 | 26 | 2013-11-03 | 19:30 | 5 | Surf | -27.31 | -39.38 | 907 | 856 |
10 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3278 | 1232 |
9 | 1 | 81 | 2013-11-13 | 1:0 | 140 | Deep | -27.42 | -44.72 | 3181 | 1235 |
11 | 1 | 85 | 2013-11-13 | 13:30 | 110 | Deep | -26.80 | -45.30 | 16312 | 1615 |
13 | 1 | 86 | 2013-11-13 | 17:0 | 105 | Deep | -26.33 | -45.41 | 6366 | 1007 |
15 | 1 | 87 | 2013-11-13 | 19:30 | 105 | Deep | -26.22 | -45.48 | 6189 | 622 |
120 | 2 | 96 | 2013-11-18 | 23:50 | 5 | Surf | -27.39 | -47.82 | 1150 | 75 |
121 | 2 | 96 | 2013-11-18 | 23:50 | 30 | Deep | -27.39 | -47.82 | 1737 | 218 |
122 | 2 | 96 | 2013-11-18 | 23:50 | 50 | Deep | -27.39 | -47.82 | 853 | 234 |
125 | 2 | 98 | 2013-11-18 | 5:0 | 5 | Surf | -27.59 | -47.39 | 3086 | 1300 |
126 | 2 | 98 | 2013-11-18 | 5:0 | 50 | Deep | -27.59 | -47.39 | 1217 | 782 |
127 | 2 | 98 | 2013-11-18 | 5:0 | 85 | Deep | -27.59 | -47.39 | 3420 | 226 |
140 | 2 | 101 | 2013-11-18 | 12:0 | 5 | Surf | -27.79 | -46.96 | 500 | 366 |
141 | 2 | 101 | 2013-11-18 | 12:0 | 60 | Deep | -27.79 | -46.96 | 1046 | 485 |
142 | 2 | 101 | 2013-11-18 | 12:0 | 110 | Deep | -27.79 | -46.96 | 641 | 159 |
155 | 2 | 106 | 2013-11-19 | 2:30 | 5 | Surf | -28.12 | -46.17 | 355 | 18 |
156 | 2 | 106 | 2013-11-19 | 2:30 | 60 | Deep | -28.12 | -46.17 | 1800 | 300 |
157 | 2 | 106 | 2013-11-19 | 2:30 | 100 | Deep | -28.12 | -46.17 | 6910 | 1152 |
165 | 2 | 114 | 2013-11-19 | 21:40 | 5 | Surf | -28.65 | -44.99 | 728 | 226 |
166 | 2 | 114 | 2013-11-19 | 21:40 | 60 | Deep | -28.65 | -44.99 | 660 | 578 |
167 | 2 | 114 | 2013-11-19 | 21:40 | 80 | Deep | -28.65 | -44.99 | 722 | 390 |
Trichod.1 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 1002 | 194 |
Trichod.2 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 744 | 206 |
Trichod.3 | 2 | NA | NA | NA | NA | Surf | -27.80 | -47.10 | 600 | 218 |
Import and Export data
Select and create columns
Summarize data
Joining
Long vs. Wide format
Displaying tables
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |