Chapter 5 Extracting and Visualizing Meteorological Data
“What do you call dangerous precipitation? A rain of terror.”
For this assignment, we used custom functions to read in and look at average meteorological data scraped from a public data archive.
Data is from Snowstudies.org. Assignment by Dr. Matthew Ross and Dr. Nathan Mueller of Colorado State University.
5.1 Extract the meteorological data URLs.
# Read HTML page
<- read_html("https://snowstudies.org/archived-data/")
snowarchive
# Read link with specific pattern
<- snowarchive %>%
links html_nodes('a') %>% #look for links
grepl('forcing',.)] %>% #filter to only links with "forcing" term
.[html_attr('href') #tell it these are urls
# view links
## [1] "https://snowstudies.org/wp-content/uploads/2022/02/SBB_SASP_Forcing_Data.txt"
## [2] "https://snowstudies.org/wp-content/uploads/2022/02/SBB_SBSP_Forcing_Data.txt"
5.2 Download the meteorological data from the URL
# Grab only the name of the file by splitting out on forward slashes
<- str_split_fixed(links,'/',8)
splits
#Keep only the 8th column
<- splits[,8]
files
files
## [1] "SBB_SASP_Forcing_Data.txt" "SBB_SBSP_Forcing_Data.txt"
# Generate a file list for where the data goes
<- paste0('Data_sci_bookdown/data/snow/', files)
file_names
# For loop that downloads each - i for every instance, length function tells how many instances
for(i in 1:length(file_names)){
download.file(links[i],destfile=file_names[i])
}
# Download via map function
#map2(links, file_names, download.file)
# Map version of the for loop (downloading files)
<- file.exists(file_names)
downloaded <- !all(downloaded) # sees if files are downloaded (T/F)
evaluate if(evaluate == T){
map2(links[1:2],file_names[1:2],download.file)
else{print('data downloaded')} }
## [1] "data downloaded"
5.3 Write a custom function to read in the data and append a site column to the data
# Traditional read in
<- read.csv("Data_sci_bookdown/data/snow/SBB_SASP_Forcing_Data.csv") %>%
SASP select(1,2,3,7,10)
colnames(SASP) <- c("year","month","day","precip","temp")
<- read.csv("Data_sci_bookdown/data/snow/SBB_SBSP_Forcing_Data.csv") %>%
SBSP select(1,2,3,7,10)
colnames(SBSP) <- c("year","month","day","precip","temp")
# Combine csvs
<- rbind(SASP,SBSP)
alldata
# Read in via new function
# Grab headers from metadata pdf
library(pdftools)
## Using poppler version 20.12.1
<- pdf_text('https://snowstudies.org/wp-content/uploads/2022/02/Serially-Complete-Metadata-text08.pdf') %>%
headers ::read_lines(.) %>%
readrtrimws(.) %>%
str_split_fixed(.,'\\.',2) %>%
2] %>%
.[,1:26] %>%
.[str_trim(side = "left")
5.4 Use the map
function to read in both meteorological files
# Pull site name out of the file name and read in the .txt files
<- function(file){
read_data = str_split_fixed(file,'_',2)[,2] %>%
name gsub('_Forcing_Data.txt','',.)
<- read_fwf(file) %>%
df select(year=1, month=2, day=3, hour=4, precip=7, air_temp=10) %>% #choose and name columns
mutate(site = name) #add column
}
<- map_dfr(file_names,read_data) alldata2
## Rows: 69168 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
##
## chr (2): X12, X14
## dbl (17): X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X13, X15, X16, X17, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 69168 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
##
## chr (2): X12, X14
## dbl (17): X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X13, X15, X16, X17, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(alldata2)
## year month day hour
## Min. :2003 Min. : 1.000 Min. : 1.00 Min. : 0.00
## 1st Qu.:2005 1st Qu.: 3.000 1st Qu.: 8.00 1st Qu.: 5.75
## Median :2007 Median : 6.000 Median :16.00 Median :11.50
## Mean :2007 Mean : 6.472 Mean :15.76 Mean :11.50
## 3rd Qu.:2009 3rd Qu.: 9.000 3rd Qu.:23.00 3rd Qu.:17.25
## Max. :2011 Max. :12.000 Max. :31.00 Max. :23.00
## precip air_temp site
## Min. :0.000e+00 Min. :242.1 Length:138336
## 1st Qu.:0.000e+00 1st Qu.:265.8 Class :character
## Median :0.000e+00 Median :272.6 Mode :character
## Mean :3.838e-05 Mean :272.6
## 3rd Qu.:0.000e+00 3rd Qu.:279.7
## Max. :6.111e-03 Max. :295.8
5.5 Make a line plot of mean temp by year by site
<- alldata2 %>%
temp_yearly group_by(year, site) %>%
summarise(mean_temp = mean(`air_temp`, na.rm=T))
## `summarise()` has grouped output by 'year'. You can override using the `.groups`
## argument.
ggplot(temp_yearly,aes(x=year, y=mean_temp, color=site)) +
geom_point() + geom_line() +
xlab("Year") + ylab("Mean Temperature (Degrees Kelvin)") +
::theme_few() +
ggthemesscale_color_brewer(palette = "Set2") +
scale_x_continuous(breaks = pretty(c(2003,2012), n = 6)) +
theme(legend.position="bottom")
5.6 Write a function that makes line plots of monthly average temperature at each site for a given year. Use a for loop to make these plots for 2005 to 2010.
<- alldata2 %>%
temp_monthly group_by(year, month, site) %>%
summarize(mean_temp = mean(`air_temp`, na.rm=T))
## `summarise()` has grouped output by 'year', 'month'. You can override using the
## `.groups` argument.
par(mfrow=c(5,1))
<- function(year.no) {
plot_monthly <- temp_monthly %>%
plot filter(year == year.no) %>%
ggplot(aes(x=month, y=mean_temp, color=site)) +
geom_line() +
xlab("Month") + ylab("Mean Temperature (Degrees Kelvin)") +
::theme_few() +
ggthemesscale_color_brewer(palette = "Set2") +
scale_x_discrete(limits = c(1,2,3,4,5,6,7,8,9,10,11,12)) +
scale_y_continuous(breaks = pretty(c(255,290), n = 4)) +
theme(legend.position="bottom")
print(plot)
}
for(i in 2005:2010){
plot_monthly(i)
}
5.7 Make a plot of average daily precipitation by day of year (averaged across all available years)
<- alldata2 %>%
precip_daily mutate(date = make_date(year, month, day),
day_no = yday(date)) %>%
group_by(day_no) %>%
summarize(mean_precip = mean(`precip`*86400, na.rm=T))
ggplot(precip_daily, aes(x=day_no, y=mean_precip)) +
geom_line() +
xlab("Day of Year") + ylab("Mean Precipitation (mm/day)") +
::theme_few() +
ggthemesscale_color_brewer(palette = "Set2") +
scale_y_continuous(breaks = pretty(c(0,14), n = 7)) +
scale_x_continuous(breaks = pretty(c(1,365), n = 8))