# Studio 8  ----

# 1. Be sure to read the instructions file!

# 2. The test answers file gives sample output and the functions we ran to get it. You should use this to verify your
#    code is giving correct output in the format asked for.

# NOTE: download the data from https://svante.mit.edu/~pgiani/18.05/clivarStudio/data/ and put it in the same folder as
# studio8.r

require(maps)
require(ncdf4)
require(lubridate)
source("studio8_helper.r")

# Open the ERA5 SST data we just downloaded
n4era <- nc_open("sst_data_0_regridded_1940_2019.nc")
sst <- ncvar_get(n4era, "sst")
eraLats <- n4era$dim$lat$vals
eraLons <- n4era$dim$lon$vals
# Open the GPCC precip data we just downloaded
n4gpcc <- nc_open("precip.mon.total.1x1.v2020_years1940_2019.nc")
precip <- ncvar_get(n4gpcc, "precip")
gpccLats <- n4gpcc$dim$lat$vals
gpccLons <- n4gpcc$dim$lon$vals
# These two are the SST data for the Niño3.4 and precipitation for MSEA region
nino34_sst <- select_region(sst, n4era, c(-5, 5), c(190, 240))
msea_precip <- select_region(precip, n4gpcc, c(10, 25), c(90, 110))
# Get the time from the era5 file
eraTime <- as.POSIXct(
  ncvar_get(n4era, "valid_time"),
  origin = "1970-01-01",
  tz = "UTC"
)
gpccTime <- as.Date(
  ncvar_get(n4gpcc, "time"),
  origin = "1800-01-01",
  tz = "UTC"
)

###############
# Problem (1) #
###############
studio8_problem_1a <- function() {
  sst_timeavg <- apply(sst, 1:2, mean)
  nino34_sst_timeavg <- apply(nino34_sst, 1:2, mean)
  png(
    "fig1Problem1a.png",
    width = 1250,
    height = 750,
    res = 150,
    pointsize = 14
  )
  drawContourMap(
    eraLats,
    eraLons,
    sst_timeavg,
    color.palette = RdBu.color,
    main = "ERA5 Sea Surface Temperature"
  )
  dev.off()
  png(
    "fig2Problem1a.png",
    width = 1250,
    height = 750,
    res = 150,
    pointsize = 14
  )
  drawContourMap(
    eraLats,
    eraLons,
    nino34_sst_timeavg,
    main = "Nino3.4 Sea Surface Temperature",
    color.palette = RdBu.color
  )
  dev.off()
  print("---WRITE YOUR REASONING HERE---")
  print("There is a gradient showing ...")
}

studio8_problem_1b <- function(plotFlag = TRUE) {
  nino34_sst_weighted_mean <- calc_coslat_wmean(nino34_sst, eraLats)
  if (plotFlag) {
    png(
      "fig3Problem1b.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    plot(
      eraTime,
      nino34_sst_weighted_mean,
      type = "l",
      lwd = 2,
      ylab = "Temperature [K]",
      xlab = "",
      main = "Monthly SST in nino34 region"
    )
    dev.off()
  }
  nino34_sst_anomaly <- remove_monthly_clm(eraTime, nino34_sst_weighted_mean)
  nino34_index <- calc_rolling_mean(nino34_sst_anomaly, window = 5)
  if (plotFlag) {
    png(
      "fig4Problem1b.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    plot_nino34_index_timeseries(
      eraTime,
      nino34_index,
      threshold = 0.4,
      main = "Observed Nino3.4 Index"
    )
    dev.off()
  }
  return(list(
    nino34_index = nino34_index,
    nino34_sst_anomaly = nino34_sst_anomaly
  ))
}

studio8_problem_1c <- function(plotFlag = TRUE) {
  nino34_index <- studio8_problem_1b(plot = FALSE)$nino34_index
  nino34_amplitude <- abs(nino34_index)
  if (plotFlag) {
    # Calculate the mean and standard deviation of the ENSO amplitude for each month
    nino34_seasonal_amplitude_mean <- tapply(
      nino34_amplitude,
      month(eraTime),
      mean
    )
    nino34_seasonal_amplitude_stdev <- tapply(
      nino34_amplitude,
      month(eraTime),
      sd
    )
    maxValDisplay <- max(
      nino34_seasonal_amplitude_mean + nino34_seasonal_amplitude_stdev
    )
    png(
      "fig5Problem1c.png",
      width = 1000,
      height = 1000,
      res = 150,
      pointsize = 14
    )
    ylab <- "Nino34 index amplitude [K]"
    h <- barplot(
      nino34_seasonal_amplitude_mean,
      col = "gray70",
      names.arg = c("J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D"),
      ylim = c(0, maxValDisplay * 1.2),
      ylab = ylab
    )
    grid()
    box()
    arrows(
      x0 = h,
      y0 = nino34_seasonal_amplitude_mean - nino34_seasonal_amplitude_stdev,
      y1 = nino34_seasonal_amplitude_mean + nino34_seasonal_amplitude_stdev,
      angle = 90,
      code = 3,
      length = 0.1
    )
    text(
      h,
      nino34_seasonal_amplitude_mean + nino34_seasonal_amplitude_stdev + 0.05,
      labels = sprintf("%.2f", nino34_seasonal_amplitude_mean)
    )
    dev.off()
    print("---WRITE YOUR THREE MONTHS HERE---")
    print("November, December, January")
  }
}

###############
# Problem (2) #
###############
studio8_problem_2 <- function(plotFlag = TRUE) {
  precip_timeavg <- apply(precip, 1:2, mean)
  msea_precip_timeavg <- apply(msea_precip, 1:2, mean)
  if (plotFlag) {
    # Note: filled.contour() expects x and y in ascending order, but original latitudes in gpcc file are flipped;
    # rev() flips the latitude and the indexing in the matrix adjusts the data accordingly
    png(
      "fig6Problem2.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    drawContourMap(
      rev(gpccLats),
      gpccLons,
      precip_timeavg[, ncol(precip_timeavg):1],
      main = "GPCC Precipitation (mm/month)"
    )
    dev.off()
    png(
      "fig7problem2.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    drawContourMap(
      rev(gpccLats),
      gpccLons,
      msea_precip_timeavg[, ncol(msea_precip_timeavg):1],
      main = "GPCC Precipitation for MSEA (mm/month)"
    )
    dev.off()
  }
  # Get the average over the MSEA box (latitude weighted to account for unequal areas)
  msea_precip_weighted_mean <- calc_coslat_wmean(msea_precip, gpccLats)
  if (plotFlag) {
    png(
      "fig8problem2.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    plot(
      gpccTime,
      msea_precip_weighted_mean,
      col = "black",
      ylab = "Precipitation (mm/month)",
      main = "Monthly precipitation in mainland Southeast Asia",
      type = "l",
      xlab = NA
    )
    dev.off()
  }
  # Calculate anomaly
  msea_precip_anomaly <- remove_monthly_clm(gpccTime, msea_precip_weighted_mean)
  if (plotFlag) {
    png(
      "fig9problem2.png",
      width = 1250,
      height = 750,
      res = 150,
      pointsize = 14
    )
    plot(
      gpccTime,
      msea_precip_anomaly,
      col = "black",
      ylab = "Precipitation anomalies (mm/month)",
      main = "Monthly precipitation anomalies in mainland Southeast Asia",
      type = "l",
      xlab = NA
    )
    dev.off()
  }
  return(list(
    msea_precip_weighted_mean = msea_precip_weighted_mean,
    msea_precip_anomaly = msea_precip_anomaly
  ))
}

###############
# Problem (3) #
###############
studio8_problem_3 <- function() {
  # Get data from previous problems and downsample to quarterly
  nino34_sst_anomaly <- studio8_problem_1b(plot = FALSE)$nino34_sst_anomaly
  msea_precip_anomaly <- studio8_problem_2(plot = FALSE)$msea_precip_anomaly
  nino34_index_quarterly <- downsample_quarterly(eraTime, nino34_sst_anomaly)
  msea_precip_quarterly <- downsample_quarterly(gpccTime, msea_precip_anomaly)

  # For ENSO, let's focus on months November - January, when the ENSO amplitude is at its peak
  nino34_index_NDJ <- nino34_index_quarterly[
    nino34_index_quarterly$quarter == "NDJ",
    "val"
  ]
  shifts <- c(1, 0, -1)
  seasons <- c("NDJ", "FMA", "MJJ", "ASO") # corresponds to NDJ, FMA, MJJ, ASO because of label='right'
  # Initiatlize empty dataframe to save the correlations and their corresponding shifts/seasons
  dfCorr <- data.frame(matrix(ncol = 3, nrow = 0))
  for (shift in shifts) {
    for (season in seasons) {
      # Select season
      msea_precip_season <- msea_precip_quarterly[
        nino34_index_quarterly$quarter == season,
        "val"
      ]
      # Calculate correlation
      msea_shifted <- shift_vector(msea_precip_season, lag = shift)
      if (length(msea_shifted) < length(nino34_index_NDJ)) {
        # Pad the end because NDJ has one more index ( this is true for all other seasons except NDJ )
        msea_shifted <- c(msea_shifted, NA)
      }
      corrCoef <- cor(nino34_index_NDJ, msea_shifted, use = "complete.obs")
      # Append to respective arrays
      dfCorr <- rbind(dfCorr, c(season, shift, corrCoef))
    }
  }
  colnames(dfCorr) <- c("Season", "Shift", "corrCoef")
  print(dfCorr)

  # Now plot to visually see the correlations
  png(
    "fig10problem3.png",
    width = 1250,
    height = 750,
    res = 150,
    pointsize = 14
  )
  xlabels <- c(
    "ND(-2)J(-1)",
    "FMA(-1)",
    "MJJ(-1)",
    "ASO(-1)",
    "ND(-1)J(0)",
    "FMA(0)",
    "MJJ(0)",
    "ASO(0)",
    "ND(0)J(+1)",
    "FMA(+1)",
    "MJJ(+1)",
    "ASO(+1)"
  )
  par(mar = c(6.1, 4.1, 2.1, 2.1))
  titlePlot <- "Lead-Lag Correlations of NDJ Niño3.4 index with MSEA precipitation"
  plot(
    1:dim(dfCorr)[1],
    dfCorr[["corrCoef"]],
    type = "o",
    xlab = NA,
    ylab = "Correlation Coefficient",
    xaxt = "n",
    lwd = 2,
    pch = 19,
    main = titlePlot
  )
  axis(1, at = 1:dim(dfCorr)[1], label = xlabels, las = 2)
  grid()
  box()
  abline(h = 0, lty = 2, col = "gray30", lwd = 2)
  abline(v = c(1, 5), col = "gray30", lwd = 2, lty = 3)
  text(2.5, 0.2, "ENSO leading")
  text(6.5, 0.2, "ENSO lagging")
  dev.off()
}

###############
# Problem (4) #
###############
studio8_problem_4 <- function() {
  # Get data from previous problems and downsample to quarterly
  nino34_sst_anomaly <- studio8_problem_1b(plot = FALSE)$nino34_sst_anomaly
  msea_precip_anomaly <- studio8_problem_2(plot = FALSE)$msea_precip_anomaly
  nino34_index_quarterly <- downsample_quarterly(eraTime, nino34_sst_anomaly)
  msea_precip_quarterly <- downsample_quarterly(gpccTime, msea_precip_anomaly)
  # For ENSO, let's focus on months November - January, when the ENSO amplitude is at its peak
  # Correlated NDJ ENSO with FMA precip, and see how it evolves with time
  yearsNino <- nino34_index_quarterly[
    nino34_index_quarterly$quarter == "NDJ",
    "year"
  ]
  nino34_index_NDJ <- nino34_index_quarterly[
    nino34_index_quarterly$quarter == "NDJ",
    "val"
  ]
  msea_precip_FMA <- c(
    msea_precip_quarterly[msea_precip_quarterly$quarter == "FMA", "val"],
    NA
  ) # pad with NA because nino34 has one more NDJ
  # NOTE: students should try different windows and see for themselves that it does not affect the results that much
  window <- 13
  runcorr <- get_running_corr(
    nino34_index_NDJ,
    msea_precip_FMA,
    window = window,
    min_periods = 5
  )
  png(
    "fig11problem4.png",
    width = 1250,
    height = 750,
    res = 150,
    pointsize = 14
  )
  titlePlot <- "13-year running correlation between nino34 (NDJ) and msea precip (FMA)"
  plot(
    yearsNino,
    runcorr,
    type = "l",
    lwd = 2,
    ylab = "Running Correlation Coefficient",
    xlab = NA,
    main = titlePlot
  )
  grid()
  box()
  abline(h = 0, col = "gray30", lwd = 2, lty = 3)
  dev.off()

  # Select two time periods and compare the correlations
  # Define the periods
  prePeriod <- c(1940, 1979)
  postPeriod <- c(1980, 2019)
  # Filter by season and time period
  filtersPreNino <- (nino34_index_quarterly$year >= prePeriod[1]) &
    (nino34_index_quarterly$year <= prePeriod[2]) &
    (nino34_index_quarterly$quarter == "NDJ")
  filtersPostNino <- (nino34_index_quarterly$year >= postPeriod[1]) &
    (nino34_index_quarterly$year <= postPeriod[2]) &
    (nino34_index_quarterly$quarter == "NDJ")
  filtersPrePrecip <- (msea_precip_quarterly$year >= prePeriod[1]) &
    (msea_precip_quarterly$year <= prePeriod[2]) &
    (msea_precip_quarterly$quarter == "FMA")
  filtersPostPrecip <- (msea_precip_quarterly$year >= postPeriod[1]) &
    (msea_precip_quarterly$year <= postPeriod[2]) &
    (msea_precip_quarterly$quarter == "FMA")
  # Finally get the correlations
  runcorr_pre <- get_running_corr(
    nino34_index_quarterly[filtersPreNino, "val"],
    msea_precip_quarterly[filtersPrePrecip, "val"],
    window = 13
  )
  runcorr_post <- get_running_corr(
    nino34_index_quarterly[filtersPostNino, "val"],
    msea_precip_quarterly[filtersPostPrecip, "val"],
    window = 13
  )
  # Plot histograms and run t-test (are they statistically different?)
  minVal <- min(c(runcorr_pre, runcorr_post))
  maxVal <- max(c(runcorr_pre, runcorr_post))
  breaks <- seq(minVal, maxVal, length.out = 9)
  png(
    "fig12problem4.png",
    width = 1000,
    height = 1000,
    res = 150,
    pointsize = 14
  )
  hist(
    runcorr_post,
    col = rgb(1, 0, 0, alpha = 0.5),
    breaks = breaks,
    xlab = "Correlation Values",
    ylab = "Frequency",
    main = "Histogram of pre 1980 and post 1980 correlations"
  )
  hist(
    runcorr_pre,
    col = rgb(0, 0, 1, alpha = 0.5),
    xlim = c(minVal, maxVal),
    breaks = breaks,
    add = T
  )
  box()
  grid(nx = NA, ny = NULL)
  legend(
    "topright",
    legend = c("Post-1980", "Pre-1980"),
    fill = c(rgb(1, 0, 0, alpha = 0.5), rgb(0, 0, 1, alpha = 0.5))
  )
  dev.off()
  # Finally run t-test
  ttestResult <- t.test(runcorr_pre, runcorr_post)
  print(paste(
    "Two sample T-Test results: t-statistic: ",
    sprintf("%.3f", ttestResult$statistic),
    "; p-value: ",
    ttestResult$p.value,
    sep = ""
  ))
}

#####################################
# Finally execute all the functions #
#####################################
studio8_problem_1a()
outputLst1b <- studio8_problem_1b()
studio8_problem_1c()
outputLst2 <- studio8_problem_2()
studio8_problem_3()
studio8_problem_4()