# STEP 1: READ IN EXCEL DATA

# Choose the Excel file interactively: file.choose() prompts the user to pick a
# file and returns its path as a character string.
# NOTE(review): this needs an interactive session - confirm the script is never
# run in batch mode.
data_file_path <- file.choose()

# Read the data from Excel. Do not use the xlsx library because it is faulty
# with my Java installation; use openxlsx instead, whose read.xlsx() does not
# require Java.
library(openxlsx)

# Read the first worksheet without treating any row as a header, then flatten
# the result. read.xlsx() returns a data frame (a list of columns); unlist()
# concatenates those columns, in order, into one unnamed vector.
data_circ <- unlist(
  read.xlsx(
    xlsxFile = data_file_path,
    sheet = 1,
    # layout of the sheet: start at the top, no header row, no row names
    startRow = 1,
    colNames = FALSE,
    rowNames = FALSE,
    # read the whole sheet, skipping only empty rows / columns
    rows = NULL,
    cols = NULL,
    namedRegion = NULL,
    skipEmptyRows = TRUE,
    skipEmptyCols = TRUE,
    fillMergedCells = FALSE,
    # value handling
    detectDates = FALSE,
    na.strings = "NA",
    check.names = FALSE,
    sep.names = "."
  ),
  use.names = FALSE
)


# STEP 2: add X degree shift to a vector of circular values (shifting data set)
library(circular)

# Effect size: the angular shift (in degrees) to apply, e.g. based on
# previous studies.
SHIFT_IN_DEGREES <- 120

# The shift itself as a one-element circular object, using the same units and
# template as the data so the two can be added directly.
shift <- circular(
  SHIFT_IN_DEGREES,
  units = "degrees",
  template = "geographics"
)

# The observed angles as a circular object.
original_circ_dataset <- circular(
  data_circ,
  units = "degrees",
  template = "geographics"
)

# The expected data set: every observed angle moved by the shift.
expected_circ_dataset <- original_circ_dataset + shift



# STEP 3 - bootstrap the original and the shifted data sets and compare them
# pairwise.
# In each iteration, draw N_BIRDS values with replacement from the original
# data set and N_BIRDS values from the shifted data set, run
# watson.wheeler.test(list(x1, x2)) on the pair and store the p value.
N_BIRDS <- 12 # how many animals to subsample from the original data set
P_THRESHOLD <- 0.05 # significance level used in STEP 4
N_BOOTSTRAP_ITERATIONS <- 1000 # number of bootstrap pairs to simulate

# Preallocate all containers at their final size instead of growing them
# inside the loop. p_matrix keeps its historical name but is a plain numeric
# vector holding one p value per iteration.
bootstrap_samples_original <- vector("list", N_BOOTSTRAP_ITERATIONS)
bootstrap_samples_shifted <- vector("list", N_BOOTSTRAP_ITERATIONS)
p_matrix <- numeric(N_BOOTSTRAP_ITERATIONS)

# seq_len() yields an empty sequence for zero iterations, whereas 1:0 would
# run the body for i = 1 and i = 0.
for (i in seq_len(N_BOOTSTRAP_ITERATIONS)) {
  bootstrap_samples_original[[i]] <- sample(
    original_circ_dataset, N_BIRDS,
    replace = TRUE
  )
  bootstrap_samples_shifted[[i]] <- sample(
    expected_circ_dataset, N_BIRDS,
    replace = TRUE
  )
  p_matrix[i] <- watson.wheeler.test(
    list(bootstrap_samples_original[[i]], bootstrap_samples_shifted[[i]])
  )$p.value
}

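# WARNING/ISSUE - the p values are identical in every iteration. Reshuffle both
# matrices? This happens for shift angles > 90 degrees, but the p values do
# differ for a 60 degree shift.
# NOTE(review): this is probably expected rather than a bug - the
# Watson-Wheeler test is rank-based, so once the two samples no longer overlap
# on the circle the statistic depends only on the sample sizes. Confirm.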


# STEP 4 - proportion of p values below P_THRESHOLD among all p values, for
# the given n and shift angle.
# NA comparisons are excluded from the count but still counted in the
# denominator.
ratio <- sum(p_matrix < P_THRESHOLD, na.rm = TRUE) / length(p_matrix)
ratio


# NOTES AND TODO IDEAS
# 1) If the angular difference between the samples is large, the p values
#    flatten to the same number - this looks weird at first.
# 2) Add more initial (input) values so that we can regulate:
#    TASK / OPTION 1 - what minimum N must be subsampled from the original
#    data set to reach a required power (0.8, 0.9 etc.) at a given p value
#    threshold, for a given angle of shift;
#    or OPTION 2 - what angle of shift is required for a given n subsampled
#    from the original data set (i.e. the minimum shift that is expected).
#    For grant applications OPTION 1 is often needed, and the shift value is
#    based on maps / priors / an angle shift, e.g. an mN shift.
#    For BBSRC Apr 2020 we can take the mean shift relative to control from
#    the 2018 and 2019 seasons.