## For this project/R script, we will create a simulated data set matching the example/frogs data file we used for analysis yesterday and this morning

# As an intro to creating variables and generating random data, consider the
# following code.

# rep() repeats the elements of a vector: `each` sets how many times every
# element is repeated in a row, and `times` sets how many times the resulting
# set is repeated. Here we get "a a b b c c" six times over (36 values).
pop <- rep(letters[1:3], each = 2, times = 6)

# If we leave out one of these arguments, R assumes it takes the value 1.
sex <- rep(c("m", "f"), times = 18)

# runif() draws pseudo-random numbers from a uniform distribution - here we
# draw 36 numbers from a uniform distribution that goes from 10 to 100.
# The variable name "length" is also the name of a base function; R copes with
# this just fine (calls to length() still find the function), but it is
# generally bad policy and best avoided.
length <- runif(36, 10, 100)

# Calculate a mass to go with each length by adding a uniform value between 0
# and 5. We draw 36 noise values - one per individual - and the addition is
# vectorized, so no loop is needed. Drawing a single value with
# runif(1, 0, 5) would be recycled and shift every length by the same constant.
mass <- length + runif(36, 0, 5)

# Put the variables together into a data frame and look at the result.
dat <- data.frame(pop, sex, length, mass)
dat

# Set the number of significant digits printed to something more reasonable by
# modifying the "digits" option, then print again to see the difference.
options(digits = 4)
dat


## Okay - given that introduction we will now 
### Recreate frogs data set used earlier 

# Vector of the five species names used in the frogs data set.
spp <- c("bullfrogs", "woodfrogs", "grays", "spring_peepers", "toads")

# Build the subject IDs: 10 individuals per species, labelled
# "<species>.<1-10>". Elements 1-10 carry the first species name and the
# numbers 1-10, elements 11-20 the second species and 1-10, and so on.
# rep(spp, each = 10) lines every species up with its 10 replicates and
# paste() is vectorized, so all 50 labels are built in one call instead of
# filling the vector element by element inside nested loops.
subject <- paste(rep(spp, each = 10), rep(1:10, times = length(spp)), sep = ".")
subject # look at the variable we've created

# Treatment level names, and the treatment assigned to each individual:
# within every species the first 5 individuals get A and the last 5 get B.
treat.types <- factor(LETTERS[1:2])
treatment <- rep(treat.types, each = 5, times = length(spp))

# Check that both variables have the same length (one entry per individual).
length(subject)
length(treatment)

### Now we create a function to generate the response variable for each individual

# gen.dat(): simulate the response values for one species.
#
# Arguments:
#   n.per.trt  number of individuals drawn per treatment (default 5, giving
#              the 10 values per species used in this script).
#
# Returns:
#   A numeric vector of length 2 * n.per.trt: the treatment A values first,
#   followed by the treatment B values.
#
# Side effects:
#   The population parameters drawn for this call are written with `<<-` to
#   the variables muA, sdA, muB and sdB outside the function (overwritten on
#   every call), so the values behind the most recent sample can be inspected.
gen.dat <- function(n.per.trt = 5) {
  # Treatment A: mean drawn from a uniform distribution ranging from 2 to 5,
  # sd drawn from a uniform distribution ranging from 0.2 to 0.9.
  muA <<- runif(1, 2, 5)
  sdA <<- runif(1, 0.2, 0.9)
  # Treatment B: mean is the A mean plus a treatment effect drawn from a
  # normal distribution (mean 2, sd 1); the sd is shared with treatment A.
  muB <<- muA + rnorm(1, 2, 1)
  sdB <<- sdA
  # rnorm() is vectorized, so each treatment's sample is drawn in one call
  # rather than growing a vector one element at a time in a loop.
  c(rnorm(n.per.trt, muA, sdA), rnorm(n.per.trt, muB, sdB))
} # end function

# Try the generator once to check that it runs and returns 10 values.
test <- gen.dat()

# Draw one sample of snout-vent lengths per species. replicate() calls
# gen.dat() once for every species, in order; simplify = FALSE keeps the
# result as a list, so svl.draws[[k]] holds the values for species k.
svl.draws <- replicate(length(spp), gen.dat(), simplify = FALSE)

# Concatenate the per-species samples into one vector ordered like `subject`.
svl <- unlist(svl.draws)
length(svl)

# Same again for mass; each call to gen.dat() draws fresh population
# parameters, so mass is simulated independently of svl.
mass.draws <- replicate(length(spp), gen.dat(), simplify = FALSE)
mass <- unlist(mass.draws)

# Assemble the simulated data set and look at the first few rows.
examp <- data.frame(subject, treatment, svl, mass)
head(examp)

# Recover the species name from each subject ID ("<species>.<number>"):
# split on the literal "." (fixed = TRUE, so it is not treated as a regular
# expression) and keep the first piece.
subject2 <- strsplit(as.character(examp$subject), split = ".", fixed = TRUE)
# vapply() guarantees exactly one character value per subject, whereas the
# return type of sapply() depends on its input.
examp$spp <- vapply(subject2, function(parts) parts[1], character(1))

# Data frame of mean svl per spp*trtmt combination.
svl.means <- aggregate(
  examp$svl,
  by = list("spp" = examp$spp, "trt" = examp$treatment),
  FUN = mean
)

# Data frame of the standard error (sd / sqrt(n)) per spp*trtmt combination.
svl.se <- aggregate(
  examp$svl,
  by = list("spp" = examp$spp, "trt" = examp$treatment),
  FUN = function(x) {
    sd(x) / sqrt(length(x))
  }
)

# Merge means and SE by species and treatment into one summary data frame.
# Both inputs call their value column "x", so merge() returns them as "x.x"
# and "x.y"; rename all four columns to something meaningful.
svl.summ <- merge(svl.means, svl.se, by = c("spp", "trt"))
names(svl.summ) <- c("spp", "trtmt", "mean", "SE")


# Close every open graphics device so the plots below start on a fresh device.
graphics.off()
# With a factor on the right-hand side of the formula, plot() draws one box
# per species; each box summarises that species' rows of svl.summ (one row
# per spp*trtmt combination).
plot(svl.summ$mean ~ as.factor(svl.summ$spp))
svl.summ$spp <- as.factor(svl.summ$spp) # store spp as a factor so it can be converted to integer level codes
plot(svl.summ$mean ~ as.numeric(svl.summ$spp)) # plot the means as points against each species' integer level code

# Sample-size calculation: n is left unspecified, so power.t.test() solves for
# the number of observations per group needed to reach 90% power to detect a
# difference in means of delta = 1. sd and sig.level are not given, so the
# function's defaults apply. The result is printed, not stored.
power.t.test(power = .90, delta = 1)