## For this project/R script, we will create a simulated data set matching the
## example/frogs data file we used for analysis yesterday and this morning.

# Intro: creating variables and generating random data ----

# As an intro to creating variables and generating random data, consider the
# following code.

# We can use the rep() function to specify a set of characters, how many of
# each character should be used (each), and how many times the repeated set
# should be put into the vector (times).
pop <- rep(letters[1:3], each = 2, times = 6)

# If we leave out one of the arguments, R assumes that argument takes a
# value of 1.
sex <- rep(c("m", "f"), times = 18)

# The runif() function draws pseudo-random numbers from a uniform
# distribution - here we draw 36 numbers from a uniform distribution that goes
# from 10 to 100.
# I used the variable name "length" here; while R copes with this just fine
# without getting confused (calls such as length(x) still find the function),
# it is generally bad policy and best avoided.
length <- runif(36, 10, 100)

# We calculate a mass to go with each length by adding a uniform value that
# ranges from 0 to 5 to the length value. The addition is vectorized, which
# saves us from having to loop through each element of the vector.
# We draw one random value per observation (36 of them): runif(1, 0, 5) would
# draw a single value and recycle it, shifting every length by the same
# constant instead of adding independent noise.
mass <- length + runif(36, 0, 5)

# Now we can put our variables together into a data frame.
dat <- data.frame(pop, sex, length, mass)
dat # let's look at the data created

# We can set the number of significant digits to be more reasonable by
# modifying our option setting "digits".
options(digits = 4)
dat # the default number of significant digits printed has now changed

## Okay - given that introduction we will now

# Recreate frogs data set used earlier ----

# Vector of species names.
spp <- c("bullfrogs", "woodfrogs", "grays", "spring_peepers", "toads")

# Number of individuals (reps) simulated per species.
n.reps <- 10

# Preallocate the "subject" vector at its final size rather than growing it
# inside the loop.
subject <- character(length(spp) * n.reps)

## Use nested loops to input appropriate subject.id values into the vector
## "subject": elements 1-10 are labelled with the first spp name and numbers
## 1-10; elements 11-20 are labelled with the 2nd species and numbers 1-10,
## etc.
## (Vectorized equivalent, for reference:
##   paste(rep(spp, each = n.reps), rep(seq_len(n.reps), times = length(spp)),
##         sep = "."))
for (j in seq_along(spp)) { # cycle through the 5 species
  for (i in seq_len(n.reps)) { # cycle through the 10 reps of each spp
    subject[i + ((j - 1) * n.reps)] <- paste(spp[j], i, sep = ".")
  } # close loop i
} # close loop j
subject # look at the variable we've created

# Treatment level names, and the treatment assigned to each individual:
# within each species the first 5 individuals get A and the next 5 get B.
treat.types <- factor(LETTERS[1:2])
treatment <- rep(treat.types, each = 5, times = 5)

# Check that the lengths of the two variables are equal.
length(subject)
length(treatment)

# Function to generate the response variable ----

# gen.dat(): simulate a response for the 10 individuals of one species.
#
# Takes no arguments. Returns a numeric vector of length 10: elements 1-5 are
# draws for treatment A, elements 6-10 are draws for treatment B.
#
# Side effect: the simulation parameters muA, sdA, muB and sdB are assigned
# with `<<-`, i.e. outside the function (into the global environment when the
# function is defined at top level, as here), so they can be inspected after a
# call. Every call overwrites the values from the previous call.
gen.dat <- function() {
  # The population from treatment A has a mean drawn from a uniform
  # distribution ranging from 2 to 5 ...
  muA <<- runif(1, 2, 5)
  # ... and a sd drawn from a uniform distribution ranging from 0.2 to 0.9.
  sdA <<- runif(1, 0.2, 0.9)
  # The population mean for treatment B is the treatment A mean plus an
  # offset drawn from a normal distribution with mean 2 and sd 1.
  muB <<- muA + rnorm(1, 2, 1)
  # Both treatments share the same sd.
  sdB <<- sdA

  # Five random values for treatment A followed by five for treatment B.
  # rnorm(5, ...) consumes the random stream exactly like five successive
  # rnorm(1, ...) calls, so no explicit loop is needed.
  c(rnorm(5, muA, sdA), rnorm(5, muB, sdB))
} # end function

test <- gen.dat()

# Snout-vent length (svl): one call per species, concatenated in species
# order so the values line up with "subject" and "treatment".
svl1 <- gen.dat()
svl2 <- gen.dat()
svl3 <- gen.dat()
svl4 <- gen.dat()
svl5 <- gen.dat()
svl <- c(svl1, svl2, svl3, svl4, svl5)
length(svl)

# Mass: generated the same way (this replaces the "mass" vector from the
# intro example above).
mass1 <- gen.dat()
mass2 <- gen.dat()
mass3 <- gen.dat()
mass4 <- gen.dat()
mass5 <- gen.dat()
mass <- c(mass1, mass2, mass3, mass4, mass5)

examp <- data.frame(subject, treatment, svl, mass)
head(examp)

# Summarise svl by species and treatment ----

# Recover the species name from the subject id ("<spp>.<rep>"): split on the
# literal "." and keep the first piece. vapply() guarantees a character
# vector with one value per subject.
subject2 <- strsplit(as.character(examp$subject), split = ".", fixed = TRUE)
examp$spp <- vapply(subject2, "[", character(1), 1)

# Data frame of means per spp*trtmt combination.
svl.means <- aggregate(
  examp$svl,
  by = list("spp" = examp$spp, "trt" = examp$treatment),
  mean
)

# Data frame of SE per spp*trtmt combination.
svl.se <- aggregate(
  examp$svl,
  by = list("spp" = examp$spp, "trt" = examp$treatment),
  function(x) {
    sd(x) / sqrt(length(x))
  }
)

# Merge means and SE data frames by spp and treatment to create the summary
# data frame, then rename its variables.
svl.summ <- merge(svl.means, svl.se, by = c("spp", "trt"))
names(svl.summ) <- c("spp", "trtmt", "mean", "SE")

# Plots ----

graphics.off() # close any open graphics devices
plot(svl.summ$mean ~ as.factor(svl.summ$spp))

svl.summ$spp <- as.factor(svl.summ$spp) # make spp a factor
plot(svl.summ$mean ~ as.numeric(svl.summ$spp)) # plot means as points

# Power analysis ----

# Sample size per group needed for 90% power to detect a difference of 1
# (remaining arguments are left at the power.t.test() defaults).
power.t.test(power = .90, delta = 1)