Home page
Question 1
##################################################
# FUNCTION: count_zeroes
# Counts the zeroes in a numeric vector using an explicit for loop.
# input: vect -> a numeric vector; defaults to 100 draws (with replacement)
#                from the integers -10..10
# output: counter -> the number of elements of vect equal to 0 (numeric);
#                    0 for an empty vector; NA elements are not counted
#-------------------------------------------------
count_zeroes <- function(vect = sample(-10:10, size = 100, replace = TRUE)) {
  counter <- 0
  # seq_along() gives an empty sequence for a zero-length vector, whereas
  # 1:length(vect) would loop over c(1, 0) and index past the end.
  for (i in seq_along(vect)) {
    # Check for NA first so if() never receives a missing condition.
    if (!is.na(vect[i]) && vect[i] == 0) {
      counter <- counter + 1
    }
  }
  counter
}
# Shared input so both zero-counting functions can be checked against the
# same data: 100 draws, with replacement, from the integers -10..10.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
vect <- sample(-10:10, size = 100, replace = TRUE)
count_zeroes(vect)
## [1] 3
Question 2
##################################################
# FUNCTION: count_zeroesSingle
# Counts the zeroes in a numeric vector in a single line of code.
# input: vect -> a numeric vector; defaults to 100 draws (with replacement)
#                from the integers -10..10
# output: the number of elements of vect equal to 0 (integer);
#         NA elements are not counted
#-------------------------------------------------
count_zeroesSingle <- function(vect = sample(-10:10, size = 100, replace = TRUE)) {
  # Summing the logical mask counts the TRUEs; na.rm = TRUE skips NA
  # comparisons, matching how subset() dropped them.
  sum(vect == 0, na.rm = TRUE)
}
# Run on the same `vect` that was passed to count_zeroes above, so the two
# printed counts can be compared directly.
count_zeroesSingle(vect) # should print the same count as count_zeroes(vect)
## [1] 3
Question 3
##################################################
# FUNCTION: product_matrix
# Creates a matrix where each element is the product of its row number and
# its column number (a multiplication table).
# input: nrows, ncols -> number of rows and cols; fractional values are
#                        truncated toward zero. Each default draws a random
#                        size with runif(1, min = 1, max = 10).
# output: mat -> integer matrix of size nrows x ncols with mat[i, j] == i * j
# errors: stops if nrows or ncols is not a single number, or is smaller
#         than 1 after truncation
#-------------------------------------------------
product_matrix <- function(nrows = runif(1, min = 1, max = 10),
                           ncols = runif(1, min = 1, max = 10)) {
  if (!is.numeric(nrows) || length(nrows) != 1 ||
    !is.numeric(ncols) || length(ncols) != 1) {
    stop("nrows and ncols must each be a single number", call. = FALSE)
  }
  # matrix() truncates fractional dimensions; do it explicitly so the
  # validation below sees the size that will actually be used.
  nrows <- as.integer(nrows)
  ncols <- as.integer(ncols)
  # Validate before allocating: matrix() itself fails on a negative size, so
  # a check placed after it never gets to report the problem.
  if (is.na(nrows) || is.na(ncols) || nrows <= 0 || ncols <= 0) {
    stop("nrows and ncols must both be greater than zero", call. = FALSE)
  }
  template <- matrix(NA_integer_, nrow = nrows, ncol = ncols)
  # row() and col() return the row/column index of every cell, so their
  # elementwise product fills the whole table without a nested loop.
  mat <- row(template) * col(template)
  mat
}
# Demo call with no arguments: nrows and ncols fall back to their runif()
# defaults, so the dimensions change from run to run (the output shown below
# is one 5 x 4 result).
product_matrix()
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 2 4 6 8
## [3,] 3 6 9 12
## [4,] 4 8 12 16
## [5,] 5 10 15 20
Question 4
Part a)
# Simulate three treatment groups in wide format: 20 draws per group from
# normal distributions with means 5, 10 and 15 (sd left at the rnorm default).
# Every column uses the same n: data.frame() silently recycles a shorter
# vector, so a 10-value column would be repeated twice to fill 20 rows and
# each of its observations would appear in duplicate.
df <- data.frame(
  group_1 = rnorm(n = 20, mean = 5),
  group_2 = rnorm(n = 20, mean = 10),
  group_3 = rnorm(n = 20, mean = 15)
)
# Reshape to long format: one row per observation, with the source column
# name stored in `group` and the simulated value in `response_values`.
mydf <- df %>%
  pivot_longer(
    cols = everything(),
    names_to = "group",
    values_to = "response_values"
  )
# Make sure the result is a plain data.frame (the reshaped object may be a
# tibble).
mydf <- data.frame(mydf)
str(mydf)
## 'data.frame': 60 obs. of 2 variables:
## $ group : chr "group_1" "group_2" "group_3" "group_1" ...
## $ response_values: num 5.68 10.13 17.39 5.69 8.61 ...
Part b)
##################################################
# FUNCTION: shuffle_data
# Randomly permutes the response values of a data frame across its rows and
# returns the mean of the shuffled responses within each treatment group.
# input: df -> the data frame containing the treatment and value columns;
#              defaults to 20 rows with randomly assigned groups
#              ("group_1".."group_3") and runif(min = 0, max = 20) responses
#        group_col -> the name (string) of the column containing treatment groups
#        resp_col -> the name (string) of the column containing the response values
# output: means -> an unnamed numeric vector with one mean per group present
#                  in df, ordered by sorted group label (an NA group, if
#                  any, comes last)
# errors: stops if group_col or resp_col is not a single column name of df
#-------------------------------------------------
shuffle_data <- function(df = data.frame(
                           group = sample(
                             c("group_1", "group_2", "group_3"),
                             size = 20,
                             replace = TRUE
                           ),
                           response = runif(n = 20, min = 0, max = 20)
                         ),
                         group_col = "group",
                         resp_col = "response") {
  stopifnot(
    is.character(group_col), length(group_col) == 1,
    is.character(resp_col), length(resp_col) == 1,
    group_col %in% names(df), resp_col %in% names(df)
  )
  # [[ ]] always extracts a plain vector, for data.frames and tibbles alike.
  responses <- df[[resp_col]]
  # Permute by index: sample(x) on a single number >= 1 samples from 1:x
  # instead of returning x, which breaks one-row inputs.
  shuffled <- responses[sample.int(length(responses))]
  # Group by the column's values directly instead of renaming columns, so a
  # pre-existing column called "group" or "response" cannot collide.
  # addNA() keeps rows whose group is NA as their own (last) group.
  groups <- addNA(as.factor(df[[group_col]]), ifany = TRUE)
  # drop = TRUE omits factor levels that have no rows.
  by_group <- split(shuffled, groups, drop = TRUE)
  means <- vapply(by_group, mean, numeric(1))
  unname(means)
}
# Demo 1: no arguments, so the function builds and shuffles its own default
# random data frame.
shuffle_data()
## [1] 10.505888 8.140683 15.206414
# Demo 2: shuffle the long-format `mydf` from Part a, naming its group and
# response columns explicitly.
shuffle_data(mydf,"group","response_values")
## [1] 9.637386 9.222032 11.202178
Part c)
# Repeat the shuffle 100 times and keep the three group means from each
# replicate, one row per replicate.
n_reps <- 100
# Shuffle the Part a data (mydf), which always contains all three groups.
# Calling shuffle_data() with its random default data can leave a group out
# of a replicate, and the shorter result would then be recycled silently
# into the 4-column layout.
# vapply() fills a preallocated 3 x n_reps result instead of growing a matrix
# with rbind() on every pass, and stops if a replicate does not return
# exactly three means.
shuffled_means <- vapply(
  seq_len(n_reps),
  function(i) shuffle_data(mydf, "group", "response_values"),
  numeric(3)
)
# Transpose to one row per replicate and prepend the replicate number.
mat <- cbind(seq_len(n_reps), t(shuffled_means))
mean_reps <- data.frame(mat)
colnames(mean_reps) <- c("rep_num", "group1", "group2", "group3")
str(mean_reps)
## 'data.frame': 100 obs. of 4 variables:
## $ rep_num: num 1 2 3 4 5 6 7 8 9 10 ...
## $ group1 : num 10.46 13.16 6.78 11.34 12.97 ...
## $ group2 : num 7.45 10.85 10.05 8.07 9.59 ...
## $ group3 : num 8.41 9.68 8.95 8.91 10.65 ...
Part d)
# Stacked histogram of the shuffled group means, one fill colour per group.
# mean_reps is first reshaped to long format so the three mean columns share
# a single `value` column, with the source column recorded in `group`.
p2 <- mean_reps %>%
  pivot_longer(
    cols = c("group1", "group2", "group3"),
    names_to = "group"
  ) %>%
  ggplot(mapping = aes(x = value, fill = group)) +
  geom_histogram(position = "stack", alpha = 0.5, color = "black") +
  labs(
    x = "Mean",
    y = "Count",
    fill = "Group",
    title = "Distribution of means for each group",
    caption = "Note: bins are stacked"
  )
p2