Home page

Question 1

##################################################
# FUNCTION: count_zeroes
# counts all the zeroes in a numeric vector using an explicit loop
# input: vect -> a numeric vector (may be empty; NA entries are not counted)
# output: counter -> the number of zeroes in vect
#------------------------------------------------- 
count_zeroes <- function(vect = sample(-10:10, size = 100, replace = TRUE)) {
  counter <- 0
  # seq_along() gives an empty sequence for a zero-length vector, whereas
  # 1:length(vect) would iterate over c(1, 0) and index past the end.
  for (i in seq_along(vect)) {
    # isTRUE() treats an NA comparison as "not a zero" instead of making
    # if() fail with "missing value where TRUE/FALSE needed".
    if (isTRUE(vect[i] == 0)) {
      counter <- counter + 1
    }
  }
  counter
}

# Shared test vector so both zero-counting functions are checked on the same
# input. TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
vect <- sample(-10:10, size = 100, replace = TRUE)
count_zeroes(vect)
## [1] 3

Question 2

##################################################
# FUNCTION: count_zeroesSingle
# counts all the zeroes in a numeric vector in a single line of code
# input: vect -> a numeric vector (may be empty; NA entries are not counted)
# output: the number of zeroes in vect (int)
#------------------------------------------------- 
count_zeroesSingle <- function(vect = sample(-10:10, size = 100, replace = TRUE)) {
  # which() returns only the positions where the comparison is TRUE, so NA
  # comparisons are dropped before subsetting.
  length(vect[which(vect == 0)])
}
# Same input vector as the count_zeroes(vect) call, so the two counts should agree
count_zeroesSingle(vect)
## [1] 3

Question 3

##################################################
# FUNCTION: product_matrix
# creates a matrix where each element is the product of its row # and col #
# input: nrows, ncols -> number of rows and cols; fractional values (such as
#        the random defaults) are truncated to whole numbers
# output: integer matrix of size nrows x ncols with element [i, j] = i * j
# errors: stops if either dimension is missing or less than 1 after truncation
#------------------------------------------------- 
product_matrix <- function(nrows = runif(1, min = 1, max = 10),
                           ncols = runif(1, min = 1, max = 10)) {
  # matrix() truncates fractional extents silently; do it explicitly so the
  # validation below sees the sizes that will actually be used.
  nrows <- as.integer(nrows)
  ncols <- as.integer(ncols)

  # Validate before allocating: a negative extent would otherwise fail inside
  # matrix() with an unrelated error before this check could run.
  if (is.na(nrows) || is.na(ncols) || nrows < 1L || ncols < 1L) {
    stop("nrows and ncols must both be greater than zero", call. = FALSE)
  }

  # row() and col() give each cell's row and column index, so their
  # elementwise product fills the whole matrix without a nested loop.
  cells <- matrix(NA, nrow = nrows, ncol = ncols)
  row(cells) * col(cells)
}
# No arguments: both dimensions come from the function's random defaults
product_matrix()
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    2    4    6    8
## [3,]    3    6    9   12
## [4,]    4    8   12   16
## [5,]    5   10   15   20

Question 4

Part a)

# Simulate three treatment groups with 20 observations each. Every column
# uses the same n on purpose: data.frame() silently recycles a shorter column
# (n = 10 would be repeated twice), duplicating every group_3 value.
df <- data.frame(
  group_1 = rnorm(n = 20, mean = 5),
  group_2 = rnorm(n = 20, mean = 10),
  group_3 = rnorm(n = 20, mean = 15)
)

# Reshape from wide (one column per group) to long (one row per observation),
# then convert the resulting tibble back to a plain data.frame.
mydf <- df %>%
  pivot_longer(
    cols = everything(),
    names_to = "group",
    values_to = "response_values"
  ) %>%
  as.data.frame()
str(mydf)
## 'data.frame':    60 obs. of  2 variables:
##  $ group          : chr  "group_1" "group_2" "group_3" "group_1" ...
##  $ response_values: num  5.68 10.13 17.39 5.69 8.61 ...

Part b)

##################################################
# FUNCTION: shuffle_data
# shuffles the response values of a given data frame and returns the mean of
# the shuffled values for each treatment group
# input: df -> the data frame containing the treatment and value columns
#        group_col -> the name of the column containing treatment groups
#        resp_col -> the name of the column containing the response values
# output: an unnamed numeric vector of shuffled group means, one per group,
#         ordered by sorted group label (factor-level order for a factor)
# errors: stops if df is not a data frame or a named column is missing
#------------------------------------------------- 
shuffle_data <- function(
    df = data.frame(
      group = sample(
        c("group_1", "group_2", "group_3"),
        size = 20, replace = TRUE
      ),
      response = runif(n = 20, min = 0, max = 20)
    ),
    group_col = "group",
    resp_col = "response") {
  stopifnot(
    is.data.frame(df),
    group_col %in% names(df),
    resp_col %in% names(df)
  )

  # [[ ]] always extracts a plain vector (data.frame or tibble alike), and the
  # columns are used by name directly, so nothing is renamed and columns that
  # already happen to be called "group" or "response" cannot collide.
  responses <- df[[resp_col]]
  groups <- df[[group_col]]

  # Permute by index: sample(x) on a single number x would return a
  # permutation of 1:x rather than x itself (single-row data frames).
  shuffled <- responses[sample.int(length(responses))]

  # Ignore factor levels that have no rows so they do not yield NA means.
  if (is.factor(groups)) {
    groups <- droplevels(groups)
  }

  # tapply() returns a named 1-d array; as.numeric() strips it to a vector.
  as.numeric(tapply(shuffled, groups, mean))
}

# Default call: shuffles a freshly generated random 20-row data frame
shuffle_data()
## [1] 10.505888  8.140683 15.206414
# Shuffle the long-format simulated data, naming its group and response columns
shuffle_data(mydf,"group","response_values")
## [1]  9.637386  9.222032 11.202178

Part c)

# Run the shuffle 100 times and keep the three group means from each run.
# vapply() fills a 3 x n_reps matrix in one go instead of growing a matrix
# with rbind() on every iteration, and it stops with an error if any run does
# not return exactly three means.
n_reps <- 100
shuffled_means <- vapply(
  seq_len(n_reps),
  function(rep_num) shuffle_data(),
  numeric(3)
)

# Transpose so each row is one replicate, and prepend the replicate number.
mat <- cbind(seq_len(n_reps), t(shuffled_means))
mean_reps <- data.frame(mat)
colnames(mean_reps) <- c("rep_num", "group1", "group2", "group3")
str(mean_reps)
## 'data.frame':    100 obs. of  4 variables:
##  $ rep_num: num  1 2 3 4 5 6 7 8 9 10 ...
##  $ group1 : num  10.46 13.16 6.78 11.34 12.97 ...
##  $ group2 : num  7.45 10.85 10.05 8.07 9.59 ...
##  $ group3 : num  8.41 9.68 8.95 8.91 10.65 ...

Part d)

# Stacked histogram of the shuffled group means: reshape to long form so that
# `group` can drive the fill colour, then draw the three groups in one layer.
p2 <- mean_reps %>%
  pivot_longer(
    cols = c("group1", "group2", "group3"),
    names_to = "group",
    values_to = "value"
  ) %>%
  ggplot(mapping = aes(x = value, fill = group)) +
  # bins = 30 is ggplot2's default, written out so the plot is unchanged but
  # stat_bin() no longer prints its "using `bins = 30`" message.
  geom_histogram(
    bins = 30, color = "black", alpha = 0.5, position = "stack"
  ) +
  labs(
    title = "Distribution of means for each group",
    caption = "Note: bins are stacked",
    x = "Mean", y = "Count", fill = "Group"
  )
p2