Homework

Home page

library(tidyverse)
library(boot)
library(ggplot2)

##################################################
# FUNCTION: sim_data()
# Simulates QED values for hypothetical attractive and unattractive chemicals
# input: integer specifying the # rows for the df
# output: df 
#------------------------------------------------- 
simData <- function(size=1000) {
  simAttractive <- inv.logit(rnorm(n=size,mean=.8,sd=.8))
  simUnattractive <- inv.logit(rnorm(n=size,mean=-.88,sd=1.35))
 return(data.frame(simAttractive,simUnattractive))
}

##################################################
# FUNCTION: do_t_test()
# performs an independent t-test and return the p-value
# input: x , y -> numerical vectors to test; ch -> character string identifying the alt hypothesis ('t'= tests for any difference in means, 'g'= tests if the mean of x > the mean of y, 'l'= tests if the mean of x < the mean of y)
# output: the p-value of the t-test
#------------------------------------------------- 
do_t_test <- function(x=runif(10),y=runif(10),ch='t',equal_var=F) {
  tt <- t.test(x,y,alternative=ch,var.equal=equal_var)
  return(tt$p.value)
}

##################################################
# FUNCTION: QED_dplot()
# produces a density plot of 2 QED distributions
# input: df -> dataframe containing at least 2 QED distributions as columns; xcol,ycol -> specifies the index of the 1st and 2nd distributions, respectively
# output: dplot -> the density plot object
#------------------------------------------------- 
QED_dplot<- function(df=data.frame(runif(10),runif(10)),xcol=1,ycol=2){
  d1 <- density(df[,xcol])
  d2 <- density(df[,ycol])
  if(max(d1$y)>max(d2$y)){
    plot(d1, lwd = 2, col = "red",
    main = "Simulated Data", xlab = "QED",ylab="density")
    lines(d2, col = "blue", lwd = 2)
  } else {
      plot(d2, lwd = 2, col = "red",
      main = "Simulated Data", xlab = "QED",ylab="density")
      lines(d1, col = "blue", lwd = 2)
  }
}

# Global variables
#----------------------------------------------
attractiveCol <- 1 # column with data for attractive molecules
unattractiveCol <- 2 # column with data for unattractive molecules
Ha <- 'g' # the alternative hypothesis for the t test
#----------------------------------------------

# Program body
temp1 <- simData() # construct the data frame
do_t_test(temp1[,attractiveCol],temp1[,unattractiveCol],ch=Ha) # do the t test

## [1] 3.160798e-236

QED_dplot(temp1,attractiveCol,unattractiveCol) # plot the results

##################################################
# FUNCTION: QED_histplot()
# produces an overlapping histogram of the 2 QED distributions
# input: df -> dataframe containing at least 2 QED distributions as columns; attractive, unattractive -> specifies the index of the 1st and 2nd distributions, respectively; pval
# output: histplot -> the histogram plot object
#------------------------------------------------- 
QED_histplot<- function(df=data.frame(x1=runif(1000,max=.75),x2=runif(1000,min=0.25)),atractive=1,unatrractive=2,pval=''){
  atrcol <- df[,1]
  unatrcol <- df[,2]
  df <- data.frame(Attractive=atrcol,Unattractive=unatrcol)
  df <- df %>% pivot_longer(cols=c(Attractive,Unattractive),names_to="Distribution")
  histplot <- ggplot(data=df,aes(x=value,fill=Distribution))+
    geom_histogram(color='black',position='identity',alpha=0.5,bins=50)+
    scale_fill_manual(values=c('red','blue'))+
    labs(x='QED',title="Simulated QED distributions of aesthetically-judged chemical compounds")+
    annotate(geom='text',x=0.25,y=60,label=pval,color='black')
  return(histplot)
}

# Program body
temp1 <- simData() # construct the data frame
pvalue <- paste("p =",as.character(do_t_test(temp1[,attractiveCol],temp1[,unattractiveCol],ch=Ha))) # do the t test
QED_histplot(temp1,attractiveCol,unattractiveCol,pval=pvalue) # plot the results

Homework_09

Noah Beckage

3/23/2022