Initialization

On your computer, create a folder/directory named RLabIntro. If one exists, you should probably delete its contents (or save the files elsewhere). Initialize R with the following commands:

# RStudio menu: Session -> Clear Workspace
# RStudio menu: Session -> Set Working Directory -> Choose Directory
#   [i.e., the RLabIntro folder]
# Print 4 significant digits and wrap console output at 80 characters.
options(digits = 4, width = 80)
# Use sum-to-zero contrasts for unordered factors and polynomial contrasts
# for ordered factors -- this is important for ANOVA!
options(contrasts = c("contr.sum", "contr.poly"))

RStudio Windows

RStudio contains windows for viewing and editing scripts, entering commands, plotting figures, and inspecting variables.

Read & Write Data Files

# Download the lab's data files from the course web site. Each load() call
# restores the objects stored in that .rda file into R's memory.
load(file = url("http://pnb.mcmaster.ca/bennett/psy710/datasets/RLabIntro.rda"))
load(file = url("http://pnb.mcmaster.ca/bennett/psy710/labs/L1/corDataSet.rda"))
load(file = url("http://pnb.mcmaster.ca/bennett/psy710/labs/L1/iq2.rda"))
load(file = url("http://pnb.mcmaster.ca/bennett/psy710/labs/L1/aov_dat.rda"))

In the console (bottom left), type the following commands and inspect the results. After saving corDataSet to your hard disk, use a spreadsheet application (e.g., Excel) to open the file and inspect the data.

getwd()  # show the current working directory
dir()    # list the files in the working directory
ls()     # list the objects held in R's memory

# Write corDataSet to disk as a CSV file, without a row-name column:
write.csv(corDataSet, file = "corData.csv", row.names = FALSE)
dir()  # check to see that corData.csv is there

# Save one object in R's .rda format and write another as CSV:
save(t.wide.df, file = "myNewDataFile.rda")
write.csv(t.long.df, file = "myCSVfile.csv", row.names = FALSE)
dir()  # is it there? Open it with excel

# Read the CSV file written above back into a new variable:
myData <- read.csv(file = "corData.csv")

Next, save several variables in a single file using save, delete them using rm, and then re-load them using load.

# Store three objects together in a single .rda file:
save(aov.dat, myData, corDataSet, file = "allData.rda")
dir()
# Delete the objects from memory [run ls() to make sure they are gone!]
rm(aov.dat, corDataSet, myData)
# Load all three objects back into R from the file:
load(file = "allData.rda")

Help

You can get help on most (all?) features in R by typing ? or help in the command window. The documentation appears in RStudio’s lower-right window.

# The ? shortcut opens the help page for a function:
?mean  # help on the mean function
?sd    # help on the sd function
?help  # help on the help function itself
# help() is the function form of the same lookup:
help("var")
help(package = "stats")  # overview of the stats package

Create an R Script

Create a new script file with the New Script command, which can be found on the File menu.

Creating a new script.

Type the following commands into the script file (one command per line).

# Name: YOUR_NAME_HERE
# Date: 08-Sep-2022
getwd()  # show the current working directory
dir()    # list the files in the working directory
ls()     # list the objects held in R's memory

Save the script file in your working directory. Finally, execute the commands by 1) selecting the commands with the cursor/mouse; and then 2) pressing Command-Return (macOS) or Ctrl-Enter (Windows/Linux) on the keyboard. The commands and results should appear in the console below the script window.

R is a Calculator

help(package = "base")  # overview of the base package

# arithmetic:
24 + 10  # addition
17 - 8   # subtraction
4 * 5    # multiplication
32 / 6   # division
2^3      # exponentiation

# logicals (each comparison returns TRUE or FALSE):
5 < 10
6 > 7
15.5 == 15.5

# common math functions:
sqrt(17)
log10(1200)  # log base 10
log(1200)    # natural log

# wrapping an assignment in parentheses also displays the result:
(e <- exp(1))    # e
(y <- log(6.7))  # compute, display result, and save in y
exp(y)  # exp() undoes log(), so this returns 6.7
e^y     # same value, computed from e directly

Creating Variables

The results of almost any operation or calculation can be stored in variables.

abc <- c("a", "b", "c")
class(abc)   # TYPE of variable
length(abc)  # number of elements in the variable
(myLetters <- letters[1:10])  # get 1st 10 letters (see ?LETTERS)
class(myLetters)
length(myLetters)
myLetters[3:6]  # elements 3 through 6
myVar <- c(1, 2, 4, 5)  # try myVar <- seq(1,5) and myVar <- 1:5
class(myVar)
(myVar2 <- c(myVar, myVar / 2, myVar * 3))  # join three vectors end to end
myVar3 <- seq(from = 0, to = 72, by = 3)  # sequence of numbers
length(myVar3)
myVar3[1:6]          # first 6 elements of myVar3
myVar3[c(1, 8, 14)]  # 1st, 8th, and 14th elements
range(myVar3)        # print the min,max
mean(myVar3)         # print the mean
repVar <- rep(c(1, 2, 3), each = 3, times = 2)  # repeated sequences
(isLetterC <- myLetters == "c")  # vector of LOGICAL values

# store everything in a single "list":
varList <- list(
  v1 = abc,
  v2 = myLetters,
  v3 = myVar,
  v4 = myVar2,
  v5 = myVar3,
  v6 = repVar
)
class(varList)
varList[["v2"]]  # note double brackets!

Data frames

A data frame is a very important class of variable that consists of multiple, equal-length variables that are stored in different columns.

var1 <- rnorm(n = 10, mean = 100, sd = 10)  # normal random variables
var2 <- rep(c(1, 2), each = 5)  # can you predict contents of var2
# very important type of variable!! A single label is used as a prefix,
# so the two levels are named "group1" and "group2":
var3 <- factor(x = var2, labels = "group")
df0 <- data.frame(groupNumber = var2, group = var3, vNorm = var1)
head(df0)           # first few rows
dim(df0)            # dimensions [rows,columns]
summary(df0)        # summary of each variable
sapply(df0, class)  # the class of each variable (see ?sapply)
# data.frame(myVar3,myLetters) # doesn't work... why not?

# use $ to access individual variables INSIDE a dataframe:
median(df0$vNorm)
sort(df0$vNorm)
levels(df0$group)  # names of levels in the factor, group
mean(df0$groupNumber)
# mean(df0$group) # does this work?

Using plot

R has many ways of graphing data. Here we will use the plot command to make a scatter plot.

set.seed(9810)  # set seed for default random number generator [?set.seed]
N <- 100
x0 <- rnorm(n = N, mean = 0, sd = 1)
# x1 is x0 plus a second, independent normal sample, scaled by 1/sqrt(2):
x1 <- (1 / sqrt(2)) * (x0 + rnorm(n = N, mean = 0, sd = 1))
# compute mean, range, and sd of x0 and x1
cor(x0, x1)       # correlation
cor.test(x0, x1)  # significance test [see ?cor.test]
# Save a copy of the graphical parameters. no.readonly = TRUE keeps only the
# settable ones, so a later par(op) restores them without warnings about
# read-only parameters that cannot be set.
op <- par(no.readonly = TRUE)
par(cex = 2)  # set size of various plot elements
plot(
  x = x0,
  y = x1,
  type = "p",       # type of plot is "points"
  xlab = "x0",      # label for x axis
  ylab = "x1",      # label for y axis
  xlim = c(-4, 4),  # range of x and y axes
  ylim = c(-4, 4)
)

abline(a = 0, b = 1, lty = 1, lwd = 1)  # add identity line x0=x1 [see ?abline]
# par(op) # run this when you are finished to restore the saved parameters

Task:

  • Complete the following tasks with commands added to a new script.
  • Create two new variables: score1 = 100 + (2 times x0) and score2 = 100 + (2 times x1)
  • Put score1 and score2 into a data frame named score.df.
  • Use ?plot.default and ?points to read various options for the plot function.
  • Plot score1 and score2 using red triangles for the points.
  • Make sure the x and y axes have equal scales/ranges.
  • Add a dotted identity line to the plot.
  • Save and execute your script.