Primitive types

Numeric and integer

x <- 1
y <- 2L

cat("R is non-declarative. It means we don't have to declare the type of variables before creating them. \n It also means that the class (type) of the variables will be assigned automatically!")

R is non-declarative. It means we don’t have to declare the type of variables before creating them. It also means that the class (type) of the variables will be assigned automatically!

[1] “numeric”

[1] “integer”

cat("Change the class of a variable - coercing \n")

Change the class of a variable - coercing

[1] “integer”

z <- 1.314
v <- 1.678
v2 <- 2.1
class(z)

[1] “numeric”

[1] 1

[1] 1

[1] 2

Character

a <- "a"
a1 <- 'b'

b <- "The red fox"
class(a)
## [1] "character"
class(a1)
## [1] "character"
## [1] "character"
#"Careful!"
c <- "1"
class(c)
## [1] "character"

Logical

a <- TRUE
b <- FALSE

print(a)
## [1] TRUE
## [1] "logical"
#BUT!

c <- "TRUE"

class(c)
## [1] "character"
## [1] 1
## [1] 0
a <- as.logical(0L)
b <- as.logical(1)
print(a)
## [1] FALSE
## [1] TRUE

Operators

Arithmetic operators

Operator Description
+ Addition
- Subtraction
* Multiplication
/ Division
^ Exponent
%% Modulus (Remainder from division)
%/% Integer Division

Relational operators

Operator Description
< Less than
> Greater than
<= Less than or equal to
>= Greater than or equal to
== Equal to
!= Not equal to

Logical operators

Operator Description
! Logical NOT
& Element-wise logical AND
&& Logical AND (scalar, short-circuit)
| Element-wise logical OR
|| Logical OR (scalar, short-circuit)

Assignment operators

Operator Description
<-, <<-, = Leftwards assignment
->, ->> Rightwards assignment

Operations with primitive types

num1 <- 15.12
num2 <- 6.54
num3 <- 1

char1 <- "a"
char2 <- "b"
char3 <- "1"

logic1 <- TRUE
logic2 <- FALSE

num1+num2
## [1] 21.66
num1-num2
## [1] 8.58
num1*num2
## [1] 98.8848
num1/num2
## [1] 2.311927
num2^2
## [1] 42.7716
num1%%num2
## [1] 2.04
num1%/%num2
## [1] 2
try(char1+num1)
## Error in char1 + num1 : non-numeric argument to binary operator
try(char3+num1)
## Error in char3 + num1 : non-numeric argument to binary operator
try(as.numeric(char1)+num1)
## Warning in doTryCatch(return(expr), name, parentenv, handler): NAs introduced by
## coercion
## [1] NA
try(as.numeric(char3)+num1)
## [1] 16.12
try(logic1+logic2)
## [1] 1
try(num1+logic1)
## [1] 16.12
num1<num2
## [1] FALSE
num1!=num2
## [1] TRUE
num1==num2
## [1] FALSE
try(char1<char2) 
## [1] TRUE
# alphabetical order 
try(char1==char2)
## [1] FALSE
try(char1!=char2)
## [1] TRUE
#But!

try(char1>num2)
## [1] TRUE
try(char1<=num3)
## [1] FALSE
try(char3<=num3)
## [1] TRUE
logic1<logic2
## [1] FALSE
logic1!=logic2
## [1] TRUE
logic1==logic2
## [1] FALSE
#Logical operators

TRUE & FALSE
## [1] FALSE
TRUE | FALSE
## [1] TRUE
!TRUE
## [1] FALSE
#Floating-point comparison: the result depends on binary rounding (and possibly on your platform), not on the locale

x1 <- 0.5 - 0.3
x2 <- 0.3 - 0.1
x1 == x2
## [1] FALSE
# should use all.equal instead
all.equal(x1, x2)
## [1] TRUE
#concatenate characters

paste(char1, char2)
## [1] "a b"
paste(char1, char2, sep="_")
## [1] "a_b"
paste0(char1, char2)
## [1] "ab"

Simple types (S3 classes)

Vectors

Vectors are one-dimensional collections of data of the same type. Indexing in R is always 1-based.

#empty vector

a <- vector()
a <- c()

# integer vector

a <- c(1L,2L,3L)
a <- 1:3

typeof(a)
## [1] "integer"
## [1] "integer"
#It can handle only one type! It will coerce the data into the most permissive type:
b <- c(1,2,3)

a <- c(1L,"two",3L)
typeof(a)
## [1] "character"
a <- c(1L,2,3L)
typeof(a)
## [1] "double"
#vector functions

length(a)
## [1] 3
a2 <- c(a,a) #combine vectors
a2
## [1] 1 2 3 1 2 3
#compare vectors

a==b
## [1] TRUE TRUE TRUE
a==a
## [1] TRUE TRUE TRUE
#but!
a==a2
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
# Element-wise vs. scalar logical operators on two logical vectors.
a <- c(TRUE, TRUE, FALSE)
b <- c(TRUE, TRUE, TRUE)

# `&` and `|` are vectorised: they compare the vectors position by position.
a & b
## [1]  TRUE  TRUE FALSE
# `&&` and `||` expect length-one operands; since R 4.3.0 they raise an error
# for longer vectors (older versions silently used only the first element),
# so the first elements are compared explicitly here.
a[1] && b[1]
## [1] TRUE
a | b
## [1] TRUE TRUE TRUE
a[1] || b[1]
## [1] TRUE
# subsetting vectors 
# Indexing is 1-based

b <- c(1,2,4, NA)
a <- c(1,2,3)

a[1]
## [1] 1
b[c(1,2)]
## [1] 1 2
b[1:2]
## [1] 1 2
a[a>2]
## [1] 3
b[b>2]
## [1]  4 NA
b[!is.na(b)]
## [1] 1 2 4
## [1] 1 2 3
#named vectors

names(a) <- c("one", "two", "three")

a
##   one   two three 
##     1     2     3
a["one"]
## one 
##   1
paste(a, collapse = ",")
## [1] "1,2,3"
#other useful functions

sum(a)
## [1] 6
mean(a)
## [1] 2
## [1] 2
sd(a)
## [1] 1
# b contains an NA, so the plain mean is NA
mean(b)
## [1] NA
# na.rm = TRUE drops missing values before averaging
# (spelled out as TRUE because the shorthand T can be reassigned)
mean(b, na.rm = TRUE)
## [1] 2.333333

Factors

Factors are a rather unique type in R. They are very useful in some cases, but in my experience they can be a source of errors if not handled properly. They are used for categorical variables. Internally they are represented as integers, but they have labels. The set of labels (levels) is fixed.

a <- factor(c("a", "b", "c", "a"), levels = c("a", "b", "c", "d"))
a
## [1] a b c a
## Levels: a b c d
## [1] "factor"
## [1] "integer"
a[2] <- "d"
#but!
a[2] <- "e"
## Warning in `[<-.factor`(`*tmp*`, 2, value = "e"): invalid factor level, NA
## generated
a <- factor(c("1", "2", "3"), levels = c("1", "2", "3", "4"))
b <- factor(c("1", "4", "3"), levels = c("1", "4", "3", "2"))

as.numeric(b)==as.numeric(a)
## [1] TRUE TRUE TRUE
a==b
## [1]  TRUE FALSE  TRUE
sort(a)
## [1] 1 2 3
## Levels: 1 2 3 4
sort(b)
## [1] 1 4 3
## Levels: 1 4 3 2
## [1]  TRUE FALSE  TRUE

Date and time

x <- as.Date("1971-01-01")
unclass(x)
## [1] 365
## [1] "double"
## $class
## [1] "Date"
current_date <- Sys.time()
current_date
## [1] "2021-05-07 09:23:28 UTC"
format(current_date, "%d %b %Y")
## [1] "07 May 2021"
current_date <- Sys.Date()

format(current_date-1, "%d %b %Y")
## [1] "06 May 2021"

Lists

Similar to vectors, but can take lists as elements. The elements can have different types.

a <- list(c(1, 2),2,3)
a
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 3
a <- list(c(1:4), "two", "three")
a
## [[1]]
## [1] 1 2 3 4
## 
## [[2]]
## [1] "two"
## 
## [[3]]
## [1] "three"
a <- list(list(c(1, 2),2,3,4), "two", "three")
a
## [[1]]
## [[1]][[1]]
## [1] 1 2
## 
## [[1]][[2]]
## [1] 2
## 
## [[1]][[3]]
## [1] 3
## 
## [[1]][[4]]
## [1] 4
## 
## 
## [[2]]
## [1] "two"
## 
## [[3]]
## [1] "three"
#can have names

b <- list(numbers=c(1:4),letters=c("two", "three"))


#referring to an element/subsetting

a[[1]]
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 3
## 
## [[4]]
## [1] 4
a[[1]][[1]]
## [1] 1 2
a[[1]][[1]][1]
## [1] 1
b$letters[1]
## [1] "two"
#functions

length(a)
## [1] 3
# `a` is an unnamed list, so `a$letters` is NULL and its length is 0;
# the element named "letters" belongs to `b` (i.e. `b$letters`)
length(a$letters)
## [1] 0

Matrix

A data format for storing values in a matrix (rows and columns). The elements can be of any primitive type, but all elements have to be of the same type.

letter_mat <- matrix(data=c(rep("a", 3), rep("b", 3)), nrow = 3)

num_mat <- matrix(1:9, nrow=3, ncol=3)

# a matrix can have both row names and column names (set with rownames() / colnames())

num_mat*num_mat #element-wise multiplication  
##      [,1] [,2] [,3]
## [1,]    1   16   49
## [2,]    4   25   64
## [3,]    9   36   81
num_mat %*% num_mat #matrix multiplication 
##      [,1] [,2] [,3]
## [1,]   30   66  102
## [2,]   36   81  126
## [3,]   42   96  150
t(num_mat)  #Transpose
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
# many other matrix-specific functions and operators

#subsetting

letter_mat[1,2]
## [1] "b"
letter_mat[1,]
## [1] "a" "b"
letter_mat[1:2]
## [1] "a" "a"
letter_mat[3]
## [1] "a"
letter_mat[,1]
## [1] "a" "a" "a"
#combining
cbind(letter_mat, num_mat)
##      [,1] [,2] [,3] [,4] [,5]
## [1,] "a"  "b"  "1"  "4"  "7" 
## [2,] "a"  "b"  "2"  "5"  "8" 
## [3,] "a"  "b"  "3"  "6"  "9"
rbind(letter_mat, letter_mat)
##      [,1] [,2]
## [1,] "a"  "b" 
## [2,] "a"  "b" 
## [3,] "a"  "b" 
## [4,] "a"  "b" 
## [5,] "a"  "b" 
## [6,] "a"  "b"
#row and column-wise operations
#matrixStats package - fast!


num_mat <- matrix(runif(100), nrow=10, ncol=10)

rowMeans(num_mat)   
##  [1] 0.5537849 0.5104701 0.4483572 0.4573370 0.4705051 0.5576114 0.4747559
##  [8] 0.6315580 0.4261982 0.3760953
matrixStats::rowMeans2(num_mat)
##  [1] 0.5537849 0.5104701 0.4483572 0.4573370 0.4705051 0.5576114 0.4747559
##  [8] 0.6315580 0.4261982 0.3760953
rowSums(num_mat)    
##  [1] 5.537849 5.104701 4.483572 4.573370 4.705051 5.576114 4.747559 6.315580
##  [9] 4.261982 3.760953
colMeans(num_mat)
##  [1] 0.5879683 0.5693501 0.3580645 0.4077061 0.5715438 0.3225014 0.4858095
##  [8] 0.4974807 0.6395245 0.4667241
colSums(num_mat)    
##  [1] 5.879683 5.693501 3.580645 4.077061 5.715438 3.225014 4.858095 4.974807
##  [9] 6.395245 4.667241

Data frame

2D object type. Rows and columns with names. The columns can have different element types.

df <- data.frame(value=c(1,2,3), "First name"=as.factor(c("John", "John", "Peter")), last_name=as.factor(c("Smith", "Black", "Rabbit")), row.names = c("Patient 1", "Patient 2", "Patient 3"))


df[1:2,]
##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black
df[c(TRUE, TRUE, FALSE),]
##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black
df[c("Patient 1", "Patient 2"),]
##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black
df[-3,]
##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black