# Type inference: R picks the class of a variable from the value assigned to
# it, so nothing has to be declared up front.
x <- 1   # a plain number literal is stored as "numeric"
y <- 2L  # the L suffix asks for an integer
cat("R is non-declarative. It means we don't have to declare the type of variables before creating them. \n It also means that the class (type) of the variables will be assigned automatically!")
## R is non-declarative. It means we don't have to declare the type of variables before creating them.
##  It also means that the class (type) of the variables will be assigned automatically!
class(x)
## [1] "numeric"
class(y)
## [1] "integer"
cat("Change the class of a variable - coercing \n")
## Change the class of a variable - coercing
class(as.integer(x))
## [1] "integer"
z <- 1.314
v <- 1.678
v2 <- 2.1
class(z)
## [1] "numeric"
# as.integer() drops the fractional part; it does not round (1.678 -> 1).
print(as.integer(z))
## [1] 1
print(as.integer(v))
## [1] 1
print(as.integer(v2))
## [1] 2
# Character values: single and double quotes both create a character string,
# whatever its length.
a <- "a"
a1 <- 'b'
b <- "The red fox"
class(a)
## [1] "character"
class(a1)
## [1] "character"
class(b)
## [1] "character"
# Careful: a number written inside quotes is a character string, not a number.
c <- "1"
class(c)
## [1] "character"
# Logical values are the unquoted constants TRUE and FALSE.
a <- TRUE
b <- FALSE
print(a)
## [1] TRUE
class(a)
## [1] "logical"
# But: the quoted text "TRUE" is again a character string.
c <- "TRUE"
class(c)
## [1] "character"
# Coercing logicals to integer maps TRUE to 1 and FALSE to 0 ...
print(as.integer(a))
## [1] 1
print(as.integer(b))
## [1] 0
# ... and coercing back maps 0 to FALSE and 1 to TRUE.
a <- as.logical(0L)
b <- as.logical(1)
print(a)
## [1] FALSE
print(b)
## [1] TRUE
Operator | Description |
---|---|
+ | Addition |
- | Subtraction |
* | Multiplication |
/ | Division |
^ | Exponent |
%% | Modulus (Remainder from division) |
%/% | Integer Division |
# Sample values for the operator examples: three numbers, three strings (one
# of them a quoted number) and two logicals.
num1 <- 15.12
num2 <- 6.54
num3 <- 1
char1 <- "a"
char2 <- "b"
char3 <- "1"
logic1 <- TRUE
logic2 <- FALSE

# Arithmetic on numbers.
num1 + num2
## [1] 21.66
num1 - num2
## [1] 8.58
num1 * num2
## [1] 98.8848
num1 / num2
## [1] 2.311927
num2^2
## [1] 42.7716
num1 %% num2
## [1] 2.04
num1 %/% num2
## [1] 2

# Strings cannot take part in arithmetic, even when they look like a number.
# try() reports the error and lets the script carry on.
try(char1 + num1)
## Error in char1 + num1 : non-numeric argument to binary operator
try(char3 + num1)
## Error in char3 + num1 : non-numeric argument to binary operator

# Converting first works only when the text really is a number; otherwise the
# conversion yields NA with a warning.
try(as.numeric(char1) + num1)
## Warning in doTryCatch(return(expr), name, parentenv, handler): NAs introduced by
## coercion
## [1] NA
try(as.numeric(char3) + num1)
## [1] 16.12

# Logicals count as 1 (TRUE) and 0 (FALSE) in arithmetic.
try(logic1 + logic2)
## [1] 1
try(num1 + logic1)
## [1] 16.12
# Comparing numbers.
num1 < num2
## [1] FALSE
num1 != num2
## [1] TRUE
num1 == num2
## [1] FALSE

# Comparing strings: the ordering is alphabetical, following the collation
# rules of the current locale.
try(char1 < char2)
## [1] TRUE
try(char1 == char2)
## [1] FALSE
try(char1 != char2)
## [1] TRUE

# But: when a string meets a number, the number is turned into a string and
# the two are compared as text (e.g. "a" against "6.54").
try(char1 > num2)
## [1] TRUE
try(char1 <= num3)
## [1] FALSE
try(char3 <= num3)
## [1] TRUE

# Comparing logicals: TRUE behaves like 1 and FALSE like 0.
logic1 < logic2
## [1] FALSE
logic1 != logic2
## [1] TRUE
logic1 == logic2
## [1] FALSE

# Logical operators: and, or, not.
TRUE & FALSE
## [1] FALSE
TRUE | FALSE
## [1] TRUE
!TRUE
## [1] FALSE
# Floating-point arithmetic is inexact: both differences are 0.2 on paper,
# but their stored binary values differ slightly, so == reports FALSE.
x1 <- 0.5 - 0.3
x2 <- 0.3 - 0.1
x1 == x2
## [1] FALSE
# Compare with a tolerance instead. all.equal() returns a character
# description (not FALSE) when the values differ, so wrap it in isTRUE() to
# always get a single TRUE/FALSE.
isTRUE(all.equal(x1, x2))
## [1] TRUE
# Concatenating character values.
# paste() joins its arguments with a single space by default.
paste(char1, char2)
## [1] "a b"
# sep sets the text placed between the pieces.
paste(char1, char2, sep="_")
## [1] "a_b"
# paste0() joins with nothing in between.
paste0(char1, char2)
## [1] "ab"
Vectors are one-dimensional collections of data of the same type. Indexing in R is always 1-based.
# NOTE(review): the statement that creates `a` (and the one that produced the
# first output line below) is not present in this file -- TODO restore it.
## [1] "integer"
class(a)
## [1] "integer"
#It can handle only one type! It will coerce the data into the most permissive type:
b <- c(1,2,3)
a <- c(1L,"two",3L)
typeof(a)
## [1] "character"
# NOTE(review): the next output has no visible statement; presumably it came
# from typeof(b) -- confirm.
## [1] "double"
#vector functions
length(a)
## [1] 3
# NOTE(review): the outputs from here on show plain numbers, which does not
# match the character vector assigned to `a` above; a reassignment of `a`
# seems to be missing.
a2 <- c(a,a) #combine vectors
a2
## [1] 1 2 3 1 2 3
#compare vectors
a==b
## [1] TRUE TRUE TRUE
a==a
## [1] TRUE TRUE TRUE
#but!
# Vectors of different length: the shorter one is recycled, so the result has
# the length of the longer one.
a==a2
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
# NOTE(review): the output below has no visible statement.
## [1] TRUE TRUE FALSE
# | works element by element, while && and || are meant for single values.
# Since R 4.3 they raise an error when given longer vectors (older versions
# silently looked at the first elements only), so the first elements are
# selected explicitly here.
a[1] && b[1]
## [1] TRUE
a | b
## [1] TRUE TRUE TRUE
a[1] || b[1]
## [1] TRUE
# NOTE(review): the output below has no visible statement.
## [1] 1
# Select by a vector of positions ...
b[c(1,2)]
## [1] 1 2
# ... or by a range of positions.
b[1:2]
## [1] 1 2
# Select by condition: keep the elements for which the test is TRUE.
a[a>2]
## [1] 3
# NOTE(review): the outputs below show that `b` holds a 4 and an NA at this
# point, so a reassignment of `b` seems to be missing from this file.
# An NA in the vector gives an NA in the selection.
b[b>2]
## [1] 4 NA
# Drop the missing values.
b[!is.na(b)]
## [1] 1 2 4
# NOTE(review): the three output lines below have no visible statement;
# presumably `a` was printed and then given the names one/two/three -- confirm.
## [1] 1 2 3
## one two three
## 1 2 3
# Select by element name.
a["one"]
## one
## 1
# collapse joins all elements into one string.
paste(a, collapse = ",")
## [1] "1,2,3"
# Other useful functions: summary statistics of a numeric vector.
sum(a)
## [1] 6
mean(a)
## [1] 2
median(a)
## [1] 2
sd(a)
## [1] 1
# A missing value in the input makes the result NA ...
mean(b)
## [1] NA
# ... unless it is removed with na.rm. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
mean(b, na.rm = TRUE)
## [1] 2.333333
Factors are a rather unique type in R. They are very useful in some cases, but in my experience they can be a source of errors if not handled properly. A factor is used for categorical variables. It is internally represented as integers, but has labels. The set of labels is fixed.
# NOTE(review): the statement that creates the factor `a` printed below is
# not present in this file -- TODO restore it.
## [1] a b c a
## Levels: a b c d
class(a)
## [1] "factor"
# The underlying storage is integer.
typeof(a)
## [1] "integer"
# Assigning a value that is one of the levels works ...
a[2] <- "d"
#but!
# ... while a value outside the levels becomes NA with a warning.
a[2] <- "e"
## Warning in `[<-.factor`(`*tmp*`, 2, value = "e"): invalid factor level, NA
## generated
# Two factors with the same set of levels listed in a different order.
a <- factor(c("1", "2", "3"), levels = c("1", "2", "3", "4"))
b <- factor(c("1", "4", "3"), levels = c("1", "4", "3", "2"))
# as.numeric() on a factor returns the position of each value in the levels,
# not the label: both factors hold positions 1, 2, 3.
as.numeric(b)==as.numeric(a)
## [1] TRUE TRUE TRUE
# == compares the labels.
a==b
## [1] TRUE FALSE TRUE
# sort() orders by position in the levels, not by label.
sort(a)
## [1] 1 2 3
## Levels: 1 2 3 4
sort(b)
## [1] 1 4 3
## Levels: 1 4 3 2
# To get the numbers written in the labels, convert to character first.
as.numeric(as.character(b))==as.numeric(as.character(a))
## [1] TRUE FALSE TRUE
# NOTE(review): the statements that create the date `x` and produce the
# output below are not present in this file -- TODO restore them.
## [1] 365
# A Date is stored as a double with a class attribute.
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
# Sys.time() returns the current date and time.
current_date <- Sys.time()
current_date
## [1] "2021-05-07 09:23:28 UTC"
# %d = day of month, %b = abbreviated month name, %Y = four-digit year.
format(current_date, "%d %b %Y")
## [1] "07 May 2021"
# NOTE(review): the output below has no visible statement.
## [1] "06 May 2021"
Similar to vectors, but can take lists as elements. The elements can have different types.
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
## [[1]]
## [1] 1 2 3 4
##
## [[2]]
## [1] "two"
##
## [[3]]
## [1] "three"
## [[1]]
## [[1]][[1]]
## [1] 1 2
##
## [[1]][[2]]
## [1] 2
##
## [[1]][[3]]
## [1] 3
##
## [[1]][[4]]
## [1] 4
##
##
## [[2]]
## [1] "two"
##
## [[3]]
## [1] "three"
# List elements can have names.
b <- list(numbers=c(1:4),letters=c("two", "three"))
#referring to an element/subsetting
# NOTE(review): the statement that creates the list `a` used below is not
# present in this file.
# [[ ]] extracts a single element; here that element is itself a list.
a[[1]]
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
# Extractions can be chained to reach into nested lists ...
a[[1]][[1]]
## [1] 1 2
# ... and finished with [ ] to index the vector found there.
a[[1]][[1]][1]
## [1] 1
# $ extracts an element by name.
b$letters[1]
## [1] "two"
#functions
length(a)
## [1] 3
# The printed `a` shows no element named letters, so a$letters is NULL and
# its length is 0.
# NOTE(review): possibly b$letters was intended here -- confirm.
length(a$letters)
## [1] 0
Data format to store values in a matrix (rows and columns). The elements can be primitive types. All elements have to be of the same type.
# A 3-row character matrix and a 3 x 3 integer matrix; matrix() fills the
# values column by column.
letter_mat <- matrix(data=c(rep("a", 3), rep("b", 3)), nrow = 3)
num_mat <- matrix(1:9, nrow=3, ncol=3)
# A matrix can have both row names and column names (see rownames() and
# colnames()); none are set here.
num_mat*num_mat #element-wise multiplication
## [,1] [,2] [,3]
## [1,] 1 16 49
## [2,] 4 25 64
## [3,] 9 36 81
num_mat %*% num_mat #matrix multiplication
## [,1] [,2] [,3]
## [1,] 30 66 102
## [2,] 36 81 126
## [3,] 42 96 150
t(num_mat) #Transpose
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# There are many other matrix-specific functions and operators.
# Subsetting a matrix.
# [row, column] picks a single cell.
letter_mat[1,2]
## [1] "b"
# Leaving the column empty returns the whole row (as a plain vector).
letter_mat[1,]
## [1] "a" "b"
# A single index without a comma treats the matrix as one long vector,
# counted down the columns.
letter_mat[1:2]
## [1] "a" "a"
letter_mat[3]
## [1] "a"
# Leaving the row empty returns the whole column.
letter_mat[,1]
## [1] "a" "a" "a"
# Combining matrices.
# cbind() puts the columns side by side; because a matrix holds one type
# only, the numbers are turned into character strings here.
cbind(letter_mat, num_mat)
## [,1] [,2] [,3] [,4] [,5]
## [1,] "a" "b" "1" "4" "7"
## [2,] "a" "b" "2" "5" "8"
## [3,] "a" "b" "3" "6" "9"
# rbind() stacks the rows on top of each other.
rbind(letter_mat, letter_mat)
## [,1] [,2]
## [1,] "a" "b"
## [2,] "a" "b"
## [3,] "a" "b"
## [4,] "a" "b"
## [5,] "a" "b"
## [6,] "a" "b"
# Row- and column-wise operations.
# The matrixStats package offers fast versions of these.
# A 10 x 10 matrix of random numbers; no seed is set in the visible code, so
# the values printed below will differ from run to run.
num_mat <- matrix(runif(100), nrow=10, ncol=10)
# Mean of each row.
rowMeans(num_mat)
## [1] 0.5537849 0.5104701 0.4483572 0.4573370 0.4705051 0.5576114 0.4747559
## [8] 0.6315580 0.4261982 0.3760953
# The matrixStats counterpart gives the same values.
matrixStats::rowMeans2(num_mat)
## [1] 0.5537849 0.5104701 0.4483572 0.4573370 0.4705051 0.5576114 0.4747559
## [8] 0.6315580 0.4261982 0.3760953
# Sum of each row.
rowSums(num_mat)
## [1] 5.537849 5.104701 4.483572 4.573370 4.705051 5.576114 4.747559 6.315580
## [9] 4.261982 3.760953
# Mean and sum of each column.
colMeans(num_mat)
## [1] 0.5879683 0.5693501 0.3580645 0.4077061 0.5715438 0.3225014 0.4858095
## [8] 0.4974807 0.6395245 0.4667241
colSums(num_mat)
## [1] 5.879683 5.693501 3.580645 4.077061 5.715438 3.225014 4.858095 4.974807
## [9] 6.395245 4.667241
2D object type. Rows and columns with names. The columns can have different element types.
# A small patient table: one numeric column and two factor columns, with the
# patient labels as row names. "First name" is not a syntactically valid
# column name, so it shows up as First.name in the output.
df <- data.frame(
  value = c(1, 2, 3),
  "First name" = as.factor(c("John", "John", "Peter")),
  last_name = as.factor(c("Smith", "Black", "Rabbit")),
  row.names = c("Patient 1", "Patient 2", "Patient 3")
)

# Four ways to select the first two rows:
# by position,
df[1:2, ]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
# by a logical mask,
df[c(TRUE, TRUE, FALSE), ]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
# by row name,
df[c("Patient 1", "Patient 2"), ]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
# and by excluding the third row.
df[-3, ]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black