If-else statement

  1. load in the Orange dataset
  2. Create a new, empty variable
  3. Write a for loop to iterate over the rows of the dataset
  4. Write an if-else statement that classifies trees by circumference: <= 65.5 – small; > 65.5 & <= 161.5 – medium; > 161.5 – large.
  5. Do the same with ifelse() in one step.
# Load the Orange dataset into the workspace.
data(list = "Orange")

# Per-variable summary: n, missing, distinct values, and quantiles for numerics.
Hmisc::describe(Orange)
## Orange 
## 
##  3  Variables      35  Observations
## --------------------------------------------------------------------------------
## Tree 
##        n  missing distinct 
##       35        0        5 
## 
## lowest : 3 1 5 2 4, highest: 3 1 5 2 4
##                               
## Value        3   1   5   2   4
## Frequency    7   7   7   7   7
## Proportion 0.2 0.2 0.2 0.2 0.2
## --------------------------------------------------------------------------------
## age 
##        n  missing distinct     Info     Mean      Gmd 
##       35        0        7     0.98    922.1      566 
## 
## lowest :  118  484  664 1004 1231, highest:  664 1004 1231 1372 1582
##                                                     
## Value        118   484   664  1004  1231  1372  1582
## Frequency      5     5     5     5     5     5     5
## Proportion 0.143 0.143 0.143 0.143 0.143 0.143 0.143
## --------------------------------------------------------------------------------
## circumference 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       35        0       30    0.999    115.9    66.97     30.0     32.4 
##      .25      .50      .75      .90      .95 
##     65.5    115.0    161.5    193.4    204.8 
## 
## lowest :  30  32  33  49  51, highest: 177 179 203 209 214
## --------------------------------------------------------------------------------
# Classify every measurement by circumference with an explicit loop:
#   <= 65.5            -> "small"
#   >  65.5 & <= 161.5 -> "medium"
#   >  161.5           -> "large"
# Preallocate the new column as character NA so its type is fixed up front.
Orange$cc <- NA_character_

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(Orange))) {
  print(i)  # progress trace: index of the row being classified
  if (Orange$circumference[i] <= 65.5) {
    Orange$cc[i] <- "small"
  } else if (Orange$circumference[i] > 65.5 &&
             Orange$circumference[i] <= 161.5) {
    # Both bounds must be tested on row i. The conditions are scalars, so the
    # short-circuiting && is the appropriate operator inside if ().
    Orange$cc[i] <- "medium"
  } else {
    Orange$cc[i] <- "large"
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
# Same classification in one vectorised step: nested ifelse() calls evaluate
# the whole circumference column at once, testing the largest class first.
Orange$cc <- with(
  Orange,
  ifelse(circumference > 161.5, "large",
         ifelse(circumference > 65.5, "medium", "small"))
)

For, while loop

  1. for each tree (1 to 5), print the ages.
  2. for each tree, print the ages at which it was classified as small.
  3. load esoph dataset.
  4. Using for loop(s), summarize ncontrols by agegp and alcgp, skipping the category alcgp==“120+” & agegp==“25-34”.
  5. Using a for loop, simulate flipping a coin twenty times, keeping track of the individual outcomes (1 = heads, 0 = tails) in a vector that you preallocate.
  6. Use a while loop to investigate the number of terms required before the product 1 × 2 × 3 × 4 × … exceeds 10 million.
  7. Use a nested for loop (a for loop inside a for loop) that produces the matrix shown in the commented block further below; preallocate the matrix with NA values.
# Ages recorded for tree 1: filter the age column with a logical mask.
Orange$age[Orange$Tree == 1]
## [1]  118  484  664 1004 1231 1372 1582
# For each tree ID 1..5, print every age at which that tree was measured,
# joined into one comma-separated string.
for (i in 1:5) {
  tree_ages <- Orange$age[Orange$Tree == i]
  cat("Tree: ", i, " Ages: ", toString(tree_ages), "\n")
}
## Tree:  1  Ages:  118, 484, 664, 1004, 1231, 1372, 1582 
## Tree:  2  Ages:  118, 484, 664, 1004, 1231, 1372, 1582 
## Tree:  3  Ages:  118, 484, 664, 1004, 1231, 1372, 1582 
## Tree:  4  Ages:  118, 484, 664, 1004, 1231, 1372, 1582 
## Tree:  5  Ages:  118, 484, 664, 1004, 1231, 1372, 1582
# For each tree ID 1..5, print the ages of the measurements whose size class
# (column cc) is "small".
for (i in 1:5) {
  is_small_here <- Orange$Tree == i & Orange$cc == "small"
  small_ages <- Orange$age[is_small_here]
  cat("Tree: ", i, " was small at age: ", toString(small_ages), "\n")
}
## Tree:  1  was small at age:  118, 484 
## Tree:  2  was small at age:  118 
## Tree:  3  was small at age:  118, 484 
## Tree:  4  was small at age:  118, 484 
## Tree:  5  was small at age:  118, 484
# 0     1     2     3     4
# 1     0     1     2     3
# 2     1     0     1     2
# 3     2     1     0     1
# 4     3     2     1     0
# Load the esoph dataset and print the total number of controls for each
# age group, one total per level of agegp.
data("esoph")
for (agegrp in levels(esoph$agegp)) {
  in_age_group <- esoph$agegp == agegrp
  controls_total <- sum(esoph$ncontrols[in_age_group])
  print(controls_total)
}
## [1] 115
## [1] 190
## [1] 167
## [1] 166
## [1] 106
## [1] 31
# Print the total number of controls for every combination of age group
# (outer loop) and alcohol consumption group (inner loop).
for (agegrp in levels(esoph$agegp)) {
  for (alcgrp in levels(esoph$alcgp)) {
    in_cell <- esoph$agegp == agegrp & esoph$alcgp == alcgrp
    cat("Age group: ", agegrp, ", alcohol consumption group: ", alcgrp,
        " number of controls: ", sum(esoph$ncontrols[in_cell]), "\n")
  }
}
## Age group:  25-34 , alcohol consumption group:  0-39g/day  number of controls:  61 
## Age group:  25-34 , alcohol consumption group:  40-79  number of controls:  45 
## Age group:  25-34 , alcohol consumption group:  80-119  number of controls:  5 
## Age group:  25-34 , alcohol consumption group:  120+  number of controls:  4 
## Age group:  35-44 , alcohol consumption group:  0-39g/day  number of controls:  88 
## Age group:  35-44 , alcohol consumption group:  40-79  number of controls:  76 
## Age group:  35-44 , alcohol consumption group:  80-119  number of controls:  20 
## Age group:  35-44 , alcohol consumption group:  120+  number of controls:  6 
## Age group:  45-54 , alcohol consumption group:  0-39g/day  number of controls:  77 
## Age group:  45-54 , alcohol consumption group:  40-79  number of controls:  61 
## Age group:  45-54 , alcohol consumption group:  80-119  number of controls:  27 
## Age group:  45-54 , alcohol consumption group:  120+  number of controls:  2 
## Age group:  55-64 , alcohol consumption group:  0-39g/day  number of controls:  77 
## Age group:  55-64 , alcohol consumption group:  40-79  number of controls:  62 
## Age group:  55-64 , alcohol consumption group:  80-119  number of controls:  19 
## Age group:  55-64 , alcohol consumption group:  120+  number of controls:  8 
## Age group:  65-74 , alcohol consumption group:  0-39g/day  number of controls:  60 
## Age group:  65-74 , alcohol consumption group:  40-79  number of controls:  28 
## Age group:  65-74 , alcohol consumption group:  80-119  number of controls:  16 
## Age group:  65-74 , alcohol consumption group:  120+  number of controls:  2 
## Age group:  75+ , alcohol consumption group:  0-39g/day  number of controls:  23 
## Age group:  75+ , alcohol consumption group:  40-79  number of controls:  8 
## Age group:  75+ , alcohol consumption group:  80-119  number of controls:  0 
## Age group:  75+ , alcohol consumption group:  120+  number of controls:  0
# Print the total number of controls for every combination of age group and
# alcohol consumption group, skipping agegp == "25-34" with alcgp == "120+".
for (agegrp in levels(esoph$agegp)) {
  for (alcgrp in levels(esoph$alcgp)) {
    # Both operands are single strings, so the scalar, short-circuiting &&
    # belongs in if (); next jumps straight to the following inner iteration.
    if (agegrp == "25-34" && alcgrp == "120+") {
      next
    }
    cat("Age group: ", agegrp, ", alcohol consumption group: ", alcgrp,
        " number of controls: ",
        sum(esoph$ncontrols[esoph$alcgp == alcgrp & esoph$agegp == agegrp]),
        "\n")
  }
}
## Age group:  25-34 , alcohol consumption group:  0-39g/day  number of controls:  61 
## Age group:  25-34 , alcohol consumption group:  40-79  number of controls:  45 
## Age group:  25-34 , alcohol consumption group:  80-119  number of controls:  5 
## Age group:  35-44 , alcohol consumption group:  0-39g/day  number of controls:  88 
## Age group:  35-44 , alcohol consumption group:  40-79  number of controls:  76 
## Age group:  35-44 , alcohol consumption group:  80-119  number of controls:  20 
## Age group:  35-44 , alcohol consumption group:  120+  number of controls:  6 
## Age group:  45-54 , alcohol consumption group:  0-39g/day  number of controls:  77 
## Age group:  45-54 , alcohol consumption group:  40-79  number of controls:  61 
## Age group:  45-54 , alcohol consumption group:  80-119  number of controls:  27 
## Age group:  45-54 , alcohol consumption group:  120+  number of controls:  2 
## Age group:  55-64 , alcohol consumption group:  0-39g/day  number of controls:  77 
## Age group:  55-64 , alcohol consumption group:  40-79  number of controls:  62 
## Age group:  55-64 , alcohol consumption group:  80-119  number of controls:  19 
## Age group:  55-64 , alcohol consumption group:  120+  number of controls:  8 
## Age group:  65-74 , alcohol consumption group:  0-39g/day  number of controls:  60 
## Age group:  65-74 , alcohol consumption group:  40-79  number of controls:  28 
## Age group:  65-74 , alcohol consumption group:  80-119  number of controls:  16 
## Age group:  65-74 , alcohol consumption group:  120+  number of controls:  2 
## Age group:  75+ , alcohol consumption group:  0-39g/day  number of controls:  23 
## Age group:  75+ , alcohol consumption group:  40-79  number of controls:  8 
## Age group:  75+ , alcohol consumption group:  80-119  number of controls:  0 
## Age group:  75+ , alcohol consumption group:  120+  number of controls:  0

apply, lapply, etc…

  1. define the following dataset:
# Two-column integer matrix: observationA counts down from 16 to 8,
# observationB is 20, 19 followed by 6 up to 12.
dataset1 <- cbind(
  observationA = seq(16, 8),
  observationB = c(20:19, 6:12)
)
  1. Calculate row and column means with apply
  2. Use apply to multiply the whole table by 3.
# Row means (MARGIN = 1) and column means (MARGIN = 2) of dataset1 via apply().
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, while TRUE is a reserved constant.
rowm <- apply(dataset1, 1, mean, na.rm = TRUE)
colm <- apply(dataset1, 2, mean, na.rm = TRUE)
#two ways

# Multiply x by number and return the product.
#   x      - value(s) to scale
#   number - multiplier
multiply <- function(x, number) {
  x * number
}
# Way 1: pass the named helper to apply(); number = 3 is forwarded to it.
res <- apply(dataset1, MARGIN = 2, FUN = multiply, number = 3)

# Way 2: same column-wise scaling with an inline anonymous function.
res <- apply(dataset1, MARGIN = 2, FUN = function(column) column * 3)
  1. create a list with 3 list elements. Print the length of each element.
# A list holding three sub-lists of different lengths.
lll <- list(
  list(1, 3, 5, 6, 1),
  list(5, 2, 1),
  list(6, 2, 5, 7, 3, 1, 9)
)

# lapply() calls length() on each sub-list and returns the results as a list.
lapply(X = lll, FUN = length)
## [[1]]
## [1] 5
## 
## [[2]]
## [1] 3
## 
## [[3]]
## [1] 7