Maho Takahashi

Linguistics PhD Student at UC San Diego

research

CV

code

mtakahas[at]ucsd[dot]edu

Different ways to subset rows and columns

# Example data: one entry per person, kept as separate vectors first.
Name <- c("Jon", "Bill", "Maria", "Ben", "Tina")
Age <- c(23, 41, 32, 58, 26)
Location_old <- c("New York", "Chicago", "New York", "Seattle", "Chicago")
Location_new <- c("Tokyo", "Tel Aviv", "Boston", "San Diego", "Toronto")

# Combine the vectors into a data frame, one column per vector, then print it.
df <- data.frame(
  Name = Name,
  Age = Age,
  Location_old = Location_old,
  Location_new = Location_new
)
df
##    Name Age Location_old Location_new
## 1   Jon  23     New York        Tokyo
## 2  Bill  41      Chicago     Tel Aviv
## 3 Maria  32     New York       Boston
## 4   Ben  58      Seattle    San Diego
## 5  Tina  26      Chicago      Toronto

select rows and columns using indexes

# Positional subsetting: rows 2 and 4, columns 1 and 3.
df[c(2, 4), c(1, 3)]
##   Name Location_old
## 2 Bill      Chicago
## 4  Ben      Seattle

select rows and columns using grep

# grep() returns the indices of the Location_old entries matching "Chicago";
# those indices pick the rows, and all columns are kept.
df[grep(pattern = "Chicago", x = df$Location_old), ]
##   Name Age Location_old Location_new
## 2 Bill  41      Chicago     Tel Aviv
## 5 Tina  26      Chicago      Toronto
# Same idea for columns: match "Age" against the column names and keep the
# matching column(s). Single-bracket indexing returns a data frame.
df[grep(pattern = "Age", x = names(df))]
##   Age
## 1  23
## 2  41
## 3  32
## 4  58
## 5  26

select columns using select and starts_with

Note: You might have to specify that the select function is the one from the dplyr package.

# Keep Name plus every column whose name starts with "Location".
# select() is called as dplyr::select() so the dplyr version is the one used.
df %>%
  dplyr::select(Name, starts_with("Location"))
##    Name Location_old Location_new
## 1   Jon     New York        Tokyo
## 2  Bill      Chicago     Tel Aviv
## 3 Maria     New York       Boston
## 4   Ben      Seattle    San Diego
## 5  Tina      Chicago      Toronto

select rows with a condition

# Logical subsetting: keep the rows where Age is at least 40, all columns.
df[df[["Age"]] >= 40, ]
##   Name Age Location_old Location_new
## 2 Bill  41      Chicago     Tel Aviv
## 4  Ben  58      Seattle    San Diego