duplicated()
function is a generic function that determines which elements are duplicates of elements already seen; it returns a logical vector.
The parameters of the function are:
x
: vector, array or data frame
fromLast
: logical indicating if duplication should be considered from the last element (i.e. from the reverse side)
x = c(5:1,5:1,5)
x
## [1] 5 4 3 2 1 5 4 3 2 1 5
duplicated(x)
## [1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
duplicated(x, fromLast = TRUE)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## extract duplicated elements (those for which duplicated(x) is TRUE); the result may contain repeated values:
x[duplicated(x)]
## [1] 5 4 3 2 1 5
## extract unique elements
x[!duplicated(x)]
## [1] 5 4 3 2 1
## extract unique elements starting from the rightmost value (different order):
x[!duplicated(x, fromLast = TRUE)]
## [1] 4 3 2 1 5
# duplicated() using a data frame
duplicated(iris)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
duplicated(iris$Sepal.Length)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE
## [12] FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [23] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [34] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
## [45] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
## [56] TRUE FALSE TRUE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [67] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [78] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [89] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [100] TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE
## [111] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [122] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [144] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
anyDuplicated()
function returns the position of the first duplicated element (or 0 if there are no duplicates):
x = c(9:1, 20, 10:6,21,10)
x
## [1] 9 8 7 6 5 4 3 2 1 20 10 9 8 7 6 21 10
duplicated(x)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] TRUE TRUE TRUE TRUE FALSE TRUE
anyDuplicated(x) #first element found to be duplicated
## [1] 12
anyDuplicated(x, fromLast = TRUE) #first duplicate found when scanning from the last element
## [1] 11
duplicated(iris)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
anyDuplicated(iris) ## 143
## [1] 143
unique()
function is a generic function that extracts unique values from a vector, array or data frame.
The parameters of the function are:
x
: vector, array or data frame to remove duplicated values
fromLast
: logical indicating if duplication should be considered from the last element (i.e. from the reverse side)
#`unique()` function using vectors:
x = c(10 + 0:5, 1:5, 8:1)
x
## [1] 10 11 12 13 14 15 1 2 3 4 5 8 7 6 5 4 3 2 1
u1 = unique(x)
u1
## [1] 10 11 12 13 14 15 1 2 3 4 5 8 7 6
u2 = unique(x, fromLast = TRUE) # different order
u2
## [1] 10 11 12 13 14 15 8 7 6 5 4 3 2 1
y = c(5:1,8:1, 10, 1:3)
y
## [1] 5 4 3 2 1 8 7 6 5 4 3 2 1 10 1 2 3
u3 = unique(y)
u3
## [1] 5 4 3 2 1 8 7 6 10
u4 = unique(y, fromLast = TRUE) # different order
u4
## [1] 8 7 6 5 4 10 1 2 3
#`unique()` function with data frames:
dim(ChickWeight)
## [1] 578 4
head(ChickWeight)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
nrow(unique(ChickWeight))
## [1] 578
unique(ChickWeight$Diet)
## [1] 1 2 3 4
## Levels: 1 2 3 4
length(unique(ChickWeight$weight))
## [1] 212