An R function is created by using the keyword function. The basic syntax of an R function definition is as follows.
function_name <- function(arg_1, arg_2, ...) {
Function body
}
# Define the function (in R)
# print_list: print every element of `ls`, one print() call per element.
#   ls: a list (or vector) whose elements are printed in order.
# Returns NULL invisibly (the value of the final `for` loop).
print_list <- function(ls) {
  for (idx in seq_along(ls)) {
    print(ls[[idx]])
  }
}
# Use the function: build a sample list and print its elements
y <- list(2, 3, 4, 2, 10, 44, 4)
print_list(y)
Implement a function named 'my_mean' that takes a list of numbers as input and returns the mean of the list's elements, without using the built-in 'mean' function.
# Class solutions:
# my_mean: mean of a list of numbers, accumulating both the sum and the
# element count by hand, then printing the result.
#   ls: a list (or vector) of numbers.
# Returns the mean invisibly (it is the value returned by print()).
my_mean <- function(ls) {
  running_sum <- 0
  count <- 0
  for (idx in seq_along(ls)) {
    running_sum <- running_sum + ls[[idx]]
    count <- count + 1
  }
  print(running_sum / count)
}
# my_mean_2: mean of a list of numbers; the sum is accumulated in a loop
# and divided by length(ls).
#   ls: a list (or vector) of numbers.
# Returns the mean.
my_mean_2 <- function(ls) {
  acc <- 0
  for (idx in seq_along(ls)) {
    acc <- ls[[idx]] + acc
  }
  acc / length(ls)
}
# my_mean_3: mean of a list of numbers using the built-in sum().
# unlist() flattens the list into a vector so sum() can add it up;
# the divisor is the length of the original list.
my_mean_3 <- function(ls) {
  flat_values <- unlist(ls)
  sum(flat_values) / length(ls)
}
# Try the three class solutions on the same sample list
y <- list(2, 3, 4, 2, 10, 44, 4)
my_mean(y)
res <- my_mean_2(y)
print(res)
print(my_mean_3(y))
# Define the function
# my_mean: mean of `data`, summing the elements in an explicit loop.
#   data: a numeric vector (or list of numbers).
# Returns the accumulated total divided by length(data).
my_mean <- function(data) {
  total <- 0
  for (idx in seq_along(data)) {
    total <- total + data[[idx]]
  }
  total / length(data)
}
# Use the function on a numeric vector and print the result
ls <- c(2, 4, 5, 2, 1, 4, 5, 3)
m <- my_mean(ls)
print(m)
# Define the function
# my_mean: mean of a numeric vector, using the built-in sum().
#   data: a numeric vector.
# Returns sum(data) divided by length(data).
my_mean <- function(data) {
  sum(data) / length(data)
  # or, with a named intermediate:
  # s <- sum(data)
  # s / length(data)
}
# Use the function and print the mean formatted to 2 significant digits
d <- c(2, 2, 1, 2, 3, 3, 1, 3, 3, 1)
print(paste("The mean is: ", format(my_mean(d), digits = 2)))
Implement a function named 'my_var' that takes a list of numbers as input and returns the variance of the list's elements. To do so, exploit the above defined 'my_mean' function to compute the mean. Then, use the function 'my_var' to compute both the variance and the standard deviation of the data.
# Class solutions:
# my_mean_3: mean of a list of numbers (helper used by my_var below).
my_mean_3 <- function(ls) return (sum(unlist(ls))/length(ls))

# my_var: sample variance of a list of numbers.
#   ls: a list of numbers (at least two, since the divisor is n - 1).
# Returns the sum of squared deviations from the mean divided by
# (length(ls) - 1). The deviations must be squared before summing:
# the plain deviations from the mean always add up to (about) zero.
my_var <- function(ls) {
  deviations <- unlist(ls) - my_mean_3(ls)
  sum(deviations^2) / (length(ls) - 1)
}
# Unfold the function
# my_var: sample variance of a list of numbers, written step by step.
#   ls: a list of numbers.
# Uses my_mean_3() for the mean; returns the sum of squared deviations
# divided by (length(ls) - 1).
my_var <- function(ls) {
  centered <- unlist(ls) - my_mean_3(ls)
  squared_total <- sum(centered^2)
  squared_total / (length(ls) - 1)
}
# Compute the variance of the sample list and print it
y <- list(2, 3, 4, 2, 10, 44, 4)
variance <- my_var(y)
print(variance)
var(unlist(y))  # cross-check against the built-in var()
y <- list(2, 3, 4, 2, 10, 44, 4)
mean(unlist(y))
# Define the function
# my_var: sample variance of `data`, accumulating the squared deviations
# from the mean in an explicit loop.
#   data: a numeric vector.
# Uses my_mean() for the mean; returns the accumulated total divided by
# (length(data) - 1).
my_var <- function(data) {
  center <- my_mean(data)
  sq_dev_total <- 0
  for (idx in seq_along(data)) {
    sq_dev_total <- sq_dev_total + (data[[idx]] - center)^2
  }
  sq_dev_total / (length(data) - 1)
}
# Use the function: mean, variance (ours, then built-in var() as a check)
# and standard deviation, each formatted to 2 significant digits
d <- c(2, 2, 1, 2, 3, 3, 1, 3, 3, 1)
print(paste("The mean is: ", format(my_mean(d), digits = 2)))
print(paste("The variance is: ", format(my_var(d), digits = 2)))
print(paste("The variance is: ", format(var(d), digits = 2)))
print(paste("The standard dev. is: ", format(sqrt(my_var(d)), digits = 2)))
R does not have a built-in function to compute the statistical mode (the built-in mode() function returns the storage mode of an object, not its most frequent value), but we can write our own. To implement such a function we will exploit some useful R built-in functions to explore and filter data.
The function 'unique' returns a vector, data frame or array like its input, but with duplicate elements/rows removed.
# Distinct values of d, in order of first appearance
d <- c(2, 2, 1, 2, 3, 3, 1, 3, 3, 1)
unique(d)
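The tabulate function takes an integer-valued vector and counts the number of times each integer occurs in it. The result has one entry for every integer from 1 up to the largest value in the vector, so integers that never occur get a count of 0.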
# Count the occurrences of each integer in d
d <- c(2, 2, 1, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 1, 1)
tabulate(d)
# Append three 8s and count again
d <- c(d, rep(8, 3))
tabulate(d)
The which function will return the position of the elements (i.e., row number/column number/array index) in a logical vector which are TRUE.
# NOTE: assigning to `letters` masks R's built-in `letters` constant
letters <- c("a", "b", "c", "b", "e", "b")
which(letters == "b")
numbers <- c(12, 43, 3, 1, 6)
which(numbers == 3)
which(numbers != 3)
# Positions of the largest and smallest element
which.max(numbers)
which.min(numbers)
The match function returns, for each element of the first vector, the position of its first occurrence in the second vector. If an element of the first vector does not match any element of the second vector, NA is returned for that element. The output of match is therefore a vector with the same length as the first vector.
# Position of the first 5 in the second vector
print(match(5, c(1, 2, 9, 5, 3, 6, 7, 4, 5)))
# Vector lookup: one position (or NA) per element of v1
v1 <- c("d", "b", "c", "a")
v2 <- c("x", "x", "x", "d", "c")
print(match(v1, v2))
Now we have all the elements we need to define a function that takes an array and gets the mode.
# Create the 'getmode' function
# getmode: most frequent value of a vector of positive integers.
#   data: a vector of positive whole numbers (tabulate() only counts
#         positive integers).
# Returns the modal value as an element of `data`; on ties the smallest
# value wins, because which.max() returns the first maximum.
getmode <- function(data) {
  # tabulate() gives one count per integer 1..max(data), so the position
  # of the largest count is the modal VALUE, not a position in `data`.
  counts <- tabulate(data)
  mode_value <- which.max(counts)
  # Look the value up in `data` so the result keeps the input's type.
  data[match(mode_value, data)]
}
# Try getmode on a sample where 2 is the most frequent value
d <- c(2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 1, 4)
getmode(d)
# Alternative (using match and unique)
# getmode: most frequent value of `data`.
#   data: a vector.
# Returns the value with the highest count; on ties, the one that appears
# first in `data` (which.max() returns the first maximum).
getmode <- function(data) {
  distinct_vals <- unique(data)
  # match() maps each element to the position of its value in
  # distinct_vals, so tabulate() yields one count per distinct value.
  occurrences <- tabulate(match(data, distinct_vals))
  distinct_vals[which.max(occurrences)]
}
# Try the alternative getmode on the same sample
d <- c(2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 1, 4)
getmode(d)
# Get the current working directory
getwd()
# Set the working directory (replace the placeholder with a real path)
setwd("YOUR WANTED WORKING DIRECTORY PATH")
# We already observed how to read an existing CSV file;
# '..' means 'parent directory'
auto <- read.csv("../Datasets/Auto.csv")
head(auto)
# When needed, we can also create new CSV files with our data
my_data <- data.frame(
  mpg = auto$mpg,
  name = auto$name
)
head(my_data)
# Write the CSV file, then read it back
write.csv(my_data, "mpgdata.csv")
data <- read.csv("mpgdata.csv")
head(data)
Here the column X contains the row names of the data frame my_data, which write.csv writes to the file by default. This column can be dropped by passing the additional parameter row.names=FALSE when writing the file.
# Write the file again, this time passing row.names = FALSE, and read it back
write.csv(my_data, "mpgdata.csv", row.names = FALSE)
data <- read.csv("mpgdata.csv")
head(data)