0% found this document useful (0 votes)
16 views34 pages

FDA Assignment 4 (1)

Uploaded by

addankisai24
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views34 pages

FDA Assignment 4 (1)

Uploaded by

addankisai24
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/34

FDA ASSIGNMENT 4

Reading CSV FILE


CODE:
# Load the mtcars CSV export into a data frame and echo it.
data1 <- read.csv("C:\\Users\\23BCE9857\\Downloads\\mtcars.csv")
data1

# Quick inspection: first/last rows, structure, column summaries, dimensions.
head(data1)
tail(data1)
str(data1)
summary(data1)
dim(data1)

# Data-quality checks: is anything missing, and which rows are exact repeats?
any(is.na(data1))
data1[duplicated(data1), ]

# Descriptive statistics on individual columns.
mean(data1$cyl)
median(data1$vs)
var(data1$gear)
sd(data1$carb)

OUTPUT:
Reading TEXT FILE
CODE:
# Read a space-separated text file; the first line supplies the column names.
# `header` is spelled out in full rather than relying on partial matching of
# `head`, and the TRUE literal is kept on one line.
data2 <- read.table(
  "C:\\Users\\23BCE9857\\Downloads\\student.txt",
  header = TRUE,
  sep = " "
)
data2
dim(data2)

# Read a tab-separated text file with read.table() and an explicit separator.
data3 <- read.table(
  "C:\\Users\\23BCE9857\\Downloads\\sample.txt",
  header = TRUE,
  sep = "\t"
)
data3
head(data3)
tail(data3)
# NOTE(review): assumes sample.txt has a numeric column named `variable`.
mean(data3$variable)
median(data3$variable)

# Same file read through read.delim(), whose default separator is a tab.
data4 <- read.delim(
  "C:\\Users\\23BCE9857\\Downloads\\sample.txt",
  header = TRUE
)
data4
head(data4)
tail(data4)
mean(data4$variable)
median(data4$variable)
Reading EXCEL FILE
CODE:
library(readxl)

# Whole first sheet of the workbook.
data <- read_excel("C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx")
data
head(data)
dim(data)
mean(data$cyl)

# Same workbook, skipping the first two rows before reading.
data2 <- read_excel(
  "C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx",
  skip = 2
)
data2
dim(data2)

# Same workbook, restricted to the cell rectangle A1:C5.
data3 <- read_excel(
  "C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx",
  range = "A1:C5"
)
data3
dim(data3)

OUTPUT:
WRITING ALL FILES
CODE:
# Small customer table used to demonstrate writing and re-reading each format.
data <- data.frame(
  CustomerID = c(101, 201, 301, 401),
  Age = c(20, 40, 30, 50)
)
data

# CSV round trip. The file is written relative to the working directory and
# read back from Documents.
# NOTE(review): this assumes the working directory is the Documents folder.
write.csv(data, "simple.csv", row.names = FALSE)
data1 <- read.csv("C:\\Users\\23BCE9857\\Documents\\simple.csv")
data1

# Tab-separated text round trip. The read-back targets simple.txt (the file
# written on the line above); reading simple.csv with sep = "\t" would
# collapse every row into a single column.
write.table(data, "simple.txt", sep = "\t", row.names = FALSE, col.names = TRUE)
data2 <- read.table(
  "C:\\Users\\23BCE9857\\Documents\\simple.txt",
  header = TRUE,
  sep = "\t"
)
data2

# Excel round trip via writexl / readxl.
library(writexl)
library(readxl)
write_xlsx(data, "C:\\Users\\23BCE9857\\Documents\\Simple2.xlsx")
data3 <- read_excel("C:\\Users\\23BCE9857\\Documents\\Simple2.xlsx")
data3
OUTPUT:

PROBLEM
CODE:
library(readxl)

# Load the employee workbook.
# NOTE(review): assumes it has a numeric `salary` column.
data <- read_excel("C:\\Users\\23BCE9857\\Documents\\employee.xlsx")
data

# Salary statistics.
mean(data$salary)
max(data$salary)
min(data$salary)

# Band each employee by salary:
#   salary <  60000            -> "junior"
#   60000 <= salary < 75000    -> "mid"
#   salary >= 75000            -> "senior"
# The inner test only needs the upper bound because the outer ifelse() has
# already removed everything below 60000. This also puts a salary of exactly
# 60000 in "mid" instead of letting it fall through to "senior".
data$designation <- ifelse(
  data$salary < 60000,
  "junior",
  ifelse(data$salary < 75000, "mid", "senior")
)
data

# Salaries in ascending order.
sort(data$salary, decreasing = FALSE)

OUTPUT:
DPLYR
CODE:
library(dplyr)

# Sample employee records used to demonstrate the select() helpers.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, 48000)
)
employeedata

# Columns whose names start with "emp".
col1 <- employeedata %>% select(starts_with("emp"))
col1

# Columns whose names end with "nt".
col2 <- employeedata %>% select(ends_with("nt"))
col2

# Columns whose names contain "la".
col3 <- employeedata %>% select(contains("la"))
col3

# Everything except the salary column.
print(employeedata %>% select(-salary))

OUTPUT:
CODE:
library(dplyr)

# Sample employee records; the last salary is deliberately missing.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)
employeedata

# Rows earning more than 60000 (rows where the comparison is NA are dropped).
highsalaryemp1 <- employeedata %>% filter(salary > 60000)
highsalaryemp1

# Rows earning more than 60000 that are also in the IT department.
highsalaryemp2 <- employeedata %>% filter(salary > 60000 & department == "IT")
highsalaryemp2

# Rows belonging to either HR or finance.
filteredemp <- employeedata %>% filter(department %in% c("HR", "finance"))
filteredemp

# Rows with a recorded salary.
filteremp <- employeedata %>% filter(!is.na(salary))
filteremp

OUTPUT:
CODE:
library(dplyr)

# Sample employee records; the last salary is deliberately missing.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# Add a location column. Each literal is kept on a single line: a string that
# wraps across lines would embed a newline character in the value.
employeedata <- mutate(
  employeedata,
  location = c("hyd", "gnt", "bza", "vizag", "chennai")
)
employeedata

# Apply a 5% raise; the missing salary stays NA.
employeedata <- mutate(employeedata, salary = salary * 1.05)
employeedata

OUTPUT:

CODE:
library(dplyr)

# Sample employee records.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# rename() takes new_name = old_name: employee_id becomes empid.
employeedata <- employeedata %>% rename(empid = employee_id)
employeedata

OUTPUT:

CODE:
library(dplyr)

# Sample employee records; the last salary is deliberately missing.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# Alphabetical by name.
sorteddata <- employeedata %>% arrange(name)
sorteddata

# Highest salary first; arrange() places the NA salary last.
sorteddata1 <- employeedata %>% arrange(desc(salary))
sorteddata1

OUTPUT:

CODE:
library(dplyr)

# Sample employee records with a role column.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, 48000),
  role = c("junior", "mid", "senior", "mid", "junior")
)

# Head count per department.
deptcounts <- employeedata %>% count(department)

# Head count per department/role combination.
deptrolecounts <- employeedata %>% count(department, role)

# With wt = salary, `n` is the sum of salaries per department, not a row count.
totalsalbydept <- employeedata %>% count(department, wt = salary)

deptcounts
deptrolecounts
totalsalbydept

OUTPUT:

CODE:
library(dplyr)

# Sample employee records with a role column.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "IT"),
  salary = c(55000, 62000, 75000, 80000, 48000),
  role = c("junior", "mid", "senior", "mid", "junior")
)

# Average salary per department. The summary column is named explicitly so the
# result does not carry the raw expression text as its header.
avgsalbydept <- employeedata %>%
  group_by(department) %>%
  summarize(avg_salary = mean(salary, na.rm = TRUE))
avgsalbydept

# Average salary per department/role pair. `.groups = "drop"` returns an
# ungrouped result and silences the regrouping message.
avgsalbydeptandrole <- employeedata %>%
  group_by(department, role) %>%
  summarize(avg_salary = mean(salary, na.rm = TRUE), .groups = "drop")
avgsalbydeptandrole

# Maximum salary per role. The variable name promises a maximum, so max() is
# used here rather than mean().
maxsalbyrole <- employeedata %>%
  group_by(role) %>%
  summarize(max_salary = max(salary, na.rm = TRUE))
maxsalbyrole

OUTPUT:
CODE:
# dplyr supplies group_by(), summarize(), count(), arrange(), filter() and
# select() used below; it is loaded here so the section runs on its own.
library(dplyr)

# Faculty records. Every string literal is kept on one line: a literal that
# wraps across lines embeds a newline in the value, which corrupts names and
# splits "physics" into two distinct department groups.
df <- data.frame(
  id = c(1, 2, 3, 4, 5, 6),
  name = c(
    "alice smith", "bob johnson", "carol williams",
    "david brown", "eva davis", "frank martin"
  ),
  department = c(
    "mathematics", "physics", "chemistry",
    "biology", "mathematics", "physics"
  ),
  salary = c(75000, 82000, 60000, 68000, 77000, 85000)
)
df

# Per-department average, maximum and minimum salary.
avgdept <- group_by(df, department) %>% summarize(avg_salary = mean(salary))
avgdept
maxsal <- group_by(df, department) %>%
  summarize(max_salary = max(salary, na.rm = TRUE))
maxsal
minsal <- group_by(df, department) %>%
  summarize(min_salary = min(salary, na.rm = TRUE))
minsal

# Head count per department, then total salary per department (wt = salary
# makes `n` a sum rather than a row count).
tolmem <- count(df, department)
tolmem
totsal <- count(df, department, wt = salary)
totsal

# Ascending and descending salary order.
empdatasar <- arrange(df, salary)
empdatasar
emp <- arrange(df, desc(salary))
emp

# Positions of missing cells (empty when there are none) and the median salary.
which(is.na(df))
median(df$salary)

# Staff earning more than 80000, and an id/name-only view.
high <- filter(df, salary > 80000)
high
se <- select(df, id, name)
se

OUTPUT:
Module 4
# Load the mtcars CSV export used by every plot in this module.
df <- read.csv("C:\\Users\\23BCE9857\\Downloads\\mtcars.csv")
df

# Box plot of fuel economy across all cars.
boxplot(df$mpg, main = "miles per gallon", col = "pink")

# Fuel economy split by cylinder count (formula interface).
boxplot(
  mpg ~ cyl,
  data = df,
  main = "mileage data",
  xlab = "no of cylinders",
  ylab = "miles per gallon",
  col = "green"
)

# Histogram of horsepower; `breaks = 5` is a suggested number of bins.
hist(
  df$hp,
  main = "horse power",
  xlab = "range",
  ylab = "frequency",
  col = "pink",
  breaks = 5
)

# Histogram of weight with the same bin suggestion.
hist(
  df$wt,
  main = "weight",
  xlab = "range",
  ylab = "frequency",
  col = "yellow",
  breaks = 5
)
# Frequency table of car models (names come out sorted alphabetically).
l <- table(df$model)
l

# One slice per car, sized by its cylinder count. The labels are taken from
# df$model in row order so each label lines up with its own slice; the names
# of table(df$model) are sorted alphabetically and would be attached to the
# wrong slices.
# NOTE(review): assumes the CSV stores the car name in a `model` column.
pie(df$cyl, labels = as.character(df$model))
# Row indices of the first ten cars.
v <- 1:10
df1 <- df[v, ]
df1

# mpg of the first ten cars in file order. With a single vector, plot() puts
# the row index on x and the values on y, so the axes are labelled to match
# (they were previously labelled "mpg" / "frequency").
plot(
  df1$mpg,
  type = "l",
  main = "mpg trends",
  xlab = "car (row index)",
  ylab = "miles per gallon",
  col = "red"
)

# mpg against weight as a line chart. The rows are ordered by weight first:
# type = "l" joins points in the order given, so unsorted x values produce a
# zig-zag that doubles back on itself.
by_weight <- df[order(df$wt), ]
plot(
  by_weight$wt,
  by_weight$mpg,
  type = "l",
  main = "line chart",
  xlab = "weight",
  ylab = "miles per gallon",
  col = "blue",
  lwd = 2
)
# Number of cars for each gear count.
a <- table(df$gear)
a

# Vertical bar chart of those counts.
barplot(
  a,
  main = "count of cars based on no of gears",
  xlab = "gears",
  ylab = "no of cars",
  col = "blue",
  border = "black"
)

# Mean horsepower within each cylinder group, drawn as horizontal bars.
meanby <- tapply(df$hp, df$cyl, mean)
barplot(
  meanby,
  horiz = TRUE,
  xlab = "horse power",
  ylab = "cyl",
  col = "green"
)
# Scatter plot of fuel economy against weight.
plot(
  df$wt,
  df$mpg,
  main = "scatter plot",
  xlab = "weight",
  ylab = "miles per gallon",
  col = "blue"
)

# Scatter plot of quarter-mile time against horsepower (axis label spelling
# corrected from "quater").
plot(
  df$hp,
  df$qsec,
  main = "scatter plot",
  xlab = "horse power",
  ylab = "quarter mile time",
  col = "blue"
)

You might also like