FDA Assignment 4 (1)
FDA Assignment 4 (1)
OUTPUT:
Reading TEXT FILE
CODE:
# Read a space-delimited text file whose first line holds the column names,
# then print it and report its dimensions (rows, columns).
data2 <- read.table(
  "C:\\Users\\23BCE9857\\Downloads\\student.txt",
  header = TRUE,
  sep = " "
)
data2
dim(data2)

# Read a tab-delimited file with read.table() by passing the separator
# explicitly.
data3 <- read.table(
  "C:\\Users\\23BCE9857\\Downloads\\sample.txt",
  header = TRUE,
  sep = "\t"
)
data3
head(data3)
tail(data3)
# NOTE(review): assumes sample.txt has a numeric column named `variable`;
# confirm against the actual file.
mean(data3$variable)
median(data3$variable)

# Same file read through read.delim(), which uses a tab separator by default,
# so only the header flag is spelled out.
data4 <- read.delim(
  "C:\\Users\\23BCE9857\\Downloads\\sample.txt",
  header = TRUE
)
data4
head(data4)
tail(data4)
mean(data4$variable)
median(data4$variable)
Reading EXCEL FILE
CODE:
library(readxl)

# Whole workbook (first sheet): print it, preview it, and check its size.
data <- read_excel(
  path = "C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx"
)
data
head(data)
dim(data)
# NOTE(review): assumes the sheet has a numeric `cyl` column; confirm.
mean(data$cyl)

# Same workbook, skipping the first two rows before reading.
data2 <- read_excel(
  path = "C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx",
  skip = 2
)
data2
dim(data2)

# Same workbook, restricted to the cell rectangle A1:C5.
data3 <- read_excel(
  path = "C:\\Users\\23BCE9857\\Downloads\\wcars.xlsx",
  range = "A1:C5"
)
data3
dim(data3)
OUTPUT:
WRITING ALL FILES
CODE:
library(writexl)
library(readxl)

# Small customer table used for the write/read round trips below.
data <- data.frame(
  CustomerID = c(101, 201, 301, 401),
  Age = c(20, 40, 30, 50)
)
data

# CSV round trip. The file is written relative to the working directory, so
# it is read back from the same relative path; reading from a hard-coded
# Documents path only worked when the working directory happened to be
# Documents.
write.csv(data, "simple.csv", row.names = FALSE)
data1 <- read.csv("simple.csv")
data1

# Excel round trip: write and read use the same absolute path.
write_xlsx(data, "C:\\Users\\23BCE9857\\Documents\\Simple2.xlsx")
data3 <- read_excel("C:\\Users\\23BCE9857\\Documents\\Simple2.xlsx")
data3
OUTPUT:
PROBLEM
CODE:
library(readxl)

# Load the employee workbook and print it.
# NOTE(review): assumes the sheet has a numeric `salary` column; confirm.
data <- read_excel("C:\\Users\\23BCE9857\\Documents\\employee.xlsx")
data

# Basic salary statistics.
mean(data$salary)
max(data$salary)
min(data$salary)

# Classify each employee by salary band:
#   below 60000          -> "junior"
#   60000 up to < 75000  -> "mid"
#   75000 and above      -> "senior"
# The inner test only needs the upper bound because the outer test has
# already removed everything below 60000. The previous strict `> 60000`
# check let a salary of exactly 60000 fall through to "senior".
data$designation <- ifelse(
  data$salary < 60000,
  "junior",
  ifelse(data$salary < 75000, "mid", "senior")
)
data

# Salaries in ascending order.
sort(data$salary, decreasing = FALSE)
OUTPUT:
DPLYR
CODE:
library(dplyr)

# Employee roster used to demonstrate the select() helper functions.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, 48000)
)
employeedata

# Columns whose names begin with "emp".
col1 <- employeedata %>% select(starts_with("emp"))
col1

# Columns whose names end in "nt".
col2 <- employeedata %>% select(ends_with("nt"))
col2

# Columns whose names contain "la".
col3 <- employeedata %>% select(contains("la"))
col3

# Everything except the salary column.
print(employeedata %>% select(-salary))
OUTPUT:
CODE:
library(dplyr)

# Employee roster with one missing salary, used to demonstrate filter().
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)
employeedata

# Rows with a salary above 60000 (rows where the test is NA are dropped).
highsalaryemp1 <- employeedata %>% filter(salary > 60000)
highsalaryemp1

# Both conditions must hold; comma-separated conditions are combined with AND.
highsalaryemp2 <- employeedata %>% filter(salary > 60000, department == "IT")
highsalaryemp2

# Rows whose department is one of the listed values.
filteredemp <- employeedata %>% filter(department %in% c("HR", "finance"))
filteredemp

# Rows that have a recorded salary.
filteremp <- employeedata %>% filter(!is.na(salary))
filteremp
OUTPUT:
CODE:
library(dplyr)

# Employee roster with one missing salary, used to demonstrate mutate().
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# Add a location column, one value per row. The last value used to be split
# across two source lines inside the quotes, which embedded a line break in
# the stored string.
employeedata <- mutate(
  employeedata,
  location = c("hyd", "gnt", "bza", "vizag", "chennai")
)
employeedata

# Apply a 5% raise by overwriting salary; the NA salary stays NA.
employeedata <- mutate(employeedata, salary = salary * 1.05)
employeedata
OUTPUT:
CODE:
library(dplyr)

# Employee roster used to demonstrate rename().
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# rename() takes new_name = old_name; the other columns are left untouched.
employeedata <- employeedata %>% rename(empid = employee_id)
employeedata
OUTPUT:
CODE:
library(dplyr)

# Employee roster with one missing salary, used to demonstrate arrange().
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, NA)
)

# Ascending order by name.
sorteddata <- employeedata %>% arrange(name)
sorteddata

# Highest salary first; the row with a missing salary is placed last.
sorteddata1 <- employeedata %>% arrange(desc(salary))
sorteddata1
OUTPUT:
CODE:
library(dplyr)

# Employee roster with a role column, used to demonstrate count().
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "finance"),
  salary = c(55000, 62000, 75000, 80000, 48000),
  role = c("junior", "mid", "senior", "mid", "junior")
)

# Number of employees per department.
deptcounts <- employeedata %>% count(department)

# Number of employees per department/role combination.
deptrolecounts <- employeedata %>% count(department, role)

# With wt = salary, `n` holds the sum of salaries per department rather than
# a row count.
totalsalbydept <- employeedata %>% count(department, wt = salary)

deptcounts
deptrolecounts
totalsalbydept
OUTPUT:
CODE:
library(dplyr)

# Employee roster in which "HR" and "IT" each appear twice, so the grouped
# summaries below aggregate more than one row per group.
employeedata <- data.frame(
  employee_id = 1:5,
  name = c("alice", "bob", "charlie", "david", "eva"),
  department = c("HR", "sales", "IT", "HR", "IT"),
  salary = c(55000, 62000, 75000, 80000, 48000),
  role = c("junior", "mid", "senior", "mid", "junior")
)

# Mean salary per department. The summary column is named explicitly so the
# result does not carry the raw expression text as its column name.
avgsalbydept <- employeedata %>%
  group_by(department) %>%
  summarize(avg_salary = mean(salary, na.rm = TRUE))
avgsalbydept

# Mean salary per department/role pair. `.groups = "drop"` returns an
# ungrouped result instead of one still grouped by department.
avgsalbydeptandrole <- employeedata %>%
  group_by(department, role) %>%
  summarize(avg_salary = mean(salary, na.rm = TRUE), .groups = "drop")
avgsalbydeptandrole

# Highest salary per role. This previously called mean(), which contradicted
# the variable name.
maxsalbyrole <- employeedata %>%
  group_by(role) %>%
  summarize(max_salary = max(salary, na.rm = TRUE))
maxsalbyrole
OUTPUT:
CODE:
# Loaded here so this section runs on its own; harmless if already attached.
library(dplyr)

# Staff table with two members in "mathematics" and "physics" and one each
# in "chemistry" and "biology". The string literals are kept on single lines:
# a line break inside the quotes becomes part of the value, which previously
# turned the last department into a separate "\nphysics" group.
df <- data.frame(
  id = c(1, 2, 3, 4, 5, 6),
  name = c(
    "alice smith", "bob johnson", "carol williams",
    "david brown", "eva davis", "frank martin"
  ),
  department = c(
    "mathematics", "physics", "chemistry",
    "biology", "mathematics", "physics"
  ),
  salary = c(75000, 82000, 60000, 68000, 77000, 85000)
)
df

# Mean, maximum and minimum salary per department, with named result columns.
avgdept <- df %>%
  group_by(department) %>%
  summarize(avg_salary = mean(salary))
avgdept

maxsal <- df %>%
  group_by(department) %>%
  summarize(max_salary = max(salary, na.rm = TRUE))
maxsal

minsal <- df %>%
  group_by(department) %>%
  summarize(min_salary = min(salary, na.rm = TRUE))
minsal

# Head count per department.
tolmem <- count(df, department)
tolmem

# Total salary per department (count() sums `wt` into `n`).
totsal <- count(df, department, wt = salary)
totsal

# Rows ordered by salary, ascending then descending.
empdatasar <- arrange(df, salary)
empdatasar
emp <- arrange(df, desc(salary))
emp

# Positions of any missing values in the table (empty when there are none).
which(is.na(df))

median(df$salary)

# Staff earning more than 80000.
high <- filter(df, salary > 80000)
high

# Only the id and name columns.
se <- select(df, id, name)
se
OUTPUT:
Module 4
# Load the car data set from CSV and print it.
df <- read.csv("C:\\Users\\23BCE9857\\Downloads\\mtcars.csv")
df

# Box plot of mileage over all cars.
boxplot(df$mpg, main = "miles per gallon", col = "pink")

# Mileage broken down by number of cylinders (one box per cyl value).
boxplot(
  mpg ~ cyl,
  data = df,
  main = "mileage data",
  xlab = "no of cylinders",
  ylab = "miles per gallon",
  col = "green"
)

# Histogram of horse power.
hist(
  df$hp,
  main = "horse power",
  xlab = "range",
  ylab = "frequency",
  col = "pink",
  breaks = 5
)

# Histogram of weight.
hist(
  df$wt,
  main = "weight",
  xlab = "range",
  ylab = "frequency",
  col = "yellow",
  breaks = 5
)
# Frequency of each model name.
l <- table(df$model)
l

# One slice per row of df, sized by that row's cylinder count. The labels are
# taken from df$model so they stay in the same row order as the slices;
# names(l) is sorted by table() and therefore does not line up with df$cyl.
pie(df$cyl, labels = df$model)
# First ten rows of df, used for the short trend line below.
v <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
df1 <- df[v, ]
df1

# With a single vector, plot() draws the values on the y axis against their
# position on the x axis, so the axes are labelled accordingly (they were
# previously labelled "mpg" and "frequency").
plot(
  df1$mpg,
  type = "l",
  main = "mpg trends",
  xlab = "car index",
  ylab = "miles per gallon",
  col = "red"
)

# type = "l" joins points in the order supplied. Sorting by weight first
# gives a left-to-right line instead of a criss-crossing path.
wt_order <- order(df$wt)
plot(
  df$wt[wt_order],
  df$mpg[wt_order],
  type = "l",
  main = "line chart",
  xlab = "weight",
  ylab = "miles per gallon",
  col = "blue",
  lwd = 2
)
# Number of cars for each gear count.
a <- table(df$gear)
a

# Vertical bar chart of those counts.
barplot(
  a,
  main = "count of cars based on no of gears",
  xlab = "gears",
  ylab = "no of cars",
  col = "blue",
  border = "black"
)

# Mean horse power for each cylinder count, drawn as horizontal bars, so the
# value axis is x and the category axis is y.
meanby <- with(df, tapply(hp, cyl, mean))
barplot(
  meanby,
  horiz = TRUE,
  xlab = "horse power",
  ylab = "cyl",
  col = "green"
)
# Mileage against weight.
plot(
  df$wt,
  df$mpg,
  main = "scatter plot",
  xlab = "weight",
  ylab = "miles per gallon",
  col = "blue"
)

# Quarter-mile time against horse power (axis label spelling corrected from
# "quater").
plot(
  df$hp,
  df$qsec,
  main = "scatter plot",
  xlab = "horse power",
  ylab = "quarter mile time",
  col = "blue"
)