0% found this document useful (0 votes)
9 views

Test 1

Uploaded by

hoanglhse181582
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
9 views

Test 1

Uploaded by

hoanglhse181582
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 14

1. Install packages and set up libraries


library(RODBC)
library(odbc)
library(readr)
library(DBI)

2. Connect to the data server


# Connection settings -- adjust these to match your own MySQL installation.
# NOTE(review): the credentials are hard-coded in this script; consider
# supplying them from outside the source file instead.
server   <- "localhost"  # MySQL server address
port     <- 3306         # default MySQL port
database <- "school"     # database name
user     <- "FPTU"       # user name
password <- "haha123"    # password

# Open the connection to MySQL through the RMySQL driver.
con <- dbConnect(
  drv = RMySQL::MySQL(),
  host = server,
  port = port,
  dbname = database,
  user = user,
  password = password
)

# Check the connection.
# dbConnect() signals an error on failure rather than returning NULL, so a
# bare is.null() test can never report a broken connection. dbIsValid() asks
# the driver whether the handle is still usable; is.null() is kept as a guard
# so dbIsValid() is never called on a missing object.
if (is.null(con) || !dbIsValid(con)) {
  cat("Kết nối không thành công.\n")
} else {
  cat("Kết nối thành công đến cơ sở dữ liệu ", database, ".\n")
}

## Kết nối thành công đến cơ sở dữ liệu school .

3. Import data into the database


# Load the Chicago Public Schools CSV from a local, machine-specific path.
chicago_data <- read_csv(
  file = "D:/Fall24/dsr301m/ChicagoPublicSchools.csv"
)

## Rows: 566 Columns: 78


## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (59): NAME_OF_SCHOOL, Elementary, Middle, or High School, Street_Address...
## dbl (19): School_ID, ZIP_Code, SAFETY_SCORE, Environment_Score, Instruction_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Upload the data frame as table "chicago_data_table", replacing any table
# that already has that name.
# NOTE(review): the column listing in section 5 shows an extra `row_names`
# column in this table; presumably it comes from this call's default
# row-name handling -- confirm against the RMySQL documentation.
dbWriteTable(
  conn = con,
  name = "chicago_data_table",
  value = chicago_data,
  overwrite = TRUE
)

## [1] TRUE

# Check how many records were imported into the table.
row_count <- dbGetQuery(
  conn = con,
  statement = "SELECT COUNT(*) AS count FROM
chicago_data_table"
)
cat("Số lượng bản ghi đã được nhập:", row_count[["count"]], "\n")

## Số lượng bản ghi đã được nhập: 566

# Print a message once the data import has finished


cat("Quá trình nhập dữ liệu đã hoàn thành thành công!\n")

## Quá trình nhập dữ liệu đã hoàn thành thành công!

4. Summarize the data


# Read the table back from the database (this replaces the in-memory copy
# loaded from the CSV) and print per-column summary statistics.
chicago_data <- dbReadTable(conn = con, name = "chicago_data_table")
summary(object = chicago_data)

## School_ID NAME_OF_SCHOOL Elementary..Middle..or.High.School


## Min. :400018 Length:566 Length:566
## 1st Qu.:609873 Class :character Class :character
## Median :610053 Mode :character Mode :character
## Mean :609682
## 3rd Qu.:610213
## Max. :610544
##
## Street_Address City State ZIP_Code
## Length:566 Length:566 Length:566 Min. :60605
## Class :character Class :character Class :character 1st Qu.:60618
## Mode :character Mode :character Mode :character Median :60625
## Mean :60630
## 3rd Qu.:60639
## Max. :60827
##
## Phone_Number Link Network_Manager Collaborative_Name
## Length:566 Length:566 Length:566 Length:566
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Adequate_Yearly_Progress_Made_ Track_Schedule
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## CPS_Performance_Policy_Status CPS_Performance_Policy_Level
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## HEALTHY_SCHOOL_CERTIFIED Safety_Icon SAFETY_SCORE
## Length:566 Length:566 Min. : 1.0
## Class :character Class :character 1st Qu.:35.0
## Mode :character Mode :character Median :48.0
## Mean :49.5
## 3rd Qu.:61.0
## Max. :99.0
## NA's :53
## Family_Involvement_Icon Family_Involvement_Score Environment_Icon
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Environment_Score Instruction_Icon Instruction_Score Leaders_Icon
## Min. : 1.00 Length:566 Min. : 1.00 Length:566
## 1st Qu.:37.00 Class :character 1st Qu.:37.00 Class :character
## Median :47.00 Mode :character Median :47.00 Mode :character
## Mean :47.77 Mean :48.29
## 3rd Qu.:58.00 3rd Qu.:59.00
## Max. :99.00 Max. :99.00
## NA's :53 NA's :53
## Leaders_Score Teachers_Icon Teachers_Score
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Parent_Engagement_Icon Parent_Engagement_Score Parent_Environment_Icon
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Parent_Environment_Score AVERAGE_STUDENT_ATTENDANCE
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Rate_of_Misconducts__per_100_students_ Average_Teacher_Attendance
## Min. : 0.000 Length:566
## 1st Qu.: 4.525 Class :character
## Median : 12.250 Mode :character
## Mean : 21.086
## 3rd Qu.: 26.975
## Max. :251.600
##
## Individualized_Education_Program_Compliance_Rate Pk_2_Literacy__
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Pk_2_Math__ Gr3_5_Grade_Level_Math__ Gr3_5_Grade_Level_Read__
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Gr3_5_Keep_Pace_Read__ Gr3_5_Keep_Pace_Math__ Gr6_8_Grade_Level_Math__
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Gr6_8_Grade_Level_Read__ Gr6_8_Keep_Pace_Math_ Gr6_8_Keep_Pace_Read__
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Gr_8_Explore_Math__ Gr_8_Explore_Read__ ISAT_Exceeding_Math__
## Length:566 Length:566 Min. : 0.000
## Class :character Class :character 1st Qu.: 9.175
## Mode :character Mode :character Median : 16.000
## Mean : 20.356
## 3rd Qu.: 24.900
## Max. :100.000
## NA's :90
## ISAT_Exceeding_Reading__ ISAT_Value_Add_Math ISAT_Value_Add_Read
## Min. : 0.00 Min. :-3.50000 Min. :-5.00000
## 1st Qu.: 6.10 1st Qu.:-0.60000 1st Qu.:-0.60000
## Median : 10.95 Median : 0.00000 Median : 0.00000
## Mean : 15.67 Mean : 0.06859 Mean : 0.02265
## 3rd Qu.: 19.52 3rd Qu.: 0.70000 3rd Qu.: 0.62500
## Max. :100.00 Max. : 3.60000 Max. : 4.90000
## NA's :90 NA's :98 NA's :98
## ISAT_Value_Add_Color_Math ISAT_Value_Add_Color_Read Students_Taking__Algebra__
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Students_Passing__Algebra__ X9th.Grade.EXPLORE..2009.
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## X9th.Grade.EXPLORE..2010. X10th.Grade.PLAN..2009. X10th.Grade.PLAN..2010.
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Net_Change_EXPLORE_and_PLAN X11th.Grade.Average.ACT..2011.
## Length:566 Length:566
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Net_Change_PLAN_and_ACT College_Eligibility__ Graduation_Rate__
## Length:566 Length:566 Length:566
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## College_Enrollment_Rate__ COLLEGE_ENROLLMENT General_Services_Route
## Length:566 Min. : 21.0 Min. :29.00
## Class :character 1st Qu.: 342.2 1st Qu.:34.00
## Mode :character Median : 504.0 Median :39.00
## Mean : 626.1 Mean :39.08
## 3rd Qu.: 790.5 3rd Qu.:45.00
## Max. :4368.0 Max. :49.00
##
## Freshman_on_Track_Rate__ X_COORDINATE Y_COORDINATE Latitude
## Length:566 Min. :1118114 Min. :1817242 Min. :41.65
## Class :character 1st Qu.:1152166 1st Qu.:1860248 1st Qu.:41.77
## Mode :character Median :1163553 Median :1886889 Median :41.85
## Mean :1163016 Mean :1885663 Mean :41.84
## 3rd Qu.:1174256 3rd Qu.:1911652 3rd Qu.:41.91
## Max. :1202811 Max. :1950960 Max. :42.02
##
## Longitude COMMUNITY_AREA_NUMBER COMMUNITY_AREA_NAME Ward
## Min. :-87.84 Min. : 1.00 Length:566 Min. : 1.00
## 1st Qu.:-87.72 1st Qu.:23.00 Class :character 1st Qu.:10.00
## Median :-87.68 Median :31.50 Mode :character Median :21.00
## Mean :-87.68 Mean :37.71 Mean :22.06
## 3rd Qu.:-87.64 3rd Qu.:58.00 3rd Qu.:32.75
## Max. :-87.53 Max. :77.00 Max. :50.00
##
## Police_District Location
## Min. : 1.00 Length:566
## 1st Qu.: 7.00 Class :character
## Median :10.00 Mode :character
## Mean :11.71
## 3rd Qu.:17.00
## Max. :25.00
##

5. Retrieve table structure information


# List the column names of chicago_data_table from the MySQL catalog.
# TABLE_SCHEMA is restricted to the connection's current database so that a
# table with the same name in another schema cannot contribute rows, and
# ORDER BY ORDINAL_POSITION makes the listing follow the table's own column
# order instead of an unspecified one.
columns_query <- dbGetQuery(con, "
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'chicago_data_table'
ORDER BY ORDINAL_POSITION;
")
print(columns_query)

## COLUMN_NAME
## 1 row_names
## 2 School_ID
## 3 NAME_OF_SCHOOL
## 4 Elementary, Middle, or High School
## 5 Street_Address
## 6 City
## 7 State
## 8 ZIP_Code
## 9 Phone_Number
## 10 Link
## 11 Network_Manager
## 12 Collaborative_Name
## 13 Adequate_Yearly_Progress_Made_
## 14 Track_Schedule
## 15 CPS_Performance_Policy_Status
## 16 CPS_Performance_Policy_Level
## 17 HEALTHY_SCHOOL_CERTIFIED
## 18 Safety_Icon
## 19 SAFETY_SCORE
## 20 Family_Involvement_Icon
## 21 Family_Involvement_Score
## 22 Environment_Icon
## 23 Environment_Score
## 24 Instruction_Icon
## 25 Instruction_Score
## 26 Leaders_Icon
## 27 Leaders_Score
## 28 Teachers_Icon
## 29 Teachers_Score
## 30 Parent_Engagement_Icon
## 31 Parent_Engagement_Score
## 32 Parent_Environment_Icon
## 33 Parent_Environment_Score
## 34 AVERAGE_STUDENT_ATTENDANCE
## 35 Rate_of_Misconducts__per_100_students_
## 36 Average_Teacher_Attendance
## 37 Individualized_Education_Program_Compliance_Rate
## 38 Pk_2_Literacy__
## 39 Pk_2_Math__
## 40 Gr3_5_Grade_Level_Math__
## 41 Gr3_5_Grade_Level_Read__
## 42 Gr3_5_Keep_Pace_Read__
## 43 Gr3_5_Keep_Pace_Math__
## 44 Gr6_8_Grade_Level_Math__
## 45 Gr6_8_Grade_Level_Read__
## 46 Gr6_8_Keep_Pace_Math_
## 47 Gr6_8_Keep_Pace_Read__
## 48 Gr_8_Explore_Math__
## 49 Gr_8_Explore_Read__
## 50 ISAT_Exceeding_Math__
## 51 ISAT_Exceeding_Reading__
## 52 ISAT_Value_Add_Math
## 53 ISAT_Value_Add_Read
## 54 ISAT_Value_Add_Color_Math
## 55 ISAT_Value_Add_Color_Read
## 56 Students_Taking__Algebra__
## 57 Students_Passing__Algebra__
## 58 9th Grade EXPLORE (2009)
## 59 9th Grade EXPLORE (2010)
## 60 10th Grade PLAN (2009)
## 61 10th Grade PLAN (2010)
## 62 Net_Change_EXPLORE_and_PLAN
## 63 11th Grade Average ACT (2011)
## 64 Net_Change_PLAN_and_ACT
## 65 College_Eligibility__
## 66 Graduation_Rate__
## 67 College_Enrollment_Rate__
## 68 COLLEGE_ENROLLMENT
## 69 General_Services_Route
## 70 Freshman_on_Track_Rate__
## 71 X_COORDINATE
## 72 Y_COORDINATE
## 73 Latitude
## 74 Longitude
## 75 COMMUNITY_AREA_NUMBER
## 76 COMMUNITY_AREA_NAME
## 77 Ward
## 78 Police_District
## 79 Location

6. Rename columns in the chicago_data_table table (the imported Chicago Public Schools data)


# Rename the `Elementary, Middle, or High School` column to `School Types`,
# declaring it as VARCHAR(255). The call is left at top level so its return
# value is printed.
dbExecute(
  conn = con,
  statement = "
ALTER TABLE chicago_data_table
CHANGE COLUMN `Elementary, Middle, or High School` `School Types`
VARCHAR(255);
"
)

## [1] 566

7. Count the number of elementary schools


# Count the rows whose `School Types` value is 'ES' and show the result.
query_elementary_count <- dbGetQuery(
  conn = con,
  statement = "
SELECT COUNT(*) as Elementary_Count
FROM chicago_data_table
WHERE `School Types` = 'ES';
"
)
print(query_elementary_count)
## Elementary_Count
## 1 462

8. Find the highest safety score


# Fetch the maximum value of the Safety_Score column and show the result.
query_highest_safety_score <- dbGetQuery(
  conn = con,
  statement = "
SELECT MAX(Safety_Score) as Highest_Safety_Score
FROM chicago_data_table;
"
)
print(query_highest_safety_score)

## Highest_Safety_Score
## 1 99

9. Write R code to clean the column names of a data frame called chicago_schools.
Perform the following steps:
# Normalise column names into underscore-separated identifiers.
#
# names: character vector of column names.
# Returns a character vector of the same length in which
#   * every "." has been replaced by "_",
#   * runs of two or more underscores are collapsed to a single "_",
#   * trailing underscores are removed.
#
# Dots are converted first so that dot runs such as ".." are collapsed
# together with existing underscore runs; converting them last would leave
# "__" behind (e.g. "Elementary__Middle__or_High_School").
clean_column_names <- function(names) {
  names <- gsub(".", "_", names, fixed = TRUE)
  names <- gsub("_{2,}", "_", names)
  sub("_+$", "", names)
}
# Run the cleaner over the data frame's current column names and display
# the cleaned vector (the data frame itself is not renamed here).
cleaned_names <- clean_column_names(names(chicago_data))
print(cleaned_names)

## [1] "School_ID"
## [2] "NAME_OF_SCHOOL"
## [3] "Elementary__Middle__or_High_School"
## [4] "Street_Address"
## [5] "City"
## [6] "State"
## [7] "ZIP_Code"
## [8] "Phone_Number"
## [9] "Link"
## [10] "Network_Manager"
## [11] "Collaborative_Name"
## [12] "Adequate_Yearly_Progress_Made"
## [13] "Track_Schedule"
## [14] "CPS_Performance_Policy_Status"
## [15] "CPS_Performance_Policy_Level"
## [16] "HEALTHY_SCHOOL_CERTIFIED"
## [17] "Safety_Icon"
## [18] "SAFETY_SCORE"
## [19] "Family_Involvement_Icon"
## [20] "Family_Involvement_Score"
## [21] "Environment_Icon"
## [22] "Environment_Score"
## [23] "Instruction_Icon"
## [24] "Instruction_Score"
## [25] "Leaders_Icon"
## [26] "Leaders_Score"
## [27] "Teachers_Icon"
## [28] "Teachers_Score"
## [29] "Parent_Engagement_Icon"
## [30] "Parent_Engagement_Score"
## [31] "Parent_Environment_Icon"
## [32] "Parent_Environment_Score"
## [33] "AVERAGE_STUDENT_ATTENDANCE"
## [34] "Rate_of_Misconducts_per_100_students"
## [35] "Average_Teacher_Attendance"
## [36] "Individualized_Education_Program_Compliance_Rate"
## [37] "Pk_2_Literacy"
## [38] "Pk_2_Math"
## [39] "Gr3_5_Grade_Level_Math"
## [40] "Gr3_5_Grade_Level_Read"
## [41] "Gr3_5_Keep_Pace_Read"
## [42] "Gr3_5_Keep_Pace_Math"
## [43] "Gr6_8_Grade_Level_Math"
## [44] "Gr6_8_Grade_Level_Read"
## [45] "Gr6_8_Keep_Pace_Math"
## [46] "Gr6_8_Keep_Pace_Read"
## [47] "Gr_8_Explore_Math"
## [48] "Gr_8_Explore_Read"
## [49] "ISAT_Exceeding_Math"
## [50] "ISAT_Exceeding_Reading"
## [51] "ISAT_Value_Add_Math"
## [52] "ISAT_Value_Add_Read"
## [53] "ISAT_Value_Add_Color_Math"
## [54] "ISAT_Value_Add_Color_Read"
## [55] "Students_Taking_Algebra"
## [56] "Students_Passing_Algebra"
## [57] "X9th_Grade_EXPLORE__2009"
## [58] "X9th_Grade_EXPLORE__2010"
## [59] "X10th_Grade_PLAN__2009"
## [60] "X10th_Grade_PLAN__2010"
## [61] "Net_Change_EXPLORE_and_PLAN"
## [62] "X11th_Grade_Average_ACT__2011"
## [63] "Net_Change_PLAN_and_ACT"
## [64] "College_Eligibility"
## [65] "Graduation_Rate"
## [66] "College_Enrollment_Rate"
## [67] "COLLEGE_ENROLLMENT"
## [68] "General_Services_Route"
## [69] "Freshman_on_Track_Rate"
## [70] "X_COORDINATE"
## [71] "Y_COORDINATE"
## [72] "Latitude"
## [73] "Longitude"
## [74] "COMMUNITY_AREA_NUMBER"
## [75] "COMMUNITY_AREA_NAME"
## [76] "Ward"
## [77] "Police_District"
## [78] "Location"

10. Check for missing data


# Count the NA values in each column and display the per-column totals.
missing_data <- colSums(x = is.na(chicago_data))
print(missing_data)

## School_ID
## 0
## NAME_OF_SCHOOL
## 0
## Elementary..Middle..or.High.School
## 0
## Street_Address
## 0
## City
## 0
## State
## 0
## ZIP_Code
## 0
## Phone_Number
## 0
## Link
## 1
## Network_Manager
## 0
## Collaborative_Name
## 0
## Adequate_Yearly_Progress_Made_
## 0
## Track_Schedule
## 0
## CPS_Performance_Policy_Status
## 0
## CPS_Performance_Policy_Level
## 0
## HEALTHY_SCHOOL_CERTIFIED
## 0
## Safety_Icon
## 0
## SAFETY_SCORE
## 53
## Family_Involvement_Icon
## 0
## Family_Involvement_Score
## 0
## Environment_Icon
## 0
## Environment_Score
## 53
## Instruction_Icon
## 0
## Instruction_Score
## 53
## Leaders_Icon
## 0
## Leaders_Score
## 0
## Teachers_Icon
## 0
## Teachers_Score
## 0
## Parent_Engagement_Icon
## 0
## Parent_Engagement_Score
## 0
## Parent_Environment_Icon
## 0
## Parent_Environment_Score
## 0
## AVERAGE_STUDENT_ATTENDANCE
## 1
## Rate_of_Misconducts__per_100_students_
## 0
## Average_Teacher_Attendance
## 0
## Individualized_Education_Program_Compliance_Rate
## 0
## Pk_2_Literacy__
## 0
## Pk_2_Math__
## 0
## Gr3_5_Grade_Level_Math__
## 0
## Gr3_5_Grade_Level_Read__
## 0
## Gr3_5_Keep_Pace_Read__
## 0
## Gr3_5_Keep_Pace_Math__
## 0
## Gr6_8_Grade_Level_Math__
## 0
## Gr6_8_Grade_Level_Read__
## 0
## Gr6_8_Keep_Pace_Math_
## 0
## Gr6_8_Keep_Pace_Read__
## 0
## Gr_8_Explore_Math__
## 0
## Gr_8_Explore_Read__
## 0
## ISAT_Exceeding_Math__
## 90
## ISAT_Exceeding_Reading__
## 90
## ISAT_Value_Add_Math
## 98
## ISAT_Value_Add_Read
## 98
## ISAT_Value_Add_Color_Math
## 0
## ISAT_Value_Add_Color_Read
## 0
## Students_Taking__Algebra__
## 0
## Students_Passing__Algebra__
## 0
## X9th.Grade.EXPLORE..2009.
## 0
## X9th.Grade.EXPLORE..2010.
## 0
## X10th.Grade.PLAN..2009.
## 0
## X10th.Grade.PLAN..2010.
## 0
## Net_Change_EXPLORE_and_PLAN
## 0
## X11th.Grade.Average.ACT..2011.
## 0
## Net_Change_PLAN_and_ACT
## 0
## College_Eligibility__
## 0
## Graduation_Rate__
## 0
## College_Enrollment_Rate__
## 0
## COLLEGE_ENROLLMENT
## 0
## General_Services_Route
## 0
## Freshman_on_Track_Rate__
## 0
## X_COORDINATE
## 0
## Y_COORDINATE
## 0
## Latitude
## 0
## Longitude
## 0
## COMMUNITY_AREA_NUMBER
## 0
## COMMUNITY_AREA_NAME
## 0
## Ward
## 0
## Police_District
## 0
## Location
## 0

You might also like