0% found this document useful (0 votes)
22 views1 page

( ) - ( ) ( ('Lower New - Vaccinations - Smoothed') ) ( ('Upper New - Vaccinations - Smoothed') ) - (., ,, 'B', )

Uploaded by

Nathaniel Handan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
22 views1 page

( ) - ( ) ( ('Lower New - Vaccinations - Smoothed') ) ( ('Upper New - Vaccinations - Smoothed') ) - (., ,, 'B', )

Uploaded by

Nathaniel Handan
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 1

In [ ]: # IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES

# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,


# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'latest-covid-19-dataset-worldwide:https%3A%2F%2Fsiteproxy.ruqli.workers.dev%3A443%2Fhttps%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1852043%2F3023973%2Fbundle%2Farc

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null


shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
pass
try:
os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):


directory, download_url_encoded = data_source_mapping.split(':')
download_url = unquote(download_url_encoded)
filename = urlparse(download_url).path
destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
try:
with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
total_length = fileres.headers['content-length']
print(f'Downloading {directory}, {total_length} bytes compressed')
dl = 0
data = fileres.read(CHUNK_SIZE)
while len(data) > 0:
dl += len(data)
tfile.write(data)
done = int(50 * dl / int(total_length))
sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
sys.stdout.flush()
data = fileres.read(CHUNK_SIZE)
if filename.endswith('.zip'):
with ZipFile(tfile) as zfile:
zfile.extractall(destination_path)
else:
with tarfile.open(tfile.name) as tarfile:
tarfile.extractall(destination_path)
print(f'\nDownloaded and uncompressed: {directory}')
except HTTPError as e:
print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
continue
except OSError as e:
print(f'Failed to load {download_url} to path {destination_path}')
continue

print('Data source import complete.')

Downloading latest-covid-19-dataset-worldwide, 8526536 bytes compressed


[==================================================] 8526536 bytes downloaded
Downloaded and uncompressed: latest-covid-19-dataset-worldwide
Data source import complete.

In [ ]: import numpy as np # Lineal algebra


import pandas as pd # Data wrangling
from statsmodels.tsa.arima.model import ARIMA #ARIMA model (arima_model is deprecated, use this one instead!)
from statsmodels.tsa.stattools import adfuller #Dickey-Fuller test for stationarity

# Plotting
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [ ]: df = pd.read_csv('../input/latest-covid-19-dataset-worldwide/owid-covid-data (2).csv')

In [ ]: df['date'] = pd.to_datetime(df['date'])
df = df[df['iso_code']=='NGA']
df = df.set_index('date')
hospitalized = df[['new_vaccinations_smoothed']]
hospitalized = hospitalized.resample('W-MON').mean()

In [ ]: hospitalized.plot()

Out[ ]: <Axes: xlabel='date'>

In [ ]: hospitalized = hospitalized.dropna()

In [ ]: hospitalized.plot()

Out[ ]: <Axes: xlabel='date'>

In [ ]: #hospitalized = np.log(hospitalized) # Apply a log transformation to reduce heterocedasticity


hospitalized.plot()

Out[ ]: <Axes: xlabel='date'>

In [ ]: result = adfuller(hospitalized)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])

ADF Statistic: -1.904860


p-value: 0.329778

In [ ]: diffhosp = hospitalized.diff().dropna() # Save the first difference


result = adfuller(diffhosp)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])

ADF Statistic: -5.184212


p-value: 0.000009

In [ ]: diffhosp.plot()
pacf = plot_pacf(diffhosp)
acf = plot_acf(diffhosp)

In [ ]: model = ARIMA(hospitalized,order= (2,1,2)).fit()


model.summary()

Out[ ]: SARIMAX Results

Dep. Variable: new_vaccinations_smoothed No. Observations: 45

Model: ARIMA(2, 1, 2) Log Likelihood -500.258

Date: Thu, 02 May 2024 AIC 1010.516

Time: 13:12:17 BIC 1019.436

Sample: 03-08-2021 HQIC 1013.824

- 01-10-2022

Covariance Type: opg

coef std err z P>|z| [0.025 0.975]

ar.L1 0.1947 0.569 0.342 0.732 -0.920 1.310

ar.L2 0.3753 0.587 0.639 0.523 -0.776 1.526

ma.L1 0.0333 0.501 0.066 0.947 -0.949 1.016

ma.L2 -0.7574 0.533 -1.421 0.155 -1.802 0.287

sigma2 4.906e+08 3.17e-09 1.55e+17 0.000 4.91e+08 4.91e+08

Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 1.58

Prob(Q): 0.92 Prob(JB): 0.45

Heteroskedasticity (H): 2.53 Skew: 0.29

Prob(H) (two-sided): 0.08 Kurtosis: 3.73

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 4.63e+32. Standard errors may be unstable.

In [ ]: predictions = model.predict()
plt.figure(figsize=(15,10))
plt.plot(predictions[1:])
plt.plot(hospitalized)

Out[ ]: [<matplotlib.lines.Line2D at 0x7c9e30a300d0>]

In [ ]: train = hospitalized[:-5]
test = hospitalized[-5:]
model = ARIMA(train,order= (2,1,2)).fit()
model.summary()

Out[ ]: SARIMAX Results

Dep. Variable: new_vaccinations_smoothed No. Observations: 40

Model: ARIMA(2, 1, 2) Log Likelihood -439.084

Date: Thu, 02 May 2024 AIC 888.168

Time: 13:13:46 BIC 896.486

Sample: 03-08-2021 HQIC 891.153

- 12-06-2021

Covariance Type: opg

coef std err z P>|z| [0.025 0.975]

ar.L1 0.7882 0.495 1.591 0.112 -0.183 1.759

ar.L2 -0.3523 0.380 -0.927 0.354 -1.097 0.392

ma.L1 -0.5648 0.619 -0.913 0.361 -1.777 0.648

ma.L2 -0.2435 0.560 -0.435 0.664 -1.341 0.854

sigma2 3.94e+08 1.83e-09 2.16e+17 0.000 3.94e+08 3.94e+08

Ljung-Box (L1) (Q): 0.14 Jarque-Bera (JB): 7.62

Prob(Q): 0.71 Prob(JB): 0.02

Heteroskedasticity (H): 1.29 Skew: 0.62

Prob(H) (two-sided): 0.65 Kurtosis: 4.78

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.09e+33. Standard errors may be unstable.

In [ ]: predictions = model.get_forecast(5,alpha = 0.95)


plt.figure(figsize=(15,10))
plt.plot((train))
plt.plot((predictions.predicted_mean))
plt.plot((test))
conf= predictions.conf_int(alpha=0.05)
upper = (conf['lower new_vaccinations_smoothed'])
lower = (conf['upper new_vaccinations_smoothed'])
plt.fill_between(upper.index, upper,lower, color='b', alpha=.1)

---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-29-c01453b67fd5> in <cell line: 7>()
5 plt.plot((test))
6 conf= predictions.conf_int(alpha=0.05)
----> 7 upper = (conf['lower new_vaccinations_smoothed'])
8 lower = (conf['upper new_vaccinations_smoothed'])
9 plt.fill_between(upper.index, upper,lower, color='b', alpha=.1)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [ ]: size = int(len(hospitalized) * 0.66)


train, test = hospitalized[0:size], hospitalized[size:len(hospitalized)]
prediction = []
data = train.values
for t in test.values:
model = model = ARIMA(data,order= (2,1,2)).fit()
y = model.forecast()
prediction.append(y[0])
data = np.append(data, t)
test_res = pd.DataFrame(test)
test_res['preds'] = prediction

In [ ]: plt.figure(figsize=(15,6))
plt.plot((train))
plt.plot((test_res))

Out[ ]: [<matplotlib.lines.Line2D at 0x7c9e2ceb7010>,


<matplotlib.lines.Line2D at 0x7c9e2ceb6fe0>]

In [ ]: for column_name in df.columns:


df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

corr = df.corr()
hospcorr = corr['new_vaccinations_smoothed']
hospcorr = hospcorr[np.abs(hospcorr)>0.4]
hospcorr.sort_values()

Out[ ]: stringency_index -0.574414


new_cases_smoothed 0.498517
new_cases_smoothed_per_million 0.498526
positive_rate 0.506732
total_tests 0.651580
total_tests_per_thousand 0.651585
total_deaths_per_million 0.686572
total_deaths 0.686583
total_cases_per_million 0.707083
total_cases 0.707083
people_fully_vaccinated 0.719701
people_fully_vaccinated_per_hundred 0.720430
total_vaccinations_per_hundred 0.770341
total_vaccinations 0.770371
people_vaccinated_per_hundred 0.778505
people_vaccinated 0.778598
new_vaccinations 0.804425
new_people_vaccinated_smoothed 0.947457
new_people_vaccinated_smoothed_per_hundred 0.947986
new_vaccinations_smoothed_per_million 0.999999
new_vaccinations_smoothed 1.000000
Name: new_vaccinations_smoothed, dtype: float64

In [ ]: exogfeats = df[['stringency_index','total_deaths_per_million','total_cases_per_million']]

In [ ]: for column in exogfeats.columns:


exogfeats[column]= exogfeats[column].fillna(exogfeats[column].mean())

In [ ]: exogfeats = exogfeats.resample('W-MON').mean()
exogfeats = exogfeats.iloc[(exogfeats.index >= hospitalized.first_valid_index()) & (exogfeats.index<= hospitalized.last_valid_index())]

In [ ]: fig, ax = plt.subplots(3, 1, figsize=(12, 8), tight_layout=True)


exogfeats.plot(ax=ax, subplots=True, rot=60)

Out[ ]: array([<Axes: xlabel='date'>, <Axes: xlabel='date'>,


<Axes: xlabel='date'>], dtype=object)

In [ ]: model = ARIMA(hospitalized,order= (2,1,2),exog=exogfeats).fit()


predictions = model.predict()
model.summary()

Out[ ]: SARIMAX Results

Dep. Variable: new_vaccinations_smoothed No. Observations: 45

Model: ARIMA(2, 1, 2) Log Likelihood -497.400

Date: Thu, 02 May 2024 AIC 1010.800

Time: 13:24:52 BIC 1025.074

Sample: 03-08-2021 HQIC 1016.094

- 01-10-2022

Covariance Type: opg

coef std err z P>|z| [0.025 0.975]

stringency_index -145.7513 1080.049 -0.135 0.893 -2262.608 1971.106

total_deaths_per_million 4.708e+04 2.33e+04 2.019 0.044 1365.727 9.28e+04

total_cases_per_million -164.9822 269.965 -0.611 0.541 -694.104 364.140

ar.L1 0.9271 0.176 5.279 0.000 0.583 1.271

ar.L2 -0.9354 0.162 -5.759 0.000 -1.254 -0.617

ma.L1 -0.8978 0.290 -3.092 0.002 -1.467 -0.329

ma.L2 0.8878 0.237 3.744 0.000 0.423 1.353

sigma2 4.544e+08 0.191 2.37e+09 0.000 4.54e+08 4.54e+08

Ljung-Box (L1) (Q): 0.66 Jarque-Bera (JB): 1.95

Prob(Q): 0.42 Prob(JB): 0.38

Heteroskedasticity (H): 4.95 Skew: -0.46

Prob(H) (two-sided): 0.00 Kurtosis: 3.45

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.09e+26. Standard errors may be unstable.

In [ ]: plt.figure(figsize=(15,6))
plt.legend(["a","b"])
plt.plot(hospitalized)
plt.plot(predictions[1:])
plt.show()

In [ ]: test_length = 5
train = hospitalized[:-test_length]
test = hospitalized[-test_length:]
train_exog = exogfeats[:-test_length]
test_exog = exogfeats[-test_length:]
model = ARIMA(train,order= (2,1,2),exog=train_exog).fit()

In [ ]: predictions = model.get_forecast(5,exog = test_exog,alpha = 0.95)


plt.figure(figsize=(15,10))
plt.plot((train))
plt.plot((predictions.predicted_mean))
plt.plot((test))
conf= predictions.conf_int(alpha=0.05)
upper = (conf['lower new_vaccinations_smoothed'])
lower = (conf['upper new_vaccinations_smoothed'])
plt.fill_between(upper.index, upper,lower, color='b', alpha=.1)

Out[ ]: <matplotlib.collections.PolyCollection at 0x7c9e2c72dd20>

In [ ]:

You might also like