数据分析4之numpy常用函数

本文深入探讨了使用Python进行股票数据的加载、分析和可视化的方法。从加载股票数据开始，介绍了如何计算数学均值、标准差等统计指标，进一步展示了如何绘制股价的5日均线和布林带，以及实现卷积操作。文章通过实例详细讲解了数据处理和可视化的全过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1.加载文件

import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import matplotlib.dates as md


# 日期转换函数
# def dmy2ymd(dmy):
#     dmy = str(dmy, encoding='utf-8')
#     time = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
#     t = time.strftime('%Y-%m-%d')
#     return t
#
#
# # 加载文件
# dates, opening_prices, highest_prices, lowest_prices, closing_prices = \
#     np.loadtxt('data/da_data/aapl.csv',
#                delimiter=',',
#                usecols=(1, 3, 4, 5, 6),
#                dtype='M8[D], f8, f8, f8, f8',
#                unpack=True,
#                converters={1: dmy2ymd}              # 日期转换
#                )
# # print(dates)
#
# plt.figure('aapl', facecolor='grey')
# plt.title('aapl', fontsize=20)
#
# plt.grid(linestyle=':')
# plt.xlabel('date', fontsize=14)
# plt.ylabel('closing_prices', fontsize=14)
#
# ax = plt.gca()
# # 刻度主定位器
# ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
# ax.xaxis.set_major_formatter(md.DateFormatter('%Y/%m/%d'))
# # # 刻度次定位器
# ax.xaxis.set_minor_locator(md.DayLocator())
# plt.tick_params(labelsize=8)
# dates = dates.astype(md.datetime.datetime)
#
# plt.plot(dates, closing_prices, linewidth=2, linestyle='--', label='closing_price')
# plt.gcf().autofmt_xdate()
# plt.legend()
# plt.show()


def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field into a ``YYYY-MM-DD`` string.

    Used below as the ``np.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field text, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The same calendar date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the text does not match the ``'%d-%m-%Y'`` format.
    """
    # loadtxt hands converters bytes or str depending on the NumPy version and
    # its ``encoding`` argument; calling str(..., encoding=...) on a str raises
    # TypeError, so only decode when we actually got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = str(dmy, encoding='utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return parsed.isoformat()


# Load the date column plus the open / high / low / close price columns
dates, opening_prices, highest_prices, lowest_prices, closing_prices = np.loadtxt(
    'data/da_data/aapl.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    dtype='M8[D], f8, f8, f8, f8',
    unpack=True,
    converters={1: dmy2ymd},  # rewrite the date text before it is parsed
)

# Figure window, title, grid and axis labels
plt.figure('aapl', facecolor='grey')
plt.title('aapl', fontsize=20)
plt.grid(linestyle=':')
plt.xlabel('date', fontsize=14)
plt.ylabel('closing_prices', fontsize=14)

ax = plt.gca()
# Major ticks: one per Monday, labelled as year/month/day
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_major_formatter(md.DateFormatter('%Y/%m/%d'))
# Minor ticks: one per day
ax.xaxis.set_minor_locator(md.DayLocator())
plt.tick_params(labelsize=8)
# Convert the datetime64 values to datetime objects before plotting
dates = dates.astype(md.datetime.datetime)

# Closing price as a dashed line
plt.plot(dates, closing_prices, linewidth=2, linestyle='--', label='closing_price')

# Candlestick chart: a day "rises" when it closes above its open
rise = closing_prices > opening_prices
# Fill colour: white for rising days, green otherwise
color = np.where(rise, 'white', 'green').tolist()
# Edge colour: red for rising days, green otherwise
ecolor = np.where(rise, 'red', 'green')

# Candle bodies span from the opening price to the closing price
plt.bar(dates, closing_prices - opening_prices, width=0.8,
        bottom=opening_prices, color=color, edgecolor=ecolor)
# Wicks span from the lowest to the highest price of the day
plt.vlines(dates, lowest_prices, highest_prices, color=ecolor)

# Arithmetic mean of the closing prices, drawn as a horizontal line
mean = closing_prices.mean()
plt.hlines(mean, dates[0], dates[-1], label='mean_closing price')

plt.gcf().autofmt_xdate()
plt.legend()
plt.show()

2.数学均值

import numpy as np
import matplotlib.pyplot as plt


# Arithmetic mean of 10000 samples drawn from a normal distribution (loc 0, scale 1)
arr = np.random.normal(0, 1, 10000)
avg = arr.mean()
print(avg)

# Weighted mean, using uniformly distributed random weights in [1, 100)
weights = np.random.uniform(1, 100, 10000)
avg = np.average(arr, weights=weights)

print(avg)

# Weighted mean of the closing prices (column 6), weighted by column 7
closing_prices, volumes = np.loadtxt(
    'data/da_data/aapl.csv',
    delimiter=',',
    usecols=(6, 7),
    unpack=True,
)
avg = np.average(closing_prices, weights=volumes)
print(avg)

# Largest and smallest value
max_val = arr.max()
min_val = arr.min()
print(max_val, min_val)

# Positions of the largest and smallest value
max_arg = arr.argmax()
min_arg = arr.argmin()
print(max_arg, min_arg)

# Element-wise maximum / minimum of two arrays, plotted against the sample index
arr1 = np.random.normal(0, 1, 10000)
max_arr = np.maximum(arr, arr1)
min_arr = np.minimum(arr, arr1)
sample_index = np.arange(10000)
plt.figure('max-min')
plt.title('max-min', fontsize=20)
plt.plot(sample_index, max_arr, label='max_arr')
plt.plot(sample_index, min_arr, label='min_arr')
plt.legend()
plt.show()

# Median
medium = np.median(arr)
print(medium)

# Standard deviation: population (ddof=0) and sample (ddof=1)
std = arr.std()
std_1 = arr.std(ddof=1)
print(std, std_1)

3.数组的轴向汇总

# Aggregation along an axis: each row holds one person's scores,
# each column holds one subject
data = np.array([
    [90, 34, 56, 78, 81],
    [56, 45, 78, 87, 66],
    [78, 45, 78, 98, 67],
    [67, 56, 78, 98, 76],
])

# Average score per person: axis=1 aggregates horizontally (across each row)
avg_score = data.mean(axis=1)
print(avg_score)

# Highest score per subject: axis=0 aggregates vertically (down each column)
max_score = data.max(axis=0)
print(max_score)


# Custom aggregation function
def func(data):
    """Return the average of ``data`` under freshly drawn random weights.

    One weight per element is drawn with ``np.random.uniform(1, 100, data.size)``,
    so the result depends on the state of NumPy's global random generator.
    """
    random_weights = np.random.uniform(1, 100, data.size)
    return np.average(data, weights=random_weights)


# Run func once per row (axis=1) of data and collect the results
result = np.apply_along_axis(func, axis=1, arr=data)
print(result)

4.绘制5日均线

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as md
import datetime as dt


# Date conversion function
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field into a ``YYYY-MM-DD`` string.

    Used below as the ``np.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field text, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The same calendar date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the text does not match the ``'%d-%m-%Y'`` format.
    """
    # loadtxt hands converters bytes or str depending on the NumPy version and
    # its ``encoding`` argument; calling str(..., encoding=...) on a str raises
    # TypeError, so only decode when we actually got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = str(dmy, encoding='utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return parsed.isoformat()


# Load the date column plus the open / high / low / close price columns
dates, opening_prices, highest_prices, lowest_prices, closing_prices = np.loadtxt(
    'data/da_data/aapl.csv',
    delimiter=',',
    usecols=(1, 3, 4, 5, 6),
    dtype='M8[D], f8, f8, f8, f8',
    unpack=True,
    converters={1: dmy2ymd},  # rewrite the date text before it is parsed
)

# Figure window, title, grid and axis labels
plt.figure('aapl', facecolor='grey')
plt.title('aapl', fontsize=20)
plt.grid(linestyle=':')
plt.xlabel('date', fontsize=14)
plt.ylabel('closing_prices', fontsize=14)

ax = plt.gca()
# Major ticks: one per Monday, labelled as year/month/day
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_major_formatter(md.DateFormatter('%Y/%m/%d'))
# Minor ticks: one per day
ax.xaxis.set_minor_locator(md.DayLocator())
plt.tick_params(labelsize=8)
# Convert the datetime64 values to datetime objects before plotting
dates = dates.astype(md.datetime.datetime)

# Closing price as a dashed line
plt.plot(dates, closing_prices, linewidth=2, linestyle='--', label='closing_price')

# 5-day moving average: entry k is the mean of closing prices k .. k+4,
# so the first value lines up with the fifth date
window = 5
ma5 = np.zeros(closing_prices.size - (window - 1))
for start in range(ma5.size):
    ma5[start] = np.mean(closing_prices[start:start + window])
plt.plot(dates[window - 1:], ma5, label='ma5')

plt.gcf().autofmt_xdate()
plt.legend()
plt.show()

5.卷积

import numpy as np
import matplotlib.pyplot as plt
# Convolution demo
a = [1, 2, 3, 4, 5]  # input sequence
b = [8, 7, 6]        # convolution kernel

# Convolution modes:
#   valid - only positions where the kernel fully overlaps the input
#   same  - output has the same length as the input
#   full  - every position with any overlap
plt.figure('sm')
plt.title('sm')
sm3 = np.convolve(a, b, mode='valid')
sm5 = np.convolve(a, b, mode='same')
plt.plot(a[2:], sm3, label='sm3 valid')
plt.plot(a, sm5, label='sm5 same')

# Weighted moving average: exponentially growing weights normalised to sum to 1.
# The kernel is passed reversed because np.convolve flips it again.
kernel = np.exp(np.linspace(-1, 0, 3))
kernel = kernel / kernel.sum()
ma53 = np.convolve(a, kernel[::-1], mode='valid')
plt.plot(a[2:], ma53, label='ma53 valid')

plt.legend()
plt.show()

6.布林带

# Date conversion function
def dmy2ymd(dmy):
    """Convert a ``DD-MM-YYYY`` date field into a ``YYYY-MM-DD`` string.

    Used below as the ``np.loadtxt`` converter for the date column.

    Args:
        dmy: The raw field text, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        The same calendar date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If the text does not match the ``'%d-%m-%Y'`` format.
    """
    # loadtxt hands converters bytes or str depending on the NumPy version and
    # its ``encoding`` argument; calling str(..., encoding=...) on a str raises
    # TypeError, so only decode when we actually got bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = str(dmy, encoding='utf-8')
    parsed = dt.datetime.strptime(dmy, '%d-%m-%Y').date()
    return parsed.isoformat()


# Load the date column plus the open / high / low / close price columns
dates, opening_prices, highest_prices, lowest_prices, closing_prices = \
    np.loadtxt('data/da_data/aapl.csv',
               delimiter=',',
               usecols=(1, 3, 4, 5, 6),
               dtype='M8[D], f8, f8, f8, f8',
               unpack=True,
               converters={1: dmy2ymd}              # rewrite the date text before parsing
               )

# Figure window, title, grid and axis labels
plt.figure('aapl', facecolor='grey')
plt.title('aapl', fontsize=20)

plt.grid(linestyle=':')
plt.xlabel('date', fontsize=14)
plt.ylabel('closing_prices', fontsize=14)

ax = plt.gca()
# Major ticks: one per Monday, labelled as year/month/day
ax.xaxis.set_major_locator(md.WeekdayLocator(byweekday=md.MO))
ax.xaxis.set_major_formatter(md.DateFormatter('%Y/%m/%d'))
# Minor ticks: one per day
ax.xaxis.set_minor_locator(md.DayLocator())
plt.tick_params(labelsize=8)
# Convert the datetime64 values to datetime objects before plotting
dates = dates.astype(md.datetime.datetime)

# Closing price as a dashed line
plt.plot(dates, closing_prices, linewidth=2, linestyle='--', label='closing_price')

# Bollinger bands consist of three lines:
#   middle: moving average of the closing price
#   upper:  middle + 2 x the 5-day standard deviation of the closing price (resistance)
#   lower:  middle - 2 x the 5-day standard deviation of the closing price (support)
# Narrowing bands indicate a stable trend; widening bands indicate room for
# larger swings.

# Exponentially growing weights normalised to sum to 1. Without np.exp the
# linspace values normalise to [0.4, 0.3, 0.2, 0.1, 0], which gives the newest
# day no weight at all and the oldest day the most.
weights = np.exp(np.linspace(-1, 0, 5))
weights /= weights.sum()
# np.convolve flips its kernel, so pass it reversed: the largest weight then
# lands on the most recent day of each 5-day window.
em5 = np.convolve(closing_prices, weights[::-1], 'valid')
# Standard deviation of each 5-day window of closing prices
std = np.zeros(em5.size)
for i in range(em5.size):
    std[i] = closing_prices[i: i+5].std()
# Upper band lies above the middle line, lower band below it
upper = em5 + 2*std
lower = em5 - 2*std
# The first band value corresponds to the fifth date
plt.plot(dates[4:], em5, label='middle')
plt.plot(dates[4:], upper, label='upper')
plt.plot(dates[4:], lower, label='lower')
# Shade the region between the bands wherever lower is below upper
plt.fill_between(dates[4:], lower, upper, lower < upper, color='orangered')

plt.gcf().autofmt_xdate()
plt.legend()
plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值