import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Pair-plot grid: one row and one column per feature. Diagonal cells show the
# feature's histogram; off-diagonal cells show a scatter plot of the column
# feature (x) against the row feature (y) with a least-squares line on top.

# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed(42)
data = pd.DataFrame(
    np.random.rand(50, 4),
    columns=['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4'],
)

# Number of features
num_features = len(data.columns)

# Create figure. squeeze=False keeps `axes` a 2-D array even when there is
# only one feature, so the axes[i, j] indexing below is always valid.
fig, axes = plt.subplots(
    num_features, num_features, figsize=(10, 10), squeeze=False
)

# Loop through each pair of features: row i supplies y, column j supplies x.
for i in range(num_features):
    for j in range(num_features):
        ax = axes[i, j]
        if i == j:
            # Plot histogram on the diagonal
            ax.hist(
                data.iloc[:, i], bins=10, color="skyblue", edgecolor="black"
            )
        else:
            # Scatter plot
            x = data.iloc[:, j]
            y = data.iloc[:, i]
            ax.scatter(x, y, alpha=0.7, s=10, color="blue")

            # Add regression line: degree-1 polynomial fit gives slope and
            # intercept.
            m, b = np.polyfit(x, y, 1)
            # A straight line is fully described by its two end points, so
            # draw it across the observed x range instead of through every
            # (unsorted) sample.
            x_ends = np.array([x.min(), x.max()])
            ax.plot(x_ends, m * x_ends + b, color="red", linewidth=1)

        # Labels: only the left column and the bottom row are labelled, and
        # that includes the diagonal cells sitting on those edges.
        if j == 0:
            ax.set_ylabel(data.columns[i], fontsize=10)
        if i == num_features - 1:
            ax.set_xlabel(data.columns[j], fontsize=10)

        # Hide ticks for cleaner look
        ax.set_xticks([])
        ax.set_yticks([])

# Adjust layout
plt.tight_layout()
plt.show()