I have a dataset with two issues:
- The data have a hidden parameter.
- There is a known multi-modality in the model.
Hidden Parameter Problem
First, let’s consider the hidden parameter. Here is a simple demonstration of how the data are generated. The parameters `R` and `azimuth` are hidden, but they can be inferred from the observed `v` and the known values of `angle`.
import pymc as pm
import pytensor.tensor as pt
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# known constants
R0 = 1.0
A = 1.0


def calc_v(R, angle, R0=R0, A=A):
    """Return the model value of v for a given R and angle.

    Evaluates ``sin(angle) * A * (1 - R / R0)``, which is algebraically
    identical to ``R * sin(angle) * (A / R - A / R0)``. The expanded form
    never divides by ``R``, so ``R == 0`` gives ``A * sin(angle)`` instead
    of NaN (``0 * inf``).

    Parameters
    ----------
    R : scalar, array, or PyTensor variable
        The hidden parameter R.
    angle : scalar, array, or PyTensor variable
        The known angle, in radians; it broadcasts against ``R``.
    R0, A : scalar
        Known constants; they default to the module-level values.

    Returns
    -------
    PyTensor variable
        Symbolic expression for v; call ``.eval()`` to get numeric values.
    """
    return pt.sin(angle) * A * (1.0 - R / R0)
# simulate data
num_obs = 1000
# One seeded generator feeds every draw so the simulated dataset is the same
# on every run; change the seed to get a different realisation.
rng = np.random.default_rng(1234)
R_true = stats.maxwell().rvs(num_obs, random_state=rng)
azimuth_true = stats.uniform().rvs(num_obs, random_state=rng) * 2.0 * np.pi
Y_true = R_true * np.cos(azimuth_true)
X_true = R_true * np.sin(azimuth_true)
# Law of cosines: distance between each point and the point at radius R0,
# azimuth 0, i.e. (x, y) = (0, R0).
d_true = np.sqrt(R_true**2.0 + R0**2.0 - 2.0 * R_true * R0 * np.cos(azimuth_true))
# Direction of each point measured from (0, R0), wrapped into [0, 2*pi).
angle = (-np.arctan2(X_true, Y_true - R0) + np.pi) % (2.0 * np.pi)
v_sigma_true = 0.1  # intrinsic scatter
# Scatter-free model values first, then add the Gaussian intrinsic scatter.
v_true = calc_v(R_true, angle, R0=R0, A=A).eval()
v_true += stats.norm().rvs(num_obs, random_state=rng) * v_sigma_true
Here then is “reality”:
# plot "reality": the hidden (x, y) positions, coloured by the observed v
fig, ax = plt.subplots(layout="constrained")
ax.set(xlabel="x (unobserved)", ylabel="y (unobserved)")
cax = ax.scatter(X_true, Y_true, c=v_true)
fig.colorbar(cax, label="v (observed)")
fig.show()
plt.close(fig)
And here is “reality” as observed: `angle` is known, and we observe `v`. `R` is hidden, but this plot shows the relationship between `v` and `R`.
# plot "reality" as observed: known angle against observed v, coloured by
# the hidden R
# Constrained layout sizes the axes and the colorbar together, so no separate
# tight_layout() pass is needed after the colorbar is added.
fig, ax = plt.subplots(layout="constrained")
cax = ax.scatter(angle, v_true, c=R_true)
fig.colorbar(cax, label="R (unobserved)")
ax.set_xlabel("angle")
ax.set_ylabel("v")
fig.show()
plt.close(fig)
Here is a model that attempts to infer `v_sigma` from observations of `v` that carry no measurement noise, only the intrinsic scatter.
# define model, sample it, and summarise the scatter parameter
with pm.Model(coords={"datum": range(num_obs)}) as model:
    # data: the known angles and the observed v, one entry per datum
    angle_observed = pm.Data("angle_observed", angle, dims="datum")
    v_observed = pm.Data("v_observed", v_true, dims="datum")

    # priors: one latent R per datum, one shared scatter
    R = pm.HalfNormal("R", dims="datum")
    v_sigma = pm.Exponential("v_sigma")

    # v likelihood: normal scatter of width v_sigma around the model value
    v_mu = calc_v(R, angle_observed, R0=R0, A=A)
    _ = pm.Normal("v", mu=v_mu, sigma=v_sigma, observed=v_observed, dims="datum")

    # draw posterior samples with the default sampler settings
    trace = pm.sample()

pm.summary(trace, var_names=["v_sigma"])
which outputs
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
v_sigma 0.255 0.025 0.208 0.299 0.002 0.001 121.0 270.0 1.05
As we can see, NUTS struggles with this model (r_hat = 1.05, ess_bulk = 121) and `v_sigma` is overestimated (0.255 versus the true value of 0.1).
Any suggestions about how I could parameterize this model differently?
Known Multi-modality
The other issue is that I have another observed parameter, `f`, that depends on a transformation of `R` and `v`. Specifically, `f` is derived from `d`, and `d` is related to `R` and `v` like so:
# Two fixed angles and a grid of distances d at which to trace v(d).
angle_test = np.deg2rad([30.0, -120.0])
d_test = np.linspace(0.0, 2.0, num=1000)
# Law of cosines gives R for every (d, angle) pair: d runs down the rows,
# angle across the columns.
R_test = np.sqrt(
    R0**2.0
    + d_test[:, np.newaxis] ** 2.0
    - 2.0 * R0 * d_test[:, np.newaxis] * np.cos(angle_test)
)
v_test = calc_v(R_test, angle_test, R0=R0, A=A).eval()

# One curve per test angle.
fig, ax = plt.subplots(layout="constrained")
for column, (fmt, label) in enumerate(
    (("k-", "angle = 30 deg"), ("r-", "angle = -120 deg"))
):
    ax.plot(d_test, v_test[:, column], fmt, label=label)
ax.legend(loc="best")
ax.set(xlabel="d (unobserved)", ylabel="v (observed)")
fig.show()
Depending on the known `angle` and the observed `v`, there could be zero, one, or two values of `d` that reproduce the observation.
Any suggestions about how I could parameterize this model to handle the multi-modality?