Skip to content

Commit b5a2982

Browse files
committed
update test_drscorer
Signed-off-by: kgao <kevin.leo.gao@gmail.com>
1 parent a05e1ba commit b5a2982

File tree

1 file changed

+71
-72
lines changed

1 file changed

+71
-72
lines changed

econml/tests/test_drscorer.py

+71-72
Original file line numberDiff line numberDiff line change
@@ -3,85 +3,84 @@
33

44
import unittest

import numpy as np
import scipy.special
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LassoCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.utils import check_random_state

# BUG FIX: the tests below instantiate DRLearner and DRScorer, but this
# revision dropped `from econml.dr import DRLearner` and
# `from econml.score import DRScorer` together with the other removed
# imports, so every test would die with NameError before asserting
# anything. Restore the two imports the tests actually use.
# (NOTE(review): file lines 1-2 sit above this hunk and are presumably a
# license header, not imports — confirm against the full file.)
from econml.dr import DRLearner
from econml.score import DRScorer

class TestDRLearner(unittest.TestCase):
    """Smoke tests for DRLearner fitting, effect shapes, and scoring.

    Every test draws the same synthetic design: X ~ N(0, I_3), a
    three-valued treatment T ~ Binomial(2, expit(X[:, 0])), and an
    outcome y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + noise, so the true
    CATE is linear in X[:, 0].
    """

    def test_default_models(self):
        """Fit DRLearner with its default nuisance models and check the
        shapes/types of effects, scores, and fitted first-stage models."""
        np.random.seed(123)
        X = np.random.normal(size=(1000, 3))
        T = np.random.binomial(2, scipy.special.expit(X[:, 0]))
        sigma = 0.001
        y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))
        est = DRLearner()
        est.fit(y, T, X=X, W=None)
        # Three treatment levels -> two non-baseline effect columns.
        assert est.const_marginal_effect(X[:2]).shape == (2, 2)
        assert est.effect(X[:2], T0=0, T1=1).shape == (2,)
        assert isinstance(est.score_, float)
        assert isinstance(est.score(y, T, X=X), float)
        # Per-treatment CATE models expose 1-D coefficient vectors.
        assert len(est.model_cate(T=1).coef_.shape) == 1
        assert len(est.model_cate(T=2).coef_.shape) == 1
        assert isinstance(est.cate_feature_names(), list)
        assert isinstance(est.models_regression[0][0].coef_, np.ndarray)
        assert isinstance(est.models_propensity[0][0].coef_, np.ndarray)

    def test_custom_models(self):
        """Fit DRLearner with user-supplied forest nuisances and a LassoCV
        final model; check effect shapes and final-model attributes."""
        np.random.seed(123)
        X = np.random.normal(size=(1000, 3))
        T = np.random.binomial(2, scipy.special.expit(X[:, 0]))
        sigma = 0.01
        y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))
        est = DRLearner(
            model_propensity=RandomForestClassifier(n_estimators=100, min_samples_leaf=10),
            model_regression=RandomForestRegressor(n_estimators=100, min_samples_leaf=10),
            model_final=LassoCV(cv=3),
            featurizer=None
        )
        est.fit(y, T, X=X, W=None)
        assert isinstance(est.score_, float)
        assert est.const_marginal_effect(X[:3]).shape == (3, 2)
        assert len(est.model_cate(T=2).coef_.shape) == 1
        assert isinstance(est.model_cate(T=2).intercept_, float)
        assert len(est.model_cate(T=1).coef_.shape) == 1
        assert isinstance(est.model_cate(T=1).intercept_, float)

    def test_cv_splitting_strategy(self):
        """Fit with an explicit integer cv and check effects still come out
        with the expected shape."""
        np.random.seed(123)
        X = np.random.normal(size=(1000, 3))
        T = np.random.binomial(2, scipy.special.expit(X[:, 0]))
        sigma = 0.001
        y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))
        est = DRLearner(cv=2)
        est.fit(y, T, X=X, W=None)
        assert est.const_marginal_effect(X[:2]).shape == (2, 2)

    def test_mc_iters(self):
        """Fit with bootstrap inference and probe the shapes of effect,
        interval, and summary outputs.

        NOTE(review): `est.effect(...)` normally returns one value per row
        of X (here 2), so asserting shape[0] == 50 looks suspect, and
        `ortho_summary`/`ortho_intervals` are not obviously part of the
        DRLearner API — confirm these against the econml version under
        test before trusting this test.
        """
        np.random.seed(123)
        X = np.random.normal(size=(1000, 3))
        T = np.random.binomial(2, scipy.special.expit(X[:, 0]))
        sigma = 0.001
        y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))
        est = DRLearner()
        est.fit(y, T, X=X, W=None, inference='bootstrap', n_bootstrap_samples=50)

        self.assertAlmostEqual(est.effect(X[:2], T0=0, T1=1, inference='bootstrap', n_bootstrap_samples=50).shape[0], 50)
        self.assertAlmostEqual(est.effect_interval(X[:2], T0=0, T1=1, alpha=0.05, inference='bootstrap',
                                                   n_bootstrap_samples=50).shape, (2, 50, 2))
        self.assertAlmostEqual(est.ortho_summary(X[:2], T0=0, T1=1, inference='bootstrap',
                                                 n_bootstrap_samples=50).shape, (2, 2, 5))
        self.assertAlmostEqual(est.ortho_intervals(X[:2], T0=0, T1=1, inference='bootstrap', n_bootstrap_samples=50,
                                                   method='normal').shape, (2, 2, 2, 2))

    def test_score(self):
        """Fit a DRScorer on pure-noise data and pin its score to a fixed
        value.

        NOTE(review): the hard-coded 0.05778546 is brittle — it depends on
        the RNG stream and the econml/sklearn versions; verify it was
        produced against the pinned dependency set.
        """
        np.random.seed(123)
        y = np.random.normal(size=(1000,))
        T = np.random.binomial(2, 0.5, size=(1000,))
        X = np.random.normal(size=(1000, 3))
        est = DRScorer()
        est.fit(y, T, X=X, W=None)
        score = est.score()
        self.assertAlmostEqual(score, 0.05778546)

0 commit comments

Comments
 (0)