Winner’s curse for binary events
# Simulate winner’s curse of choosing only those with p of less than 0.05 of binary dist with logistic reg in 1000 clinical trials using python
import numpy as np
import pandas as pd
from scipy.stats import bernoulli, norm
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Set random seed for reproducibility
np.random.seed(42)
# Define simulation parameters
n_trials = 1000 # Number of trials to simulate
n_patients = 500 # Number of patients per trial
true_odds_ratio = 1.5 # True odds ratio for the treatment effect
def simulate_trial(n_patients, true_odds_ratio):
# Randomly assign patients to treatment (1) or control (0)
treatment = np.random.randint(2, size=n_patients)
# Calculate probability of success for each patient based on treatment and true odds ratio
p_success = 1 / (1 + np.exp(-np.log(true_odds_ratio) * treatment))
# Simulate outcomes using Bernoulli distribution
outcomes = bernoulli.rvs(p_success)
return treatment, outcomes
# Simulate all trials
trials = [simulate_trial(n_patients, true_odds_ratio) for _ in range(n_trials)]
results = []
for treatment, outcomes in trials:
# Add a constant to the model (intercept)
X = sm.add_constant(treatment.reshape(-1, 1))
# Fit the logistic regression model using statsmodels
model = sm.Logit(outcomes, X).fit()
# Extract odds ratio
odds_ratio = np.exp(model.params[1])
# Get the p-value directly from the model summary
p_value = model.pvalues[1]
results.append({'odds_ratio': odds_ratio, 'p_value': p_value})
# Selecting "Significant" Trials
significant_trials = [result for result in results if result['p_value'] < 0.05]
# Calculate the mean odds ratio of significant trials
mean_odds_ratio_significant = np.mean([result['odds_ratio'] for result in significant_trials])
print(f'Mean odds ratio of significant trials: {mean_odds_ratio_significant:.2f}')
print(f'Number of significant trials: {len(significant_trials)}')
# Visualizing the Winner's Curse
plt.hist([result['odds_ratio'] for result in results], bins=20, label='All Trials', alpha=0.7)
plt.hist([result['odds_ratio'] for result in significant_trials], bins=20, label=f'Significant Trials (p < 0.05) - {len(significant_trials)}', alpha=0.7)
plt.axvline(true_odds_ratio, color='red', linestyle='dashed', label='True Odds Ratio')
plt.axvline(mean_odds_ratio_significant, color='green', linestyle='dashed', label=f'Mean Odds Ratio of Significant Trials - {mean_odds_ratio_significant:.2f}')
plt.xlabel('Estimated Odds Ratio')
plt.ylabel('Frequency')
plt.legend()
plt.title('Winner\'s Curse Simulation')
plt.show()
留言
張貼留言