Winner’s curse for binary events

# Simulate winner’s curse of choosing only those with p of less than 0.05 of binary dist with logistic reg in 1000 clinical trials using python

import numpy as np

import pandas as pd

from scipy.stats import bernoulli, norm

from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

import statsmodels.api as sm

# Set random seed for reproducibility

np.random.seed(42)

# Define simulation parameters

n_trials = 1000  # Number of trials to simulate

n_patients = 500  # Number of patients per trial

true_odds_ratio = 1.5  # True odds ratio for the treatment effect

def simulate_trial(n_patients, true_odds_ratio):

    # Randomly assign patients to treatment (1) or control (0)

    treatment = np.random.randint(2, size=n_patients)

    # Calculate probability of success for each patient based on treatment and true odds ratio

    p_success = 1 / (1 + np.exp(-np.log(true_odds_ratio) * treatment))   

    # Simulate outcomes using Bernoulli distribution

    outcomes = bernoulli.rvs(p_success) 

    return treatment, outcomes

# Simulate all trials

trials = [simulate_trial(n_patients, true_odds_ratio) for _ in range(n_trials)]

results = []

for treatment, outcomes in trials:

    # Add a constant to the model (intercept)

    X = sm.add_constant(treatment.reshape(-1, 1)) 

    # Fit the logistic regression model using statsmodels

    model = sm.Logit(outcomes, X).fit()    

    # Extract odds ratio 

    odds_ratio = np.exp(model.params[1])

    # Get the p-value directly from the model summary

    p_value = model.pvalues[1]

    results.append({'odds_ratio': odds_ratio, 'p_value': p_value})

# Selecting "Significant" Trials

significant_trials = [result for result in results if result['p_value'] < 0.05]

# Calculate the mean odds ratio of significant trials

mean_odds_ratio_significant = np.mean([result['odds_ratio'] for result in significant_trials])

print(f'Mean odds ratio of significant trials: {mean_odds_ratio_significant:.2f}')

print(f'Number of significant trials: {len(significant_trials)}')

# Visualizing the Winner's Curse

plt.hist([result['odds_ratio'] for result in results], bins=20, label='All Trials', alpha=0.7)

plt.hist([result['odds_ratio'] for result in significant_trials], bins=20, label=f'Significant Trials (p < 0.05) - {len(significant_trials)}', alpha=0.7)

plt.axvline(true_odds_ratio, color='red', linestyle='dashed', label='True Odds Ratio')

plt.axvline(mean_odds_ratio_significant, color='green', linestyle='dashed', label=f'Mean Odds Ratio of Significant Trials - {mean_odds_ratio_significant:.2f}')

plt.xlabel('Estimated Odds Ratio')

plt.ylabel('Frequency')

plt.legend()

plt.title('Winner\'s Curse Simulation')

plt.show()

留言

這個網誌中的熱門文章

可轉移性、普遍性、代表性和外部有效性

頻率學派 vs 貝氏學派

貝氏分析計算器