import numpy as np, pandas as pd, array as a from scipy.stats import norm # Bootstrap algorithm from # http://nbviewer.ipython.org/gist/aflaxman/6871948 def bootstrap_resample(X, n=None): """ Bootstrap resample an array_like Parameters ---------- X : array_like data to resample n : int, optional length of resampled array, equal to len(X) if n==None Results ------- returns X_resamples """ if n == None: n = len(X) resample_i = np.floor(np.random.rand(n)*len(X)).astype(int) # print resample_i X_resample = X[resample_i] return X_resample N=1e4 # This is the number of bootstrap resamples, if that is necessary f=open('/home/dana/ab_test/scenario1_params.txt') # scenario1_params.txt has one line and four columns: numer of flips for coing A, successes for A, flips for B, successes for B # 2-sided CI covering 1minus_alpha t=f.readline() tt=t.split(',', 5) n1=int(tt[0]) x1=int(tt[1]) # p1=float(tt[1])/n1 n2=int(tt[2]) x2=int(tt[3]) p2=float(tt[3])/n2 _1minus_alpha=float(tt[4]) # print _1minus_alpha z_alpha_div_2=norm.ppf(1-(1-_1minus_alpha)/2) # print z_alpha_div_2 diff=p1-p2 if x1>=5 & x2>= 5: # if 1==1: end=z_alpha_div_2*(((p1*(1-p1))/n1 + (p2*(1-p2))/n2)**(0.5)) llim=diff-end ulim=diff+end txt='Point estimate'+str(diff)+'Lower lim CI', str(llim)+'Upper lim CI'+ str(ulim) x_norm2binom=[diff, llim, ulim] td= ["%.4f" % xx for xx in x_norm2binom] else: # Do bootstrapping # print 'here' # if 1==1: diff_array=range(int(N)) for x in range(int(N)): x1_boot=np.random.choice(2, n1, p=[1-p1, p1]) tmp=np.sum(x1_boot) p1_boot=float(tmp)/n1 x2_boot=np.random.choice(2, n2, p=[1-p2, p2]) tmp=np.sum(x2_boot) p2_boot=float(tmp)/n2 diff_boot=p1_boot-p2_boot # print diff_boot diff_array[x]=diff_boot # print diff_array lims=np.percentile(diff_array, [2.5, 97.5]) x_boot=[diff, lims[0], lims[1]] td = ["%.4f" % xx for xx in x_boot] # print td_norm2binom, "\n", td_boot with open("scenario1_output.txt", "wt") as out_file: out_file.write('Point estimate: '+str(td[0])+' Lower lim CI: '+str(td[1])+' Upper lim CI: '+ str(td[2]))