import numpy as np, pandas as pd, array as a
from scipy.stats import norm
# Bootstrap algorithm from
# http://nbviewer.ipython.org/gist/aflaxman/6871948
def bootstrap_resample(X, n=None):
    """ Bootstrap resample an array_like
    Parameters
    ----------
    X : array_like
      data to resample
    n : int, optional
      length of resampled array, equal to len(X) if n==None
    Results
    -------
    returns X_resamples
    """
    if n == None:
        n = len(X)
        
    resample_i = np.floor(np.random.rand(n)*len(X)).astype(int)
    # print resample_i 
    X_resample = X[resample_i]
    return X_resample


N=1e4 # This is the number of bootstrap resamples, if that is necessary
f=open('/home/dana/ab_test/scenario1_params.txt')
# scenario1_params.txt has one line and four columns: numer of flips for coing A, successes for A, flips for B, successes for B 
# 2-sided CI covering 1minus_alpha
t=f.readline()
tt=t.split(',', 5)
n1=int(tt[0])
x1=int(tt[1]) # 
p1=float(tt[1])/n1
n2=int(tt[2])
x2=int(tt[3])
p2=float(tt[3])/n2

_1minus_alpha=float(tt[4])
# print _1minus_alpha
z_alpha_div_2=norm.ppf(1-(1-_1minus_alpha)/2)
# print z_alpha_div_2
diff=p1-p2
if x1>=5 & x2>= 5:
# if 1==1:
	end=z_alpha_div_2*(((p1*(1-p1))/n1 + (p2*(1-p2))/n2)**(0.5))
	llim=diff-end
	ulim=diff+end
	txt='Point estimate'+str(diff)+'Lower lim CI', str(llim)+'Upper lim CI'+ str(ulim)
	x_norm2binom=[diff, llim, ulim]
	td=  ["%.4f" % xx for xx in x_norm2binom]
else: # Do bootstrapping
	# print 'here'
# if 1==1:
	diff_array=range(int(N))
	for x in range(int(N)):
		x1_boot=np.random.choice(2, n1, p=[1-p1, p1])
		tmp=np.sum(x1_boot)
		p1_boot=float(tmp)/n1
		x2_boot=np.random.choice(2, n2, p=[1-p2, p2])
		tmp=np.sum(x2_boot)
		p2_boot=float(tmp)/n2
		diff_boot=p1_boot-p2_boot
		# print diff_boot
		diff_array[x]=diff_boot
	# print diff_array
	lims=np.percentile(diff_array, [2.5, 97.5])
	x_boot=[diff, lims[0], lims[1]]
	td = ["%.4f" % xx for xx in x_boot]
# print td_norm2binom, "\n", td_boot
with open("scenario1_output.txt", "wt") as out_file:
	out_file.write('Point estimate: '+str(td[0])+'  Lower lim CI: '+str(td[1])+'  Upper lim CI: '+ str(td[2]))