forked from facebook/react
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze.py
More file actions
executable file
·111 lines (91 loc) · 3.29 KB
/
analyze.py
File metadata and controls
executable file
·111 lines (91 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# Copyright 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import math
import sys
import numpy as np
import numpy.random as npr
import scipy.stats
def _bootstrap_mean_sem(samples):
"""Return the estimated standard error for a distribution's mean."""
samples = np.array(samples)
n = len(samples)
indices = npr.randint(0, n, (10000, n))
samples = samples[indices]
means = np.sort(np.mean(samples, axis=1))
return np.std(means, ddof=1)
def _read_measurements(f):
"""Read measurements from a file.
Returns {'a': [1.0, 2.0, 3.0], 'b': [5.0, 5.0, 5.0]} for a file containing
the six lines: ['a 1', 'a 2', 'a 3', 'b 5', 'b 5', 'b 5'].
"""
measurements = {}
for line in f:
label, value = line.split(None, 1)
measurements.setdefault(label, []).append(float(value))
return measurements
def _compute_mean_and_sd_of_ratio_from_delta_method(
mean_test,
sem_test,
mean_control,
sem_control
):
mean = (
((mean_test - mean_control) / mean_control) -
(pow(sem_control, 2) * mean_test / pow(mean_control, 3))
)
var = (
pow(sem_test / mean_control, 2) +
(pow(sem_control * mean_test, 2) / pow(mean_control, 4))
)
return (mean, math.sqrt(var))
def _main():
    """Compare control and test measurement files and print a report.

    Reads two file paths from ``sys.argv`` (control first, then test), loads
    the labelled measurements from each, and for every label prints the
    estimated % change from control to test with a 99% confidence interval.
    Intervals that exclude zero are marked with ``***``. Labels that lack
    measurements in either file are reported as skipped.

    The print calls use the single-argument parenthesised form so the script
    produces the same output under Python 2 and Python 3.

    Returns:
        1 if the command line does not contain exactly two arguments (a
        usage message is written to stderr); otherwise None, which
        ``sys.exit`` treats as a successful exit status.
    """
    if len(sys.argv) != 3:
        sys.stderr.write("usage: analyze.py control.txt test.txt\n")
        return 1

    ci_size = 0.99
    # Two-sided critical value of the standard normal distribution for the
    # requested confidence level; scales the standard error into a CI.
    z_crit = scipy.stats.norm.ppf(0.5 * (1 + ci_size))

    control, test = sys.argv[1:]
    with open(control) as f:
        control_measurements = _read_measurements(f)
    with open(test) as f:
        test_measurements = _read_measurements(f)

    # Union of the labels seen in either file (iterating a dict yields keys).
    keys = set(control_measurements)
    keys.update(test_measurements)

    print("Comparing %s (control) vs %s (test)" % (control, test))
    print("Significant differences marked by ***")
    print("%% change from control to test, with %g%% CIs:" % (ci_size * 100,))
    print("")

    for key in sorted(keys):
        print("* %s" % (key,))
        control_nums = control_measurements.get(key, [])
        test_nums = test_measurements.get(key, [])
        if not control_nums or not test_nums:
            # A comparison needs data on both sides.
            print(" skipping...")
            continue

        mean_control = np.mean(control_nums)
        mean_test = np.mean(test_nums)
        sem_control = _bootstrap_mean_sem(control_nums)
        sem_test = _bootstrap_mean_sem(test_nums)

        rat_mean, rat_sem = _compute_mean_and_sd_of_ratio_from_delta_method(
            mean_test, sem_test, mean_control, sem_control
        )
        rat_low = rat_mean - z_crit * rat_sem
        rat_high = rat_mean + z_crit * rat_sem

        # Significant when the whole interval lies on one side of zero.
        sig = rat_high < 0 or rat_low > 0

        print(" %% change: %+6.2f%% [%+6.2f%%, %+6.2f%%]%s" % (
            100 * rat_mean,
            100 * rat_low,
            100 * rat_high,
            ' ***' if sig else ''
        ))
        print(" means: %g (control), %g (test)" % (mean_control, mean_test))
# Script entry point: run the comparison and pass its result to sys.exit.
if __name__ == "__main__":
    exit_status = _main()
    sys.exit(exit_status)