calculate_p_value: use empirical method when sd=None

Refs greenelab/connectivity-search-backend#18 (comment)
hetio · Mar 4, 2019 · 4de8877 · 4de8877
1 parent 6c615a6
commit 4de8877
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 2 deletions.
diff --git a/hetmatpy/pipeline.py b/hetmatpy/pipeline.py
@@ -12,6 +12,14 @@
 FLOAT_ERROR_TOLERANCE = 1e-5
 
 
+def sd_is_positive(sd):
+    """
+    Return whether the standard deviation is greater than zero. A value
+    that is zero, negative, NaN, or None is treated as not positive.
+    """
+    return pandas.notna(sd) and sd > 0
+
+
 def calculate_sd(sum_of_squares, unsquared_sum, number_nonzero):
     """
     Calculate the standard deviation and validate the incoming data
@@ -86,7 +94,7 @@ def calculate_empirical_p_value(row):
         # No nonzero DWPCs are found in the permuted network, but paths are
         # observed in the true network
         return 0.0
-    if row['sd_nz'] == 0:
+    if not sd_is_positive(row['sd_nz']):
         # The DWPCs in the permuted network are identical
         if row['dwpc'] <= row['mean_nz'] + FLOAT_ERROR_TOLERANCE:
             # The DWPC you found in the true network is smaller than or equal
@@ -103,7 +111,7 @@ def calculate_p_value(row):
     """
     Calculate the p_value for a given metapath
     """
-    if row['nnz'] == 0 or path_does_not_exist(row) or row['sd_nz'] == 0:
+    if row['nnz'] == 0 or path_does_not_exist(row) or not sd_is_positive(row['sd_nz']):
         return calculate_empirical_p_value(row)
     else:
         return calculate_gamma_hurdle_p_value(row)

diff --git a/hetmatpy/tests/test_pipeline.py b/hetmatpy/tests/test_pipeline.py
@@ -140,6 +140,17 @@ def test_calculate_sd(sum_of_squares, unsquared_sum, number_nonzero, expected_ou
       'mean_nz': 7.323728709931212,
       'sd_nz': 0.0
       }, 0.02556372549),
+    # standard deviation is None
+    ({'path_count': 5,
+      'sd_nz': None,
+      'dwpc': 1.5,
+      'nnz': 10,
+      'n': 100,
+      'alpha': 1.0,
+      'beta': 1.0,
+      'sum': 1.0,
+      'mean_nz': 2
+      }, .1),
 ])
 def test_calculate_p_value(row, expected_output):
     assert calculate_p_value(row) == pytest.approx(expected_output, rel=1e-4)