alchemistry · orbeckst · Dec 5, 2022 · Nov 20, 2022 · Nov 20, 2022 · Dec 4, 2022
diff --git a/CHANGES b/CHANGES
@@ -22,6 +22,7 @@ Fixes
     is not found (issue #272, PR #273).
   - The regex in the AMBER parser now reads also 'field=value' pairs where
     there are no spaces around the equal sign (issue #272, PR #273).
+  - The pre-processing function slicing() no longer drops rows with NaN values (issue #274, PR #275).
 
 
 10/31/2022 orbeckst, xiki-tempula, DrDomenicoMarson 

@@ -380,6 +380,9 @@ def slicing(df, lower=None, upper=None, step=None, force=False):
     DataFrame
         `df` subsampled.
 
+
+    .. versionchanged:: 1.0.1
+       Rows with NaN values are no longer dropped.
     """
     try:
         df = df.loc[lower:upper:step]
@@ -391,9 +394,6 @@ def slicing(df, lower=None, upper=None, step=None, force=False):
                        "to use slicing on DataFrames with unique time values "
                        "for each row. Use `force=True` to ignore this error.")
 
-    # drop any rows that have missing values
-    df = df.dropna()
-
     return df
 
 

@@ -1,21 +1,21 @@
 """Tests for preprocessing functions.
 
 """
-import pytest
-
+import alchemtest.gmx
 import numpy as np
+import pytest
+from alchemtest.gmx import load_benzene
+from alchemtest.namd import load_idws
 from numpy.testing import assert_allclose
 
 import alchemlyb
-from alchemlyb.parsing import gmx
+from alchemlyb.parsing import gmx, namd
+from alchemlyb.parsing.gmx import extract_u_nk, extract_dHdl
 from alchemlyb.preprocessing import (slicing, statistical_inefficiency,
                                      equilibrium_detection,
                                      decorrelate_u_nk, decorrelate_dhdl,
                                      u_nk2series, dhdl2series)
-from alchemlyb.parsing.gmx import extract_u_nk, extract_dHdl
-from alchemtest.gmx import load_benzene, load_ABFE
 
-import alchemtest.gmx
 
 def gmx_benzene_dHdl():
     dataset = alchemtest.gmx.load_benzene()
@@ -86,6 +86,19 @@ def slicer(self, *args, **kwargs):
     def test_basic_slicing(self, data, size):
         assert len(self.slicer(data, lower=1000, upper=34000, step=5)) == size
 
+    def test_unchanged(self):
+        # NAMD energy files only have dE for adjacent lambdas; this ensures
+        # that the slicer will not drop these rows even though they contain NaN.
+        file = load_idws().data['forward'][0]
+        u_nk = namd.extract_u_nk(file, 298)
+
+        # Slice each lambda window separately, as u_nk holds data from all lambdas
+        groups = u_nk.groupby('fep-lambda')
+        for key, group in groups:
+            group = group[~group.index.duplicated(keep='first')]
+            df = self.slicer(group, None, None, None)
+            assert len(df) == len(group)
+
     @pytest.mark.parametrize(('dataloader', 'lower', 'upper'),
                              [
                                  ('gmx_benzene_dHdl_fixture', 1000, 34000),