From 4da0e66fb3538bb2bb7827cd530043f5a0002ea3 Mon Sep 17 00:00:00 2001
From: Nikhil Malkari
Date: Tue, 26 Nov 2024 19:41:38 -0600
Subject: [PATCH] Fix FillMissing processor by removing inplace=True and add test for fillna behavior

---
 fastai/tabular/core.py     |  2 +-
 tests/test_tabular_core.py | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_tabular_core.py

diff --git a/fastai/tabular/core.py b/fastai/tabular/core.py
index 84850462ab..6af54fef7e 100644
--- a/fastai/tabular/core.py
+++ b/fastai/tabular/core.py
@@ -311,7 +311,7 @@ def encodes(self, to):
         for n in missing.any()[missing.any()].keys():
             assert n in self.na_dict, f"nan values in `{n}` but not in setup training set"
         for n in self.na_dict.keys():
-            to[n].fillna(self.na_dict[n], inplace=True)
+            to[n] = to[n].fillna(self.na_dict[n])
             if self.add_col:
                 to.loc[:,n+'_na'] = missing[n]
                 if n+'_na' not in to.cat_names: to.cat_names.append(n+'_na')
diff --git a/tests/test_tabular_core.py b/tests/test_tabular_core.py
new file mode 100644
index 0000000000..e6af2cf50a
--- /dev/null
+++ b/tests/test_tabular_core.py
@@ -0,0 +1,27 @@
+import pandas as pd
+from fastai.tabular.core import FillMissing, TabularPandas
+
+def test_fillna():
+    # Mock data
+    df = pd.DataFrame({"a": [1, None, 3], "b": [4, 5, None]})
+    na_dict = {"a": 0, "b": -1}
+
+    # Initialize TabularPandas with appropriate columns
+    tab_pandas = TabularPandas(
+        df,
+        procs=[], # No preprocessing steps required
+        cont_names=["a", "b"],
+        cat_names=[],
+        y_names=[]
+    )
+
+    # Initialize FillMissing
+    fill_missing = FillMissing(add_col=False)
+    fill_missing.na_dict = na_dict # Manually set the na_dict for testing
+
+    # Apply the transformation
+    fill_missing.encodes(tab_pandas)
+
+    # Check results
+    assert (tab_pandas["a"] == [1, 0, 3]).all()
+    assert (tab_pandas["b"] == [4, 5, -1]).all()