From e479e9e341085fce59314de1cbb18c41059b61f6 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Tue, 11 Jul 2017 14:29:27 -0700 Subject: [PATCH 1/2] enforce dataframe weights are ordered by the dataframe itself --- pysal/weights/Contiguity.py | 62 +++++++++++++++++++++++-------------- pysal/weights/Distance.py | 2 ++ pysal/weights/weights.py | 1 + 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/pysal/weights/Contiguity.py b/pysal/weights/Contiguity.py index 9a2b7fc6c..346aa9a8d 100644 --- a/pysal/weights/Contiguity.py +++ b/pysal/weights/Contiguity.py @@ -31,9 +31,9 @@ def __init__(self, polygons, method='binning', **kw): :class:`pysal.weights.W` """ criterion = 'rook' - ids = kw.pop('ids', None) - neighbors, ids = _build(polygons, criterion=criterion, - ids=ids, method=method) + ids = kw.pop('ids', None) + neighbors, ids = _build(polygons, ids=ids, + criterion=criterion, method=method) W.__init__(self, neighbors, ids=ids, **kw) @classmethod @@ -82,6 +82,7 @@ def from_shapefile(cls, filepath, idVariable=None, full=False, **kwargs): ids = get_ids(filepath, idVariable) else: ids = None + iterable = FileIO(filepath) w = cls(FileIO(filepath), ids=ids, **kwargs) w.set_shapefile(filepath, idVariable=idVariable, full=full) if sparse: @@ -89,7 +90,7 @@ def from_shapefile(cls, filepath, idVariable=None, full=False, **kwargs): return w @classmethod - def from_iterable(cls, iterable, **kwargs): + def from_iterable(cls, iterable, sparse=False, **kwargs): """ Construct a weights object from a collection of arbitrary polygons. This will cast the polygons to PySAL polygons, then build the W. @@ -109,11 +110,15 @@ def from_iterable(cls, iterable, **kwargs): :class:`pysal.weights.Rook` """ new_iterable = [asShape(shape) for shape in iterable] - return cls(new_iterable, **kwargs) + + w = cls(new_iterable, **kwargs) + if sparse: + w = WSP.from_W(w) + + return w @classmethod - def from_dataframe(cls, df, geom_col='geometry', - idVariable=None, ids=None, id_order=None, **kwargs): + def from_dataframe(cls, df, geom_col='geometry', **kwargs): """ Construct a weights object from a pandas dataframe with a geometry column. This will cast the polygons to PySAL polygons, then build the W @@ -144,20 +149,27 @@ def from_dataframe(cls, df, geom_col='geometry', :class:`pysal.weights.W` :class:`pysal.weights.Rook` """ - if id_order is not None: - if id_order is True and ((idVariable is not None) - or (ids is not None)): + idVariable = kwargs.pop('idVariable', None) + ids = kwargs.pop('ids', None) + id_order = kwargs.pop('id_order', True) + if id_order is True and ((idVariable is not None) + or (ids is not None)): # if idVariable is None, we want ids. Otherwise, we want the # idVariable column - id_order = list(df.get(idVariable, ids)) - else: - id_order = df.get(id_order, ids) + ids = list(df.get(idVariable, ids)) + id_order = ids + elif isinstance(id_order, str): + ids = df.get(id_order, ids) + id_order = ids elif idVariable is not None: ids = df.get(idVariable).tolist() elif isinstance(ids, str): ids = df.get(ids).tolist() - return cls.from_iterable(df[geom_col].tolist(), ids=ids, - id_order=id_order, **kwargs) + else: + id_order = list(df.index) + ids = list(df.index) + w = cls.from_iterable(df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs) + return w class Queen(W): def __init__(self, polygons,method='binning', **kw): @@ -303,21 +315,23 @@ def from_dataframe(cls, df, geom_col='geometry', **kwargs): """ idVariable = kwargs.pop('idVariable', None) ids = kwargs.pop('ids', None) - id_order = kwargs.pop('id_order', None) - if id_order is not None: - if id_order is True and ((idVariable is not None) - or (ids is not None)): + id_order = kwargs.pop('id_order', True) + if id_order is True and ((idVariable is not None) + or (ids is not None)): # if idVariable is None, we want ids. Otherwise, we want the # idVariable column - ids = list(df.get(idVariable, ids)) - id_order = ids - elif isinstance(id_order, str): - ids = df.get(id_order, ids) - id_order = ids + ids = list(df.get(idVariable, ids)) + id_order = ids + elif isinstance(id_order, str): + ids = df.get(id_order, ids) + id_order = ids elif idVariable is not None: ids = df.get(idVariable).tolist() elif isinstance(ids, str): ids = df.get(ids).tolist() + else: + id_order = list(df.index) + ids = list(df.index) w = cls.from_iterable(df[geom_col].tolist(), ids=ids, id_order=id_order, **kwargs) return w diff --git a/pysal/weights/Distance.py b/pysal/weights/Distance.py index 91f2fadac..2ac60f1ab 100644 --- a/pysal/weights/Distance.py +++ b/pysal/weights/Distance.py @@ -859,6 +859,8 @@ def from_dataframe(cls, df, threshold, geom_col='geometry', ids=None, **kwargs): ids = df.index.tolist() elif isinstance(ids, str): ids = df[ids].tolist() + else: + ids = df.index.tolist() return cls(pts, threshold, ids=ids, **kwargs) def _band(self): diff --git a/pysal/weights/weights.py b/pysal/weights/weights.py index 5c5627202..af7a3427c 100644 --- a/pysal/weights/weights.py +++ b/pysal/weights/weights.py @@ -426,6 +426,7 @@ def cardinalities(self): """Number of neighbors for each observation. """ + print(self.neighbors) if 'cardinalities' not in self._cache: c = {} for i in self._id_order: From b565cac24fef793f3af08692e51f8d3763549ea1 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Tue, 11 Jul 2017 14:41:43 -0700 Subject: [PATCH 2/2] add test assert for order preserving weights from dataframe --- pysal/weights/tests/test_Contiguity.py | 7 +++++++ pysal/weights/tests/test_Distance.py | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pysal/weights/tests/test_Contiguity.py b/pysal/weights/tests/test_Contiguity.py index 26778fca1..03e187bcd 100644 --- a/pysal/weights/tests/test_Contiguity.py +++ b/pysal/weights/tests/test_Contiguity.py @@ -92,6 +92,13 @@ def test_from_dataframe(self): w = self.cls.from_dataframe(df, geom_col='the_geom', idVariable=self.idVariable) self.assertEqual(w[self.known_name], self.known_namedw) + # order preserving + permute = df.sample(frac=1) + w = self.cls.from_dataframe(permute, geom_col='the_geom') + with self.assertRaises(AssertionError): + assert w.id_order == df.index.tolist() + self.assertEqual(w.id_order, permute.index.tolist()) + class Test_Queen(ut.TestCase, Contiguity_Mixin): def setUp(self): Contiguity_Mixin.setUp(self) diff --git a/pysal/weights/tests/test_Distance.py b/pysal/weights/tests/test_Distance.py index d8eec70d7..b3ce48ed1 100644 --- a/pysal/weights/tests/test_Distance.py +++ b/pysal/weights/tests/test_Distance.py @@ -86,6 +86,12 @@ def test_from_dataframe(self): w = d.KNN.from_dataframe(df, k=4) self.assertEqual(w.neighbors[self.known_wi0], self.known_w0) self.assertEqual(w.neighbors[self.known_wi1], self.known_w1) + perm = df.sample(frac=1) + w = d.KNN.from_dataframe(perm, k=4) + with self.assertRaises(AssertionError): + assert w.id_order == df.index.tolist() + self.assertEqual(perm.index.tolist(), w.id_order) + def test_from_array(self): w = d.KNN.from_array(self.poly_centroids, k=4) @@ -146,6 +152,11 @@ def test_from_dataframe(self): w = d.DistanceBand.from_dataframe(df, 1) for k,v in w: self.assertEquals(v, self.grid_rook_w[k]) + perm = df.sample(frac=1) + w = d.DistanceBand.from_dataframe(perm, 1) + with self.assertRaises(AssertionError): + assert w.id_order == df.index.tolist() + self.assertEqual(w.id_order, perm.index.tolist()) ########################## # Function/User tests # @@ -252,7 +263,12 @@ def test_from_dataframe(self): w = d.Kernel.from_dataframe(df) for k,v in w[self.known_wi5-1].items(): np.testing.assert_allclose(v, self.known_w5[k+1], rtol=RTOL) - + perm = df.sample(frac=1) + w = d.Kernel.from_dataframe(perm) + with self.assertRaises(AssertionError): + assert w.id_order == df.index.tolist() + self.assertEqual(w.id_order, perm.index.tolist()) + ########################## # Function/User tests # ##########################