From 4c84e12c30513cb42fca77cd7288545f46a5fd16 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Nov 2020 05:25:03 -0800 Subject: [PATCH 01/10] TST/REF: collect indexing tests by method (#38005) --- pandas/tests/frame/indexing/test_getitem.py | 12 +++ pandas/tests/indexing/test_at.py | 12 ++- pandas/tests/indexing/test_categorical.py | 19 ----- pandas/tests/indexing/test_datetime.py | 47 ++++------- pandas/tests/indexing/test_floats.py | 49 ++++++----- pandas/tests/indexing/test_iat.py | 15 +++- pandas/tests/indexing/test_iloc.py | 30 +++++++ pandas/tests/indexing/test_indexing.py | 86 ++++++-------------- pandas/tests/indexing/test_loc.py | 17 ++-- pandas/tests/indexing/test_scalar.py | 32 +------- pandas/tests/series/indexing/test_getitem.py | 22 +++++ 11 files changed, 166 insertions(+), 175 deletions(-) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 2e65770d7afad..868df82a43a91 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -68,6 +68,18 @@ def test_getitem_sparse_column_return_type_and_dtype(self): tm.assert_series_equal(result, expected) +class TestGetitemListLike: + def test_getitem_list_missing_key(self): + # GH#13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}) + df.columns = ["x", "x", "z"] + + # Check that we get the correct value in the KeyError + with pytest.raises(KeyError, match=r"\['y'\] not in index"): + df[["x", "y", "z"]] + + class TestGetitemCallable: def test_getitem_callable(self, float_frame): # GH#12533 diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index d410a4137554b..c721ba2e6daad 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import DataFrame, Series, Timestamp import pandas._testing as tm @@ -27,6 +27,16 @@ def test_at_setitem_mixed_index_assignment(self): assert ser.iat[3] == 22 +class TestAtSetItemWithExpansion: + def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): + # GH#25506 + ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) + result = Series(ts) + result.at[1] = ts + expected = Series([ts, ts]) + tm.assert_series_equal(result, expected) + + class TestAtWithDuplicates: def test_at_with_duplicate_axes_requires_scalar_lookup(self): # GH#33041 check that falling back to loc doesn't allow non-scalar diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 94fc3960f24c5..6fff706e27cd2 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -430,25 +430,6 @@ def test_ix_categorical_index(self): ) tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) - def test_read_only_source(self): - # GH 10043 - rw_array = np.eye(10) - rw_df = DataFrame(rw_array) - - ro_array = np.eye(10) - ro_array.setflags(write=False) - ro_df = DataFrame(ro_array) - - tm.assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]]) - tm.assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]]) - tm.assert_series_equal(rw_df.iloc[1], ro_df.iloc[1]) - tm.assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3]) - - tm.assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]]) - tm.assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]]) - tm.assert_series_equal(rw_df.loc[1], ro_df.loc[1]) - tm.assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) - def test_loc_slice(self): # GH9748 with pytest.raises(KeyError, match="1"): diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index e7bf186ae6456..d00fe58265a2e 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -160,15 +160,22 @@ def test_indexing_with_datetimeindex_tz(self): expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) - def test_series_partial_set_datetime(self): + @pytest.mark.parametrize("to_period", [True, False]) + def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): # GH 11497 idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") + if to_period: + idx = idx.to_period("D") ser = Series([0.1, 0.2], index=idx, name="s") - result = ser.loc[[Timestamp("2011-01-01"), Timestamp("2011-01-02")]] + keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + if to_period: + keys = [x.to_period("D") for x in keys] + result = ser.loc[keys] exp = Series([0.1, 0.2], index=idx, name="s") - exp.index = exp.index._with_freq(None) + if not to_period: + exp.index = exp.index._with_freq(None) tm.assert_series_equal(result, exp, check_index_type=True) keys = [ @@ -176,8 +183,10 @@ def test_series_partial_set_datetime(self): Timestamp("2011-01-02"), Timestamp("2011-01-01"), ] + if to_period: + keys = [x.to_period("D") for x in keys] exp = Series( - [0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s" + [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s" ) result = ser.loc[keys] tm.assert_series_equal(result, exp, check_index_type=True) @@ -187,35 +196,9 @@ def test_series_partial_set_datetime(self): Timestamp("2011-01-02"), Timestamp("2011-01-03"), ] - with pytest.raises(KeyError, match="with any missing labels"): - ser.loc[keys] - - def test_series_partial_set_period(self): - # GH 11497 - - idx = pd.period_range("2011-01-01", "2011-01-02", freq="D", name="idx") - ser = Series([0.1, 0.2], index=idx, name="s") - - result = ser.loc[ - [pd.Period("2011-01-01", freq="D"), pd.Period("2011-01-02", freq="D")] - ] - exp = Series([0.1, 0.2], index=idx, name="s") - tm.assert_series_equal(result, exp, check_index_type=True) + if to_period: + keys = [x.to_period("D") for x in keys] - keys = [ - pd.Period("2011-01-02", freq="D"), - pd.Period("2011-01-02", freq="D"), - pd.Period("2011-01-01", freq="D"), - ] - exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s") - result = ser.loc[keys] - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [ - pd.Period("2011-01-03", freq="D"), - pd.Period("2011-01-02", freq="D"), - pd.Period("2011-01-03", freq="D"), - ] with pytest.raises(KeyError, match="with any missing labels"): ser.loc[keys] diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 1b78ba6defd69..9f86e78fc36c4 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -140,8 +140,11 @@ def test_scalar_with_mixed(self): expected = 3 assert result == expected + @pytest.mark.parametrize( + "idxr,getitem", [(lambda x: x.loc, False), (lambda x: x, True)] + ) @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) - def test_scalar_integer(self, index_func, frame_or_series): + def test_scalar_integer(self, index_func, frame_or_series, idxr, getitem): # test how scalar float indexers work on int indexes @@ -150,37 +153,39 @@ def test_scalar_integer(self, index_func, frame_or_series): obj = gen_obj(frame_or_series, i) # coerce to equal int - for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: - result = idxr(obj)[3.0] - self.check(result, obj, 3, getitem) + result = idxr(obj)[3.0] + self.check(result, obj, 3, getitem) - # coerce to equal int - for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: - - if isinstance(obj, Series): + if isinstance(obj, Series): - def compare(x, y): - assert x == y + def compare(x, y): + assert x == y - expected = 100 + expected = 100 + else: + compare = tm.assert_series_equal + if getitem: + expected = Series(100, index=range(len(obj)), name=3) else: - compare = tm.assert_series_equal - if getitem: - expected = Series(100, index=range(len(obj)), name=3) - else: - expected = Series(100.0, index=range(len(obj)), name=3) + expected = Series(100.0, index=range(len(obj)), name=3) - s2 = obj.copy() - idxr(s2)[3.0] = 100 + s2 = obj.copy() + idxr(s2)[3.0] = 100 - result = idxr(s2)[3.0] - compare(result, expected) + result = idxr(s2)[3.0] + compare(result, expected) - result = idxr(s2)[3] - compare(result, expected) + result = idxr(s2)[3] + compare(result, expected) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) + def test_scalar_integer_contains_float(self, index_func, frame_or_series): # contains + # integer index + index = index_func(5) + obj = gen_obj(frame_or_series, index) + # coerce to equal int assert 3.0 in obj diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index b1025b99e9bd5..84bd1d63f6bbc 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -1,4 +1,6 @@ -import pandas as pd +import numpy as np + +from pandas import DataFrame, Series, period_range def test_iat(float_frame): @@ -12,5 +14,14 @@ def test_iat(float_frame): def test_iat_duplicate_columns(): # https://github.com/pandas-dev/pandas/issues/11754 - df = pd.DataFrame([[1, 2]], columns=["x", "x"]) + df = DataFrame([[1, 2]], columns=["x", "x"]) assert df.iat[0, 0] == 1 + + +def test_iat_getitem_series_with_period_index(): + # GH#4390, iat incorrectly indexing + index = period_range("1/1/2001", periods=10) + ser = Series(np.random.randn(10), index=index) + expected = ser[index[0]] + result = ser.iat[0] + assert expected == result diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index bc40079e3169b..9ae9566ac87ef 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -801,6 +801,36 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 + @pytest.mark.parametrize("indexer", [lambda x: x.loc, lambda x: x.iloc]) + def test_iloc_getitem_read_only_values(self, indexer): + # GH#10043 this is fundamentally a test for iloc, but test loc while + # we're here + rw_array = np.eye(10) + rw_df = DataFrame(rw_array) + + ro_array = np.eye(10) + ro_array.setflags(write=False) + ro_df = DataFrame(ro_array) + + tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]]) + tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]]) + tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1]) + tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3]) + + def test_iloc_getitem_readonly_key(self): + # GH#17192 iloc with read-only array raising TypeError + df = DataFrame({"data": np.ones(100, dtype="float64")}) + indices = np.array([1, 3, 6]) + indices.flags.writeable = False + + result = df.iloc[indices] + expected = df.loc[[1, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df["data"].iloc[indices] + expected = df["data"].loc[[1, 3, 6]] + tm.assert_series_equal(result, expected) + def test_iloc_assign_series_to_df_cell(self): # GH 37593 df = DataFrame(columns=["a"], index=[0]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 87ee23dc78f89..b52c2ebbbc584 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -17,6 +17,23 @@ from .test_floats import gen_obj + +def getitem(x): + return x + + +def setitem(x): + return x + + +def loc(x): + return x.loc + + +def iloc(x): + return x.iloc + + # ------------------------------------------------------------------------ # Indexing test cases @@ -55,15 +72,8 @@ def test_setitem_ndarray_1d(self): with pytest.raises(ValueError, match=msg): df[2:5] = np.arange(1, 4) * 1j - @pytest.mark.parametrize( - "idxr, idxr_id", - [ - (lambda x: x, "getitem"), - (lambda x: x.loc, "loc"), - (lambda x: x.iloc, "iloc"), - ], - ) - def test_getitem_ndarray_3d(self, index, frame_or_series, idxr, idxr_id): + @pytest.mark.parametrize("idxr", [getitem, loc, iloc]) + def test_getitem_ndarray_3d(self, index, frame_or_series, idxr): # GH 25567 obj = gen_obj(frame_or_series, index) idxr = idxr(obj) @@ -85,26 +95,19 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr, idxr_id): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idxr[nd3] - @pytest.mark.parametrize( - "idxr, idxr_id", - [ - (lambda x: x, "setitem"), - (lambda x: x.loc, "loc"), - (lambda x: x.iloc, "iloc"), - ], - ) - def test_setitem_ndarray_3d(self, index, frame_or_series, idxr, idxr_id): + @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + def test_setitem_ndarray_3d(self, index, frame_or_series, indexer): # GH 25567 obj = gen_obj(frame_or_series, index) - idxr = idxr(obj) + idxr = indexer(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) - if idxr_id == "iloc": + if indexer.__name__ == "iloc": err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}" elif ( isinstance(index, pd.IntervalIndex) - and idxr_id == "setitem" + and indexer.__name__ == "setitem" and obj.ndim == 1 ): err = AttributeError @@ -294,7 +297,7 @@ def test_dups_fancy_indexing2(self): result = df.loc[[1, 2], ["a", "b"]] tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc]) + @pytest.mark.parametrize("case", [getitem, loc]) def test_duplicate_int_indexing(self, case): # GH 17347 s = Series(range(3), index=[1, 1, 3]) @@ -591,7 +594,7 @@ def test_astype_assignment(self): expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("indexer", [lambda x: x.loc, lambda x: x]) + @pytest.mark.parametrize("indexer", [getitem, loc]) def test_index_type_coercion(self, indexer): # GH 11836 @@ -998,43 +1001,6 @@ def test_extension_array_cross_section_converts(): tm.assert_series_equal(result, expected) -def test_readonly_indices(): - # GH#17192 iloc with read-only array raising TypeError - df = DataFrame({"data": np.ones(100, dtype="float64")}) - indices = np.array([1, 3, 6]) - indices.flags.writeable = False - - result = df.iloc[indices] - expected = df.loc[[1, 3, 6]] - tm.assert_frame_equal(result, expected) - - result = df["data"].iloc[indices] - expected = df["data"].loc[[1, 3, 6]] - tm.assert_series_equal(result, expected) - - -def test_1tuple_without_multiindex(): - ser = Series(range(5)) - key = (slice(3),) - - result = ser[key] - expected = ser[key[0]] - tm.assert_series_equal(result, expected) - - -def test_duplicate_index_mistyped_key_raises_keyerror(): - # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError - ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) - with pytest.raises(KeyError, match="None"): - ser[None] - - with pytest.raises(KeyError, match="None"): - ser.index.get_loc(None) - - with pytest.raises(KeyError, match="None"): - ser.index._engine.get_loc(None) - - def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): # GH 30567 ser = Series([None] * 10) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 28846bcf2f14d..07b7c5c6767c3 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1573,6 +1573,14 @@ def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start): with tm.assert_produces_warning(FutureWarning): obj.loc[start:"2022"] + @pytest.mark.parametrize("value", [1, 1.5]) + def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value): + # GH: 26491 + obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) + result = obj.loc[value:"third"] + expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) + tm.assert_equal(result, expected) + class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): @@ -1999,12 +2007,3 @@ def test_loc_setitem_dt64tz_values(self): s2["a"] = expected result = s2["a"] assert result == expected - - -@pytest.mark.parametrize("value", [1, 1.5]) -def test_loc_int_in_object_index(frame_or_series, value): - # GH: 26491 - obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) - result = obj.loc[value:"third"] - expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) - tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 230725d8ee11d..dd01f4e6a4f49 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, Timedelta, Timestamp, date_range, period_range +from pandas import DataFrame, Series, Timedelta, Timestamp, date_range import pandas._testing as tm from pandas.tests.indexing.common import Base @@ -146,18 +146,7 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) - # TODO: belongs somewhere else? - def test_getitem_list_missing_key(self): - # GH 13822, incorrect error string with non-unique columns when missing - # column is accessed - df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}) - df.columns = ["x", "x", "z"] - - # Check that we get the correct value in the KeyError - with pytest.raises(KeyError, match=r"\['y'\] not in index"): - df[["x", "y", "z"]] - - def test_at_with_tz(self): + def test_at_getitem_dt64tz_values(self): # gh-15822 df = DataFrame( { @@ -178,14 +167,6 @@ def test_at_with_tz(self): result = df.at[0, "date"] assert result == expected - def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): - # GH 25506 - ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) - result = Series(ts) - result.at[1] = ts - expected = Series([ts, ts]) - tm.assert_series_equal(result, expected) - def test_mixed_index_at_iat_loc_iloc_series(self): # GH 19860 s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) @@ -259,15 +240,6 @@ def test_iat_dont_wrap_object_datetimelike(): assert not isinstance(result, Timedelta) -def test_iat_series_with_period_index(): - # GH 4390, iat incorrectly indexing - index = period_range("1/1/2001", periods=10) - ser = Series(np.random.randn(10), index=index) - expected = ser[index[0]] - result = ser.iat[0] - assert expected == result - - def test_at_with_tuple_index_get(): # GH 26989 # DataFrame.at getter works with Index of tuples diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 7b794668803c3..3686337141420 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -427,3 +427,25 @@ def test_getitem_assignment_series_aligment(): ser[idx] = Series([10, 11, 12]) expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]) tm.assert_series_equal(ser, expected) + + +def test_getitem_duplicate_index_mistyped_key_raises_keyerror(): + # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError + ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) + with pytest.raises(KeyError, match="None"): + ser[None] + + with pytest.raises(KeyError, match="None"): + ser.index.get_loc(None) + + with pytest.raises(KeyError, match="None"): + ser.index._engine.get_loc(None) + + +def test_getitem_1tuple_slice_without_multiindex(): + ser = Series(range(5)) + key = (slice(3),) + + result = ser[key] + expected = ser[key[0]] + tm.assert_series_equal(result, expected) From c2b133701b4a12deb2d64506d4db9f8a152a5bc0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Nov 2020 05:25:34 -0800 Subject: [PATCH 02/10] REF: ensure_arraylike in algos.isin (#38004) --- pandas/core/algorithms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index be091314e6c25..a3abfaa48500c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -218,7 +218,8 @@ def _ensure_arraylike(values): """ if not is_array_like(values): inferred = lib.infer_dtype(values, skipna=False) - if inferred in ["mixed", "string"]: + if inferred in ["mixed", "string", "mixed-integer"]: + # "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160 if isinstance(values, tuple): values = list(values) values = construct_1d_object_array_from_listlike(values) @@ -424,6 +425,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: values = construct_1d_object_array_from_listlike(list(values)) # TODO: could use ensure_arraylike here + comps = _ensure_arraylike(comps) comps = extract_array(comps, extract_numpy=True) if is_categorical_dtype(comps): # TODO(extension) From 3788d74e67e3a510723e28035c4f0961292f9d96 Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Mon, 23 Nov 2020 14:26:13 +0100 Subject: [PATCH 03/10] DOC: add a link to new styler method (#37998) --- doc/source/reference/style.rst | 1 + pandas/io/formats/style.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 24a47336b0522..e80dc1b57ff80 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -36,6 +36,7 @@ Style application Styler.where Styler.format Styler.set_precision + Styler.set_td_classes Styler.set_table_styles Styler.set_table_attributes Styler.set_caption diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4b7a5e76cb475..298a7836bcb58 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -565,7 +565,6 @@ def set_td_classes(self, classes: DataFrame) -> "Styler": ' 1' ' ' '' - """ classes = classes.reindex_like(self.data) From fe97aa26946133af57e77679d63d149c3ad830df Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Nov 2020 05:28:00 -0800 Subject: [PATCH 04/10] TST/REF: collect tests from test_multilevel (#38006) --- pandas/tests/frame/test_repr_info.py | 8 ++++++ pandas/tests/indexing/test_loc.py | 34 ++++++++++++++++++++++++ pandas/tests/test_multilevel.py | 39 ++++------------------------ 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index ef43319d11464..a7b3333e7c690 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -23,6 +23,14 @@ class TestDataFrameReprInfoEtc: + def test_repr_unicode_level_names(self, frame_or_series): + index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) + + obj = DataFrame(np.random.randn(2, 4), index=index) + if frame_or_series is Series: + obj = obj[0] + repr(obj) + def test_assign_index_sequences(self): # GH#2200 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 07b7c5c6767c3..e7831475932d9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1222,6 +1222,40 @@ def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] tm.assert_series_equal(result, ser) + def test_loc_getitem_sorted_index_level_with_duplicates(self): + # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes + mi = MultiIndex.from_tuples( + [ + ("foo", "bar"), + ("foo", "bar"), + ("bah", "bam"), + ("bah", "bam"), + ("foo", "bar"), + ("bah", "bam"), + ], + names=["A", "B"], + ) + df = DataFrame( + [ + [1.0, 1], + [2.0, 2], + [3.0, 3], + [4.0, 4], + [5.0, 5], + [6.0, 6], + ], + index=mi, + columns=["C", "D"], + ) + df = df.sort_index(level=0) + + expected = DataFrame( + [[1.0, 1], [2.0, 2], [5.0, 5]], columns=["C", "D"], index=mi.take([0, 1, 4]) + ) + + result = df.loc[("foo", "bar")] + tm.assert_frame_equal(result, expected) + class TestLocSetitemWithExpansion: @pytest.mark.slow diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 189c792ac228b..84aa8ec6f970f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -135,7 +135,9 @@ def test_groupby_level_no_obs(self): result = grouped.sum() assert (result.columns == ["f2", "f3"]).all() - def test_insert_index(self, multiindex_year_month_day_dataframe_random_data): + def test_setitem_with_expansion_multiindex_columns( + self, multiindex_year_month_day_dataframe_random_data + ): ymd = multiindex_year_month_day_dataframe_random_data df = ymd[:5].T @@ -242,12 +244,11 @@ def test_std_var_pass_ddof(self): expected = df.groupby(level=0).agg(alt) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("klass", [Series, DataFrame]) def test_agg_multiple_levels( - self, multiindex_year_month_day_dataframe_random_data, klass + self, multiindex_year_month_day_dataframe_random_data, frame_or_series ): ymd = multiindex_year_month_day_dataframe_random_data - if klass is Series: + if frame_or_series is Series: ymd = ymd["A"] result = ymd.sum(level=["year", "month"]) @@ -349,14 +350,6 @@ def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data) result = frame.T.loc[:, ["foo", "qux"]] tm.assert_frame_equal(result, expected.T) - def test_unicode_repr_level_names(self): - index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) - - s = Series(range(2), index=index) - df = DataFrame(np.random.randn(2, 4), index=index) - repr(s) - repr(df) - @pytest.mark.parametrize("d", [4, "d"]) def test_empty_frame_groupby_dtypes_consistency(self, d): # GH 20888 @@ -386,28 +379,6 @@ def test_duplicate_groupby_issues(self): result = s.groupby(s.index).first() assert len(result) == 3 - def test_duplicate_mi(self): - # GH 4516 - df = DataFrame( - [ - ["foo", "bar", 1.0, 1], - ["foo", "bar", 2.0, 2], - ["bah", "bam", 3.0, 3], - ["bah", "bam", 4.0, 4], - ["foo", "bar", 5.0, 5], - ["bah", "bam", 6.0, 6], - ], - columns=list("ABCD"), - ) - df = df.set_index(["A", "B"]) - df = df.sort_index(level=0) - expected = DataFrame( - [["foo", "bar", 1.0, 1], ["foo", "bar", 2.0, 2], ["foo", "bar", 5.0, 5]], - columns=list("ABCD"), - ).set_index(["A", "B"]) - result = df.loc[("foo", "bar")] - tm.assert_frame_equal(result, expected) - def test_subsets_multiindex_dtype(self): # GH 20757 data = [["x", 1]] From fa0e40546f53e823e184f9ab98e231d3edfaf1dc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Nov 2020 05:32:23 -0800 Subject: [PATCH 05/10] CLN: avoid try/except in Index methods (#37990) --- pandas/core/indexes/base.py | 10 ++--- pandas/core/indexes/datetimelike.py | 63 +++++++++++++---------------- 2 files changed, 33 insertions(+), 40 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7658230d9e1dd..a296310d92ff1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2490,12 +2490,10 @@ def _get_unique_index(self, dropna: bool = False): else: values = self._values - if dropna: - try: - if self.hasnans: - values = values[~isna(values)] - except NotImplementedError: - pass + if dropna and not isinstance(self, ABCMultiIndex): + # isna not defined for MultiIndex + if self.hasnans: + values = values[~isna(values)] return self._shallow_copy(values) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c30abb144cea5..ce5d62aec4f9f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -27,7 +27,6 @@ from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin -from pandas.core.base import IndexOpsMixin import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs @@ -217,10 +216,6 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): result._data._freq = freq return result - @doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index") - def searchsorted(self, value, side="left", sorter=None): - return self._data.searchsorted(value, side=side, sorter=sorter) - _can_hold_na = True _na_value = NaT @@ -256,23 +251,23 @@ def min(self, axis=None, skipna=True, *args, **kwargs): return self._na_value i8 = self.asi8 - try: + + if len(i8) and self.is_monotonic_increasing: # quick check - if len(i8) and self.is_monotonic: - if i8[0] != iNaT: - return self._data._box_func(i8[0]) - - if self.hasnans: - if skipna: - min_stamp = self[~self._isnan].asi8.min() - else: - return self._na_value - else: - min_stamp = i8.min() - return self._data._box_func(min_stamp) - except ValueError: + if i8[0] != iNaT: + return self._data._box_func(i8[0]) + + if self.hasnans: + if not skipna: + return self._na_value + i8 = i8[~self._isnan] + + if not len(i8): return self._na_value + min_stamp = i8.min() + return self._data._box_func(min_stamp) + def argmin(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the minimum values along an axis. @@ -313,23 +308,23 @@ def max(self, axis=None, skipna=True, *args, **kwargs): return self._na_value i8 = self.asi8 - try: + + if len(i8) and self.is_monotonic: # quick check - if len(i8) and self.is_monotonic: - if i8[-1] != iNaT: - return self._data._box_func(i8[-1]) - - if self.hasnans: - if skipna: - max_stamp = self[~self._isnan].asi8.max() - else: - return self._na_value - else: - max_stamp = i8.max() - return self._data._box_func(max_stamp) - except ValueError: + if i8[-1] != iNaT: + return self._data._box_func(i8[-1]) + + if self.hasnans: + if not skipna: + return self._na_value + i8 = i8[~self._isnan] + + if not len(i8): return self._na_value + max_stamp = i8.max() + return self._data._box_func(max_stamp) + def argmax(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the maximum values along an axis. @@ -463,7 +458,7 @@ def _partial_date_slice( vals = self._data._ndarray unbox = self._data._unbox - if self.is_monotonic: + if self.is_monotonic_increasing: if len(self) and ( (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1]) From 589a89e092fdf3750b8e7a3aa28e04de39ce8b0f Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Mon, 23 Nov 2020 07:34:25 -0600 Subject: [PATCH 06/10] CLN: remove panel compat shim (#37983) --- ci/deps/travis-37-locale.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/reference/index.rst | 1 - doc/source/reference/panel.rst | 10 ---------- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/__init__.py | 17 +---------------- pandas/compat/_optional.py | 2 +- pandas/tests/test_downstream.py | 1 - scripts/validate_rst_title_capitalization.py | 1 - 9 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 doc/source/reference/panel.rst diff --git a/ci/deps/travis-37-locale.yaml b/ci/deps/travis-37-locale.yaml index e93a86910bf34..4e442b10482a7 100644 --- a/ci/deps/travis-37-locale.yaml +++ b/ci/deps/travis-37-locale.yaml @@ -34,7 +34,7 @@ dependencies: - pyarrow>=0.17 - pytables>=3.5.1 - scipy - - xarray=0.12.0 + - xarray=0.12.3 - xlrd - xlsxwriter - xlwt diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index df481e8c986f7..c823ad01f10bf 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -284,7 +284,7 @@ pyxlsb 1.0.6 Reading for xlsb files qtpy Clipboard I/O s3fs 0.4.0 Amazon S3 access tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_) -xarray 0.12.0 pandas-like API for N-dimensional data +xarray 0.12.3 pandas-like API for N-dimensional data xclip Clipboard I/O on linux xlrd 1.2.0 Excel reading xlwt 1.3.0 Excel writing diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 9d5649c37e92f..f7c5eaf242b34 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -30,7 +30,6 @@ public functions related to data types in pandas. series frame arrays - panel indexing offset_frequency window diff --git a/doc/source/reference/panel.rst b/doc/source/reference/panel.rst deleted file mode 100644 index 37d48c2dadf2e..0000000000000 --- a/doc/source/reference/panel.rst +++ /dev/null @@ -1,10 +0,0 @@ -{{ header }} - -.. _api.panel: - -===== -Panel -===== -.. currentmodule:: pandas - -``Panel`` was removed in 0.25.0. For prior documentation, see the `0.24 documentation `_ diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 996fb828010ba..766c418741ada 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -664,6 +664,7 @@ I/O - Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`) - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`) - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) +- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`) Period ^^^^^^ diff --git a/pandas/__init__.py b/pandas/__init__.py index b9b7d5d064855..cc5d835a52833 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -189,25 +189,10 @@ # GH 27101 -# TODO: remove Panel compat in 1.0 def __getattr__(name): import warnings - if name == "Panel": - - warnings.warn( - "The Panel class is removed from pandas. Accessing it " - "from the top-level namespace will also be removed in the next version", - FutureWarning, - stacklevel=2, - ) - - class Panel: - pass - - return Panel - - elif name == "datetime": + if name == "datetime": warnings.warn( "The pandas.datetime class is deprecated " "and will be removed from pandas in a future version. " diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index d3c7888cac704..533e67acfa2f4 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -25,7 +25,7 @@ "sqlalchemy": "1.2.8", "tables": "3.5.1", "tabulate": "0.8.3", - "xarray": "0.12.0", + "xarray": "0.12.3", "xlrd": "1.2.0", "xlwt": "1.3.0", "xlsxwriter": "1.0.2", diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 392be699b6fc0..83016a08de90b 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -41,7 +41,6 @@ def test_dask(df): assert ddf.compute() is not None -@pytest.mark.filterwarnings("ignore:Panel class is removed") def test_xarray(df): xarray = import_module("xarray") # noqa diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index b8839c83d00b9..d521f2ee421be 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -138,7 +138,6 @@ "Google", "CategoricalDtype", "UTC", - "Panel", "False", "Styler", "os", From 25a1d9166ff0d131541a65d496e9b37ca7737f25 Mon Sep 17 00:00:00 2001 From: xinrong-databricks <47337188+xinrong-databricks@users.noreply.github.com> Date: Mon, 23 Nov 2020 05:37:28 -0800 Subject: [PATCH 07/10] [WIP] DOC: MultiIndex EX01 errors (#37993) --- pandas/core/indexes/base.py | 27 +++++++++ pandas/core/indexes/multi.py | 107 ++++++++++++++++++++++++++++++++++- 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a296310d92ff1..7b72196c3c2f3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1575,6 +1575,33 @@ def droplevel(self, level=0): Returns ------- Index or MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays( + ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + >>> mi + MultiIndex([(1, 3, 5), + (2, 4, 6)], + names=['x', 'y', 'z']) + + >>> mi.droplevel() + MultiIndex([(3, 5), + (4, 6)], + names=['y', 'z']) + + >>> mi.droplevel(2) + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.droplevel('z') + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.droplevel(['x', 'y']) + Int64Index([5, 6], dtype='int64', name='z') """ if not isinstance(level, (tuple, list)): level = [level] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9c80236129155..ca9612258a890 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -893,6 +893,15 @@ def set_levels(self, levels, level=None, inplace=None, verify_integrity=True): def nlevels(self) -> int: """ Integer number of levels in this MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.nlevels + 3 """ return len(self._levels) @@ -900,6 +909,15 @@ def nlevels(self) -> int: def levshape(self): """ A tuple with the length of each level. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi + MultiIndex([('a', 'b', 'c')], + ) + >>> mi.levshape + (1, 1, 1) """ return tuple(len(x) for x in self.levels) @@ -1436,7 +1454,22 @@ def _set_names(self, names, level=None, validate=True): self._reset_cache() names = property( - fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n""" + fset=_set_names, + fget=_get_names, + doc=""" + Names of levels in MultiIndex. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays( + ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) + >>> mi + MultiIndex([(1, 3, 5), + (2, 4, 6)], + names=['x', 'y', 'z']) + >>> mi.names + FrozenList(['x', 'y', 'z']) + """, ) # -------------------------------------------------------------------- @@ -1680,6 +1713,32 @@ def to_frame(self, index=True, name=None): -------- DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']]) + >>> mi + MultiIndex([('a', 'c'), + ('b', 'd')], + ) + + >>> df = mi.to_frame() + >>> df + 0 1 + a c a c + b d b d + + >>> df = mi.to_frame(index=False) + >>> df + 0 1 + 0 a c + 1 b d + + >>> df = mi.to_frame(name=['x', 'y']) + >>> df + x y + a c a c + b d b d """ from pandas import DataFrame @@ -2217,6 +2276,24 @@ def reorder_levels(self, order): Returns ------- MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) + >>> mi + MultiIndex([(1, 3), + (2, 4)], + names=['x', 'y']) + + >>> mi.reorder_levels(order=[1, 0]) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) + + >>> mi.reorder_levels(order=['y', 'x']) + MultiIndex([(3, 1), + (4, 2)], + names=['y', 'x']) """ order = [self._get_level_number(i) for i in order] if len(order) != self.nlevels: @@ -2275,6 +2352,34 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Resulting index. indexer : np.ndarray Indices of output values in original index. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]]) + >>> mi + MultiIndex([(0, 2), + (0, 1)], + ) + + >>> mi.sortlevel() + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(sort_remaining=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) + + >>> mi.sortlevel(1) + (MultiIndex([(0, 1), + (0, 2)], + ), array([1, 0])) + + >>> mi.sortlevel(1, ascending=False) + (MultiIndex([(0, 2), + (0, 1)], + ), array([0, 1])) """ if isinstance(level, (str, int)): level = [level] From 2c1e981732e948d53b3c8251e0d87a6964e82f52 Mon Sep 17 00:00:00 2001 From: Fabian Gebhart Date: Mon, 23 Nov 2020 19:59:07 +0100 Subject: [PATCH 08/10] TST: add test to verify column does not lose categorical type when using loc (#37988) --- pandas/tests/indexing/test_loc.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e7831475932d9..61e44a87bb70c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -12,6 +12,7 @@ import pandas as pd from pandas import ( + Categorical, CategoricalIndex, DataFrame, Index, @@ -1319,6 +1320,13 @@ def test_loc_setitem_datetime_keys_cast(self): expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) tm.assert_frame_equal(df, expected) + def test_loc_setitem_categorical_column_retains_dtype(self, ordered): + # GH16360 + result = DataFrame({"A": [1]}) + result.loc[:, "B"] = Categorical(["b"], ordered=ordered) + expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) + tm.assert_frame_equal(result, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): From 20f7ffa0e9a87fe207a316574d7343673b99d9d9 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Mon, 23 Nov 2020 19:24:00 -0500 Subject: [PATCH 09/10] REGR: fix inplace operations for EAs with non-EA arg (#37986) --- doc/source/whatsnew/v1.1.5.rst | 2 +- pandas/core/generic.py | 7 +++++- pandas/tests/series/test_arithmetic.py | 34 ++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 323342cb43950..609c3650c8cc2 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -17,7 +17,7 @@ Fixed regressions - Regression in addition of a timedelta-like scalar to a :class:`DatetimeIndex` raising incorrectly (:issue:`37295`) - Fixed regression in :meth:`Series.groupby` raising when the :class:`Index` of the :class:`Series` had a tuple as its name (:issue:`37755`) - Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` for ``__setitem__`` when one-dimensional tuple was given to select from :class:`MultiIndex` (:issue:`37711`) -- +- Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3aa692c5d3d43..e2b3406c6b1c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -70,6 +70,7 @@ is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like, + is_dtype_equal, is_extension_array_dtype, is_float, is_list_like, @@ -11266,7 +11267,11 @@ def _inplace_method(self, other, op): """ result = op(self, other) - if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype: + if ( + self.ndim == 1 + and result._indexed_same(self) + and is_dtype_equal(result.dtype, self.dtype) + ): # GH#36498 this inplace op can _actually_ be inplace. self._values[:] = result._values return self diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 6aad2cadf78ba..c5196cea5d3bb 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -832,6 +832,40 @@ def test_scalarop_preserve_name(self, datetime_series): assert result.name == datetime_series.name +class TestInplaceOperations: + @pytest.mark.parametrize( + "dtype1, dtype2, dtype_expected, dtype_mul", + ( + ("Int64", "Int64", "Int64", "Int64"), + ("float", "float", "float", "float"), + ("Int64", "float", "float", "float"), + pytest.param( + "Int64", + "Float64", + "Float64", + "Float64", + marks=pytest.mark.xfail(reason="Not implemented yet"), + ), + ), + ) + def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul): + # GH 37910 + + ser1 = Series([1], dtype=dtype1) + ser2 = Series([2], dtype=dtype2) + ser1 += ser2 + expected = Series([3], dtype=dtype_expected) + tm.assert_series_equal(ser1, expected) + + ser1 -= ser2 + expected = Series([1], dtype=dtype_expected) + tm.assert_series_equal(ser1, expected) + + ser1 *= ser2 + expected = Series([2], dtype=dtype_mul) + tm.assert_series_equal(ser1, expected) + + def test_none_comparison(series_with_simple_index): series = series_with_simple_index if isinstance(series.index, IntervalIndex): From 24e881d46ef21ba40c95e6d48472b259bcf22458 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Nov 2020 18:46:25 -0800 Subject: [PATCH 10/10] REF: Implement isin on DTA instead of DTI (#38012) --- pandas/core/algorithms.py | 6 ++-- pandas/core/arrays/datetimelike.py | 55 ++++++++++++++++++++++++++++- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimelike.py | 53 --------------------------- pandas/core/indexes/numeric.py | 7 ---- pandas/core/series.py | 2 +- 6 files changed, 58 insertions(+), 67 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a3abfaa48500c..b79905796f7cd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -433,10 +433,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return cast("Categorical", comps).isin(values) if needs_i8_conversion(comps): - # Dispatch to DatetimeLikeIndexMixin.isin - from pandas import Index - - return Index(comps).isin(values) + # Dispatch to DatetimeLikeArrayMixin.isin + return array(comps).isin(values) comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3b419f8d1da2a..c482eae35b313 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -62,7 +62,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas.core import nanops, ops -from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts +from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts from pandas.core.arraylike import OpsMixin from pandas.core.arrays._mixins import NDArrayBackedExtensionArray import pandas.core.common as com @@ -697,6 +697,59 @@ def map(self, mapper): return Index(self).map(mapper).array + def isin(self, values) -> np.ndarray: + """ + Compute boolean array of whether each value is found in the + passed set of values. + + Parameters + ---------- + values : set or sequence of values + + Returns + ------- + ndarray[bool] + """ + if not hasattr(values, "dtype"): + values = np.asarray(values) + + if values.dtype.kind in ["f", "i", "u", "c"]: + # TODO: de-duplicate with equals, validate_comparison_value + return np.zeros(self.shape, dtype=bool) + + if not isinstance(values, type(self)): + inferrable = [ + "timedelta", + "timedelta64", + "datetime", + "datetime64", + "date", + "period", + ] + if values.dtype == object: + inferred = lib.infer_dtype(values, skipna=False) + if inferred not in inferrable: + if inferred == "string": + pass + + elif "mixed" in inferred: + return isin(self.astype(object), values) + else: + return np.zeros(self.shape, dtype=bool) + + try: + values = type(self)._from_sequence(values) + except ValueError: + return isin(self.astype(object), values) + + try: + self._check_compatible_with(values) + except (TypeError, ValueError): + # Includes tzawareness mismatch and IncompatibleFrequencyError + return np.zeros(self.shape, dtype=bool) + + return isin(self.asi8, values.asi8) + # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7b72196c3c2f3..b5900ead246f3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5145,7 +5145,7 @@ def isin(self, values, level=None): """ if level is not None: self._validate_index_level(level) - return algos.isin(self, values) + return algos.isin(self._values, values) def _get_string_slice(self, key: str_t): # this is for partial string indexing, diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ce5d62aec4f9f..d0f818410f96a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,7 +24,6 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCIndex, ABCSeries -from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.common as com @@ -500,58 +499,6 @@ def _partial_date_slice( __truediv__ = make_wrapped_arith_op("__truediv__") __rtruediv__ = make_wrapped_arith_op("__rtruediv__") - def isin(self, values, level=None): - """ - Compute boolean array of whether each index value is found in the - passed set of values. - - Parameters - ---------- - values : set or sequence of values - - Returns - ------- - is_contained : ndarray (boolean dtype) - """ - if level is not None: - self._validate_index_level(level) - - if not hasattr(values, "dtype"): - values = np.asarray(values) - - if values.dtype.kind in ["f", "i", "u", "c"]: - # TODO: de-duplicate with equals, validate_comparison_value - return np.zeros(self.shape, dtype=bool) - - if not isinstance(values, type(self)): - inferrable = [ - "timedelta", - "timedelta64", - "datetime", - "datetime64", - "date", - "period", - ] - if values.dtype == object: - inferred = lib.infer_dtype(values, skipna=False) - if inferred not in inferrable: - if "mixed" in inferred: - return self.astype(object).isin(values) - return np.zeros(self.shape, dtype=bool) - - try: - values = type(self)(values) - except ValueError: - return self.astype(object).isin(values) - - try: - self._data._check_compatible_with(values) - except (TypeError, ValueError): - # Includes tzawareness mismatch and IncompatibleFrequencyError - return np.zeros(self.shape, dtype=bool) - - return algorithms.isin(self.asi8, values.asi8) - def shift(self, periods=1, freq=None): """ Shift index by desired number of time frequency increments. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 24aaf5885fe0e..7778b1e264cd8 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -27,7 +27,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna -from pandas.core import algorithms import pandas.core.common as com from pandas.core.indexes.base import Index, maybe_extract_name @@ -434,12 +433,6 @@ def __contains__(self, other: Any) -> bool: def is_unique(self) -> bool: return super().is_unique and self._nan_idxs.size < 2 - @doc(Index.isin) - def isin(self, values, level=None): - if level is not None: - self._validate_index_level(level) - return algorithms.isin(np.array(self), values) - def _can_union_without_object_cast(self, other) -> bool: # See GH#26778, further casting may occur in NumericIndex._union return is_numeric_dtype(other.dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index d59e72a04209c..4c3ad38c8a922 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4691,7 +4691,7 @@ def isin(self, values) -> "Series": 5 False Name: animal, dtype: bool """ - result = algorithms.isin(self, values) + result = algorithms.isin(self._values, values) return self._constructor(result, index=self.index).__finalize__( self, method="isin" )