CLN: EAFP for count (try row_bool_subset, fall back to row_bool_subset_object on ValueError)

pandas-dev · cpcloud · May 5, 2014 · Apr 30, 2014 · May 2, 2014 · May 2, 2014
commit a83c186b2ea4b1785346fca5b4b43f025d743ea6
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1405,10 +1405,10 @@ def aggregate(self, values, how, axis=0):
 
         if self._filter_empty_groups:
             if result.ndim == 2:
-                if is_numeric:
+                try:
                     result = lib.row_bool_subset(
                         result, (counts > 0).view(np.uint8))
-                else:
+                except ValueError:
                     result = lib.row_bool_subset_object(
                         result, (counts > 0).view(np.uint8))
             else:
@@ -1442,6 +1442,7 @@ def _aggregate(self, result, counts, values, how, is_numeric):
                 chunk = chunk.squeeze()
                 agg_func(result[:, :, i], counts, chunk, comp_ids)
         else:
+            #import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
             agg_func(result, counts, values, comp_ids)
 
         return trans_func(result)

diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
@@ -146,7 +146,8 @@ def f():
 """
 
 groupby_multi_count = Benchmark("df.groupby(['key1', 'key2']).count()",
-                                setup, start_date=datetime(2014, 5, 5))
+                                setup, name='groupby_multi_count',
+                                start_date=datetime(2014, 5, 5))
 #----------------------------------------------------------------------
 # Series.value_counts
 
@@ -180,11 +181,11 @@ def f():
 ind2 = np.random.randint(0, 2, size=100000)
 
 df = DataFrame({'key1': fac1.take(ind1),
-                'key2': fac2.take(ind2),
-                'key3': fac2.take(ind2),
-                'value1' : np.random.randn(100000),
-                'value2' : np.random.randn(100000),
-                'value3' : np.random.randn(100000)})
+'key2': fac2.take(ind2),
+'key3': fac2.take(ind2),
+'value1' : np.random.randn(100000),
+'value2' : np.random.randn(100000),
+'value3' : np.random.randn(100000)})
 """
 
 stmt = "df.pivot_table(rows='key1', cols=['key2', 'key3'])"
@@ -221,13 +222,13 @@ def f():
                           start_date=datetime(2012, 5, 1))
 
 groupby_first_float32 = Benchmark('data2.groupby(labels).first()', setup,
-                          start_date=datetime(2013, 1, 1))
+                                  start_date=datetime(2013, 1, 1))
 
 groupby_last = Benchmark('data.groupby(labels).last()', setup,
                          start_date=datetime(2012, 5, 1))
 
 groupby_last_float32 = Benchmark('data2.groupby(labels).last()', setup,
-                         start_date=datetime(2013, 1, 1))
+                                 start_date=datetime(2013, 1, 1))
 
 
 #----------------------------------------------------------------------
@@ -285,9 +286,9 @@ def f():
 labels = np.random.randint(0, 2000, size=N)
 labels2 = np.random.randint(0, 3, size=N)
 df = DataFrame({'key': labels,
-                'key2': labels2,
-                'value1': randn(N),
-                'value2': ['foo', 'bar', 'baz', 'qux'] * (N / 4)})
+'key2': labels2,
+'value1': randn(N),
+'value2': ['foo', 'bar', 'baz', 'qux'] * (N / 4)})
 def f(g):
     return 1
 """