1010
1111from pandas .compat import long
1212
13- from pandas import DataFrame , MultiIndex , Series , Timestamp , compat , date_range
13+ from pandas import (
14+ CategoricalDtype , DataFrame , MultiIndex , Series , Timestamp , compat ,
15+ date_range )
1416from pandas .tests .frame .common import TestData
1517import pandas .util .testing as tm
1618
@@ -220,6 +222,12 @@ def test_to_records_with_categorical(self):
220222 dtype = [("index" , "<i8" ), ("A" , "<U" ),
221223 ("B" , "<U" ), ("C" , "<U" )])),
222224
225+ # Pass in a dtype instance.
226+ (dict (column_dtypes = np .dtype ('unicode' )),
227+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
228+ dtype = [("index" , "<i8" ), ("A" , "<U" ),
229+ ("B" , "<U" ), ("C" , "<U" )])),
230+
223231 # Pass in a dictionary (name-only).
224232 (dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 , "C" : "<U2" }),
225233 np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
@@ -249,6 +257,12 @@ def test_to_records_with_categorical(self):
249257 dtype = [("index" , "<i8" ), ("A" , "i1" ),
250258 ("B" , "<f4" ), ("C" , "O" )])),
251259
260+ # Names / indices not in dtype mapping default to array dtype.
261+ (dict (column_dtypes = {"A" : np .dtype ('int8' ), "B" : np .dtype ('float32' )}),
262+ np .rec .array ([("0" , "1" , "0.2" , "a" ), ("1" , "2" , "1.5" , "bc" )],
263+ dtype = [("index" , "<i8" ), ("A" , "i1" ),
264+ ("B" , "<f4" ), ("C" , "O" )])),
265+
252266 # Mixture of everything.
253267 (dict (column_dtypes = {"A" : np .int8 , "B" : np .float32 },
254268 index_dtypes = "<U2" ),
@@ -258,17 +272,26 @@ def test_to_records_with_categorical(self):
258272
259273 # Invalid dtype values.
260274 (dict (index = False , column_dtypes = list ()),
261- "Invalid dtype \\ [\\ ] specified for column A" ),
275+ ( ValueError , "Invalid dtype \\ [\\ ] specified for column A" ) ),
262276
263277 (dict (index = False , column_dtypes = {"A" : "int32" , "B" : 5 }),
264- "Invalid dtype 5 specified for column B" ),
278+ (ValueError , "Invalid dtype 5 specified for column B" )),
279+
280+ # NumPy can't handle extension array (EA) dtypes, so check that an error is raised.
281+ (dict (index = False , column_dtypes = {"A" : "int32" ,
282+ "B" : CategoricalDtype (['a' , 'b' ])}),
283+ (ValueError , 'Invalid dtype category specified for column B' )),
284+
285+ # Check that bad types raise
286+ (dict (index = False , column_dtypes = {"A" : "int32" , "B" : "foo" }),
287+ (TypeError , 'data type "foo" not understood' )),
265288 ])
266289 def test_to_records_dtype (self , kwargs , expected ):
267290 # see gh-18146
268291 df = DataFrame ({"A" : [1 , 2 ], "B" : [0.2 , 1.5 ], "C" : ["a" , "bc" ]})
269292
270- if isinstance (expected , str ):
271- with pytest .raises (ValueError , match = expected ):
293+ if not isinstance (expected , np . recarray ):
294+ with pytest .raises (expected [ 0 ] , match = expected [ 1 ] ):
272295 df .to_records (** kwargs )
273296 else :
274297 result = df .to_records (** kwargs )
0 commit comments