18
18
import sys
19
19
from datetime import datetime
20
20
21
- import six
22
21
from six .moves import cStringIO as StringIO
23
22
24
23
import psutil
25
24
import pynvml as N
26
25
from blessings import Terminal
27
26
28
27
# Label shown when a GPU does not expose the requested information.
NOT_SUPPORTED = 'Not Supported'
# Number of bytes in one MB; used to convert NVML byte counts into MB.
MB = 1024 * 1024
29
29
30
30
31
31
class GPUStat (object ):
32
32
33
33
def __init__(self, entry):
    """
    Wrap the information dictionary of a single GPU.

    :param entry: dict of per-GPU fields. It is stored on ``self.entry``
        as-is (no copy is made).
    :raises TypeError: if ``entry`` is not a dict.
    """
    if not isinstance(entry, dict):
        raise TypeError(
            'entry should be a dict, {} given'.format(type(entry))
        )
    self.entry = entry
43
40
def __repr__(self):
    """
    Return the text that ``print_to`` writes for this object.

    The output is rendered into an in-memory buffer and the buffer's
    contents are returned as a string.
    """
    buf = StringIO()
    return self.print_to(buf).getvalue()
45
42
@@ -96,7 +93,8 @@ def memory_free(self):
96
93
@property
def memory_available(self):
    """
    Returns the available memory (in MB) as an integer.
    Alias of memory_free.
    """
    return self.memory_free
102
100
@@ -141,8 +139,7 @@ def processes(self):
141
139
"""
142
140
Get the list of running processes on the GPU.
143
141
"""
144
- return list (self .entry ['processes' ])
145
-
142
+ return self .entry ['processes' ]
146
143
147
144
def print_to (self , fp ,
148
145
with_colors = True , # deprecated arg
@@ -159,38 +156,41 @@ def print_to(self, fp,
159
156
def _conditional(cond_fn, true_value, false_value,
                 error_value=term.bold_black):
    """
    Choose one of two values according to a zero-argument predicate.

    :param cond_fn: callable taking no arguments; its result is tested
        for truthiness.
    :param true_value: returned when ``cond_fn()`` is truthy.
    :param false_value: returned when ``cond_fn()`` is falsy.
    :param error_value: returned when ``cond_fn()`` raises any
        ``Exception`` (for instance when it computes on a reading that
        is not available).
    """
    try:
        # A conditional expression is used instead of
        # `cond and true_value or false_value`: the latter falls through
        # to false_value whenever true_value itself is falsy (such as an
        # empty style string).
        return true_value if cond_fn() else false_value
    except Exception:
        return error_value
166
162
167
163
colors ['C0' ] = term .normal
168
164
colors ['C1' ] = term .cyan
169
165
colors ['CName' ] = term .blue
170
- colors ['CTemp' ] = _conditional (lambda : int ( self .entry [ ' temperature.gpu' ]) < 50 ,
166
+ colors ['CTemp' ] = _conditional (lambda : self .temperature < 50 ,
171
167
term .red , term .bold_red )
172
168
colors ['CMemU' ] = term .bold_yellow
173
169
colors ['CMemT' ] = term .yellow
174
170
colors ['CMemP' ] = term .yellow
175
171
colors ['CUser' ] = term .bold_black # gray
176
- colors ['CUtil' ] = _conditional (lambda : int ( self .entry [ ' utilization.gpu' ]) < 30 ,
172
+ colors ['CUtil' ] = _conditional (lambda : self .utilization < 30 ,
177
173
term .green , term .bold_green )
178
- colors ['CPowU' ] = _conditional (lambda : float (self .entry ['power.draw' ]) / self .entry ['enforced.power.limit' ] < 0.4 ,
179
- term .magenta , term .bold_magenta )
174
+ colors ['CPowU' ] = _conditional (
175
+ lambda : float (self .power_draw ) / self .power_limit < 0.4 ,
176
+ term .magenta , term .bold_magenta
177
+ )
180
178
colors ['CPowL' ] = term .magenta
181
179
182
180
if not with_colors :
183
181
for k in list (colors .keys ()):
184
182
colors [k ] = ''
185
183
186
184
def _repr(v, none_value='??'):
    """
    Substitute a placeholder for a missing value.

    :param v: the value to display.
    :param none_value: placeholder returned when ``v`` is None.
    :return: ``none_value`` if ``v`` is None, otherwise ``v`` unchanged
        (conversion to text is left to the format call that consumes it).
    """
    if v is None:
        return none_value
    return v
189
186
190
187
# build one-line display information
191
- # we want power use optional, but if deserves being grouped with temperature and utilization
192
- reps = "%(C1)s[{entry[index]}]%(C0)s %(CName)s{entry[name]:{gpuname_width}}%(C0)s |" \
193
- "%(CTemp)s{entry[temperature.gpu]:>3}'C%(C0)s, %(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s"
188
+ # we want power use optional, but it deserves being grouped with
189
+ # temperature and utilization
190
+ reps = "%(C1)s[{entry[index]}]%(C0)s " \
191
+ "%(CName)s{entry[name]:{gpuname_width}}%(C0)s |" \
192
+ "%(CTemp)s{entry[temperature.gpu]:>3}'C%(C0)s, " \
193
+ "%(CUtil)s{entry[utilization.gpu]:>3} %%%(C0)s"
194
194
195
195
if show_power :
196
196
reps += ", %(CPowU)s{entry[power.draw]:>3}%(C0)s "
@@ -200,31 +200,40 @@ def _repr(v, none_value='??'):
200
200
else :
201
201
reps += "%(CPowU)sW%(C0)s"
202
202
203
- reps += " | %(C1)s%(CMemU)s{entry[memory.used]:>5}%(C0)s / %(CMemT)s{entry[memory.total]:>5}%(C0)s MB"
203
+ reps += " | %(C1)s%(CMemU)s{entry[memory.used]:>5}%(C0)s " \
204
+ "/ %(CMemT)s{entry[memory.total]:>5}%(C0)s MB"
204
205
reps = (reps ) % colors
205
- reps = reps .format (entry = {k : _repr (v ) for ( k , v ) in self .entry .items ()},
206
+ reps = reps .format (entry = {k : _repr (v ) for k , v in self .entry .items ()},
206
207
gpuname_width = gpuname_width )
207
208
reps += " |"
208
209
209
210
def process_repr(p):
    """
    Build the display text for one process entry.

    :param p: dict describing a process. The keys read here are
        'username', 'pid', 'gpu_memory_usage' and, optionally, 'command'
        (the pid is shown when 'command' is absent).
    :return: the formatted string, ending with the memory usage in
        parentheses.

    Relies on ``show_cmd``, ``show_user``, ``show_pid``, ``colors`` and
    ``_repr`` from the enclosing scope.
    """
    text = ''

    # The user name is shown when explicitly requested, and also when
    # the command is not shown.
    if show_user or not show_cmd:
        user = _repr(p['username'], '--')
        text += "{CUser}{}{C0}".format(user, **colors)

    if show_cmd:
        # Separate the command from whatever has been emitted so far.
        if text:
            text += ':'
        command = _repr(p.get('command', p['pid']), '--')
        text += "{C1}{}{C0}".format(command, **colors)

    if show_pid:
        text += ("/%s" % _repr(p['pid'], '--'))

    usage = _repr(p['gpu_memory_usage'], '?')
    text += '({CMemP}{}M{C0})'.format(usage, **colors)
    return text
221
229
222
- if self .entry ['processes' ] is not None :
223
- for p in self .entry ['processes' ]:
230
+ processes = self .entry ['processes' ]
231
+ if processes :
232
+ for p in processes :
224
233
reps += ' ' + process_repr (p )
225
234
else :
226
235
# None (not available)
227
- reps += ' (Not Supported)'
236
+ reps += ' ({})' . format ( NOT_SUPPORTED )
228
237
229
238
fp .write (reps )
230
239
return fp
@@ -259,14 +268,16 @@ def get_process_info(nv_process):
259
268
process = {}
260
269
ps_process = psutil .Process (pid = nv_process .pid )
261
270
process ['username' ] = ps_process .username ()
262
- # cmdline returns full path; as in `ps -o comm`, get short cmdnames.
271
+ # cmdline returns full path;
272
+ # as in `ps -o comm`, get short cmdnames.
263
273
_cmdline = ps_process .cmdline ()
264
- if not _cmdline : # sometimes, zombie or unknown (e.g. [kworker/8:2H])
274
+ if not _cmdline :
275
+ # sometimes, zombie or unknown (e.g. [kworker/8:2H])
265
276
process ['command' ] = '?'
266
277
else :
267
278
process ['command' ] = os .path .basename (_cmdline [0 ])
268
279
# Bytes to MBytes
269
- process ['gpu_memory_usage' ] = int ( nv_process .usedGpuMemory / 1024 / 1024 )
280
+ process ['gpu_memory_usage' ] = nv_process .usedGpuMemory // MB
270
281
process ['pid' ] = nv_process .pid
271
282
return process
272
283
@@ -279,12 +290,14 @@ def _decode(b):
279
290
uuid = _decode (N .nvmlDeviceGetUUID (handle ))
280
291
281
292
try :
282
- temperature = N .nvmlDeviceGetTemperature (handle , N .NVML_TEMPERATURE_GPU )
293
+ temperature = N .nvmlDeviceGetTemperature (
294
+ handle , N .NVML_TEMPERATURE_GPU
295
+ )
283
296
except N .NVMLError :
284
297
temperature = None # Not supported
285
298
286
299
try :
287
- memory = N .nvmlDeviceGetMemoryInfo (handle ) # in Bytes
300
+ memory = N .nvmlDeviceGetMemoryInfo (handle ) # in Bytes
288
301
except N .NVMLError :
289
302
memory = None # Not supported
290
303
@@ -295,39 +308,39 @@ def _decode(b):
295
308
296
309
try :
297
310
power = N .nvmlDeviceGetPowerUsage (handle )
298
- except :
311
+ except N . NVMLError :
299
312
power = None
300
313
301
314
try :
302
315
power_limit = N .nvmlDeviceGetEnforcedPowerLimit (handle )
303
- except :
316
+ except N . NVMLError :
304
317
power_limit = None
305
318
306
319
processes = []
307
320
try :
308
- nv_comp_processes = N .nvmlDeviceGetComputeRunningProcesses (handle )
321
+ nv_comp_processes = \
322
+ N .nvmlDeviceGetComputeRunningProcesses (handle )
309
323
except N .NVMLError :
310
324
nv_comp_processes = None # Not supported
311
325
try :
312
- nv_graphics_processes = N .nvmlDeviceGetGraphicsRunningProcesses (handle )
326
+ nv_graphics_processes = \
327
+ N .nvmlDeviceGetGraphicsRunningProcesses (handle )
313
328
except N .NVMLError :
314
329
nv_graphics_processes = None # Not supported
315
330
316
- if nv_comp_processes is None and nv_graphics_processes is None :
317
- processes = None # Not supported (in both cases)
318
- else :
319
- nv_comp_processes = nv_comp_processes or []
320
- nv_graphics_processes = nv_graphics_processes or []
321
- for nv_process in (nv_comp_processes + nv_graphics_processes ):
322
- # TODO: could be more information such as system memory usage,
323
- # CPU percentage, create time etc.
324
- try :
325
- process = get_process_info (nv_process )
326
- processes .append (process )
327
- except psutil .NoSuchProcess :
328
- # TODO: add some reminder for NVML broken context
329
- # e.g. nvidia-smi reset or reboot the system
330
- pass
331
+ processes = []
332
+ nv_comp_processes = nv_comp_processes or []
333
+ nv_graphics_processes = nv_graphics_processes or []
334
+ for nv_process in nv_comp_processes + nv_graphics_processes :
335
+ # TODO: could be more information such as system memory usage,
336
+ # CPU percentage, create time etc.
337
+ try :
338
+ process = get_process_info (nv_process )
339
+ processes .append (process )
340
+ except psutil .NoSuchProcess :
341
+ # TODO: add some reminder for NVML broken context
342
+ # e.g. nvidia-smi reset or reboot the system
343
+ pass
331
344
332
345
index = N .nvmlDeviceGetIndex (handle )
333
346
gpu_info = {
@@ -336,11 +349,12 @@ def _decode(b):
336
349
'name' : name ,
337
350
'temperature.gpu' : temperature ,
338
351
'utilization.gpu' : utilization .gpu if utilization else None ,
339
- 'power.draw' : int (power / 1000 ) if power is not None else None ,
340
- 'enforced.power.limit' : int (power_limit / 1000 ) if power_limit is not None else None ,
352
+ 'power.draw' : power // 1000 if power is not None else None ,
353
+ 'enforced.power.limit' : power_limit // 1000
354
+ if power_limit is not None else None ,
341
355
# Convert bytes into MBytes
342
- 'memory.used' : int ( memory .used / 1024 / 1024 ) if memory else None ,
343
- 'memory.total' : int ( memory .total / 1024 / 1024 ) if memory else None ,
356
+ 'memory.used' : memory .used // MB if memory else None ,
357
+ 'memory.total' : memory .total // MB if memory else None ,
344
358
'processes' : processes ,
345
359
}
346
360
return gpu_info
@@ -382,7 +396,8 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
382
396
):
383
397
# ANSI color configuration
384
398
if force_color and no_color :
385
- raise ValueError ("--color and --no_color can't be used at the same time" )
399
+ raise ValueError ("--color and --no_color can't"
400
+ " be used at the same time" )
386
401
387
402
if force_color :
388
403
t_color = Terminal (kind = 'xterm-color' , force_styling = True )
@@ -395,17 +410,19 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
395
410
if show_header :
396
411
time_format = locale .nl_langinfo (locale .D_T_FMT )
397
412
398
- header_msg = '{t.bold_white}{hostname}{t.normal} {timestr}' .format (** {
399
- 'hostname' : self .hostname ,
400
- 'timestr' : self .query_time .strftime (time_format ),
401
- 't' : t_color ,
402
- })
413
+ header_template = '{t.bold_white}{hostname}{t.normal} {timestr}'
414
+ header_msg = header_template .format (
415
+ hostname = self .hostname ,
416
+ timestr = self .query_time .strftime (time_format ),
417
+ t = t_color ,
418
+ )
403
419
404
420
fp .write (header_msg )
405
421
fp .write ('\n ' )
406
422
407
423
# body
408
- gpuname_width = max ([gpuname_width ] + [len (g .entry ['name' ]) for g in self ])
424
+ entry_name_width = [len (g .entry ['name' ]) for g in self ]
425
+ gpuname_width = max ([gpuname_width ] + entry_name_width )
409
426
for g in self :
410
427
g .print_to (fp ,
411
428
show_cmd = show_cmd ,
0 commit comments