C# FIAS XML Parser: Fast Bulk Loading of the FIAS Database

In information technology, and particularly in data processing and database work, a parser is an essential tool for converting information from a data source (such as a file or a network stream) into a format a program can work with. This resource describes a C# parser dedicated to processing XML files from FIAS, the Federal Information Address System. FIAS is a Russian national project that aims to build a nationwide address registry to standardize and unify all address information in the Russian Federation.

### Key topics

#### 1. The FIAS database and the XML file format
- **FIAS database**: a nationwide address repository containing all standard address data in the Russian Federation. It is typically used by information systems to query, update, and manage address data.
- **XML file format**: Extensible Markup Language is a markup language for storing and transmitting data. In the FIAS context, XML files carry the address registry data.

#### 2. The role and importance of the parser
In a data-processing pipeline, the parser converts data between formats so that raw data can be analyzed further or imported into a database. Here, the C# parser reads FIAS XML files and converts their contents into a form the database can accept.

#### 3. The C# language and LINQ
- **C#**: an object-oriented language developed by Microsoft and widely used on the .NET platform, offering modern features such as type safety, garbage collection, and a rich class library.
- **LINQ**: Language Integrated Query is a built-in query facility of C# that lets developers write SQL-style queries directly in C# code. When parsing XML, LINQ simplifies querying and transforming the data.

#### 4. Bulk loading of data
Bulk loading means importing a large volume of data into the database in a single pass. A FIAS XML export can contain an enormous number of address records, so bulk-loading techniques substantially improve import throughput (see the second sketch at the end of this overview).

#### 5. The MSSQL database management system
- **MSSQL**: Microsoft SQL Server is Microsoft's relational database management system. It stores and retrieves data and supports complex queries and transactions. In this resource, MSSQL serves as the storage backend for the FIAS database.

#### 6. Installing and managing the FIAS database
- **FIAS DBMS installation**: the bundled install_fias.sql file contains the SQL script that creates and configures the FIAS database, setting up the required tables, indexes, and other database objects.

#### 7. XML processing in C#
- **XML parsing**: processing XML in C# usually involves the classes in the System.Xml namespace, such as XmlDocument, XmlReader, and XmlWriter.
- **LINQ to XML**: a .NET feature that lets you manipulate XML data with LINQ syntax, offering a more concise and intuitive alternative to the traditional DOM approach (see the first sketch at the end of this overview).

#### 8. Software development and deployment
- **Version control**: the name fias.parser-master indicates that the project is hosted on a platform such as GitHub and that this archive corresponds to the master branch, so users get the latest code version.
- **Code download**: via the provided download link, users can obtain the source code of the C# library for parsing FIAS XML files and integrate it into their own projects.

In summary, the fias.parser resource is a practical tool that helps developers and system administrators process and exploit FIAS address data efficiently. With C# and LINQ, users can parse the XML files and bulk-load the data into an MSSQL database, which both standardizes address management and greatly improves processing efficiency.
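To make topics 3 and 7 concrete, here is a minimal LINQ to XML sketch; it is not code from the fias.parser library itself. It assumes an AS_ADDROBJ-style FIAS export in which each record is an `<Object>` element with attributes such as `AOGUID`, `FORMALNAME`, and `SHORTNAME` — adjust those names to the schema of your actual files.

```csharp
// A minimal LINQ to XML sketch (not the fias.parser API). The element and
// attribute names below (Object, AOGUID, FORMALNAME, SHORTNAME) are assumed
// from the common FIAS AS_ADDROBJ export layout and may need adjusting.
using System;
using System.Linq;
using System.Xml.Linq;

class AddrObjectPreview
{
    static void Main(string[] args)
    {
        // args[0]: path to an extracted FIAS XML file, e.g. an AS_ADDROBJ_*.XML
        XDocument doc = XDocument.Load(args[0]);

        var objects =
            from o in doc.Root.Elements("Object")
            select new
            {
                Guid   = (string)o.Attribute("AOGUID"),
                Name   = (string)o.Attribute("FORMALNAME"),
                Abbrev = (string)o.Attribute("SHORTNAME")
            };

        // Print a few records to verify the file is being read correctly.
        foreach (var o in objects.Take(10))
            Console.WriteLine($"{o.Guid}  {o.Abbrev} {o.Name}");
    }
}
```

Note that XDocument loads the whole document into memory, which is fine for inspecting a small extract but not for full FIAS exports that run to gigabytes; the streaming approach in the next sketch addresses that.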
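For topics 4 and 5, the sketch below shows the general streaming bulk-load pattern rather than the library's actual implementation: XmlReader walks the file without materializing it, rows are buffered in a DataTable, and SqlBulkCopy flushes them to MSSQL in batches. The destination table name fias.AddrObj, its columns, and the batch size are assumptions for illustration; in the real project the schema comes from install_fias.sql.

```csharp
// A minimal streaming bulk-load sketch (not the library's actual code).
// Table and column names are assumed; the real schema is defined by install_fias.sql.
using System;
using System.Data;
using System.Data.SqlClient;
using System.Xml;

class FiasBulkLoader
{
    const int BatchSize = 50_000; // rows buffered before each SqlBulkCopy flush

    static void Main(string[] args)
    {
        string xmlPath = args[0];
        string connStr = args[1]; // e.g. "Server=.;Database=fias;Integrated Security=true"

        // In-memory buffer for one batch of rows.
        var table = new DataTable();
        table.Columns.Add("AOGUID", typeof(Guid));
        table.Columns.Add("FORMALNAME", typeof(string));
        table.Columns.Add("SHORTNAME", typeof(string));

        using var conn = new SqlConnection(connStr);
        conn.Open();
        using var bulk = new SqlBulkCopy(conn) { DestinationTableName = "fias.AddrObj" };
        foreach (DataColumn c in table.Columns)
            bulk.ColumnMappings.Add(c.ColumnName, c.ColumnName);

        // Stream through the XML without loading it all into memory.
        using var reader = XmlReader.Create(xmlPath);
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Element || reader.Name != "Object")
                continue;

            table.Rows.Add(
                Guid.Parse(reader.GetAttribute("AOGUID")),
                reader.GetAttribute("FORMALNAME"),
                reader.GetAttribute("SHORTNAME"));

            if (table.Rows.Count >= BatchSize)
            {
                bulk.WriteToServer(table); // one round trip per batch
                table.Clear();
            }
        }

        if (table.Rows.Count > 0)
            bulk.WriteToServer(table); // flush the remaining rows
    }
}
```

Writing in batches of tens of thousands of rows keeps memory bounded and avoids one insert round trip per record, which is what makes loading the full FIAS database practical.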

import argparse import math import os os.environ["GIT_PYTHON_REFRESH"] = "quiet" import random import subprocess import sys import time from copy import deepcopy from datetime import datetime, timedelta from pathlib import Path try: import comet_ml # must be imported before torch (if installed) except ImportError: comet_ml = None import numpy as np import torch import torch.distributed as dist import torch.nn as nn import yaml from torch.optim import lr_scheduler from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.downloads import attempt_download, is_url from utils.general import ( LOGGER, TQDM_BAR_FORMAT, check_amp, check_dataset, check_file, check_git_info, check_git_status, check_img_size, check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer, yaml_save, ) from utils.loggers import LOGGERS, Loggers from utils.loggers.comet.comet_utils import check_comet_resume from utils.loss import ComputeLoss from utils.metrics import fitness from utils.plots import plot_evolve from utils.torch_utils import ( EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, torch_distributed_zero_first, ) LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv("RANK", -1)) WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1)) GIT_INFO = check_git_info() def train(hyp, opt, device, callbacks): save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = ( Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, ) callbacks.run("on_pretrain_routine_start") # Directories w = save_dir / "weights" # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir last, best = w / "last.pt", w / "best.pt" # Hyperparameters if isinstance(hyp, str): with open(hyp, errors="ignore") as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items())) opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: yaml_save(save_dir / "hyp.yaml", hyp) yaml_save(save_dir / "opt.yaml", vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: include_loggers = list(LOGGERS) if getattr(opt, "ndjson_console", False): include_loggers.append("ndjson_console") if getattr(opt, "ndjson_file", False): include_loggers.append("ndjson_file") loggers = Loggers( save_dir=save_dir, weights=weights, opt=opt, hyp=hyp, logger=LOGGER, include=tuple(include_loggers), ) # Register actions for k in methods(loggers): callbacks.register_action(k, callback=getattr(loggers, k)) # Process custom dataset artifact link data_dict = loggers.remote_dataset if resume: # If resuming runs from 
remote artifact weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size # Config plots = not evolve and not opt.noplots # create plots cuda = device.type != "cpu" init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict["train"], data_dict["val"] nc = 1 if single_cls else int(data_dict["nc"]) # number of classes names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"] # class names is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt") # COCO dataset # Model check_suffix(weights, ".pt") # check weights pretrained = weights.endswith(".pt") if pretrained: with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location="cpu") # load checkpoint to CPU to avoid CUDA memory leak model = Model(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else [] # exclude keys csd = ckpt["model"].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}") # report else: model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create amp = check_amp(model) # check AMP # Freeze freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): v.requires_grad = True # train all layers # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f"freezing {k}") v.requires_grad = False # Image size gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp["weight_decay"] *= batch_size * accumulate / nbs # scale weight_decay optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"]) # Scheduler if opt.cos_lr: lf = one_cycle(1, hyp["lrf"], epochs) # cosine 1->hyp['lrf'] else: def lf(x): """Linear learning rate scheduler function with decay calculated by epoch proportion.""" return (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"] # linear scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume best_fitness, start_epoch = 0.0, 0 if pretrained: if resume: best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: LOGGER.warning( "WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n" "See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get 
started." ) model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and RANK != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) LOGGER.info("Using SyncBatchNorm()") # Trainloader train_loader, dataset = create_dataloader( train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None if opt.cache == "val" else opt.cache, rect=opt.rect, rank=LOCAL_RANK, workers=workers, image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr("train: "), shuffle=True, seed=opt.seed, ) labels = np.concatenate(dataset.labels, 0) mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}" # Process 0 if RANK in {-1, 0}: val_loader = create_dataloader( val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1, workers=workers * 2, pad=0.5, prefix=colorstr("val: "), )[0] if not resume: if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision callbacks.run("on_pretrain_routine_end", labels, names) # DDP mode if cuda and RANK != -1: model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) hyp["box"] *= 3 / nl # scale to layers hyp["cls"] *= nc / 80 * 3 / nl # scale to classes and layers hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers hyp["label_smoothing"] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nb = len(train_loader) # number of batches nw = max(round(hyp["warmup_epochs"] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model) # init loss class callbacks.run("on_train_start") LOGGER.info( f"Image sizes {imgsz} train, {imgsz} val\n" f"Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n" f"Logging results to {colorstr('bold', save_dir)}\n" f"Starting training for {epochs} epochs..." 
) for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ callbacks.run("on_train_epoch_start") model.train() # Update image weights (optional, single-GPU only) if opt.image_weights: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(3, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info(("\n" + "%11s" * 7) % ("Epoch", "GPU_mem", "box_loss", "obj_loss", "cls_loss", "Instances", "Size")) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- callbacks.run("on_train_batch_start") ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x["lr"] = np.interp(ni, xi, [hyp["warmup_bias_lr"] if j == 0 else 0.0, x["initial_lr"] * lf(epoch)]) if "momentum" in x: x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]]) # Multi-scale if opt.multi_scale: sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False) # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4.0 # Backward scaler.scale(loss).backward() # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: scaler.unscale_(optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) last_opt_step = ni # Log if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f"{torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0:.3g}G" # (GB) pbar.set_description( ("%11s" * 2 + "%11.4g" * 5) % (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1]) ) callbacks.run("on_train_batch_end", model, ni, imgs, targets, paths, list(mloss)) if callbacks.stop_training: return # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x["lr"] for x in optimizer.param_groups] # for loggers scheduler.step() 
if RANK in {-1, 0}: # mAP callbacks.run("on_train_epoch_end", epoch=epoch) ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"]) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP results, maps, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, half=amp, model=ema.ema, single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, plots=False, callbacks=callbacks, compute_loss=compute_loss, ) # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, [email protected], [email protected]] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr callbacks.run("on_fit_epoch_end", log_vals, epoch, best_fitness, fi) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { "epoch": epoch, "best_fitness": best_fitness, "model": deepcopy(de_parallel(model)).half(), "ema": deepcopy(ema.ema).half(), "updates": ema.updates, "optimizer": optimizer.state_dict(), "opt": vars(opt), "git": GIT_INFO, # {remote, branch, commit} if a git repo "date": datetime.now().isoformat(), } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f"epoch{epoch}.pt") del ckpt callbacks.run("on_model_save", last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training broadcast_list = [stop if RANK == 0 else None] dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks if RANK != 0: stop = broadcast_list[0] if stop: break # must break all DDP ranks # end epoch ---------------------------------------------------------------------------------------------------- # end training ----------------------------------------------------------------------------------------------------- if RANK in {-1, 0}: LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.") for f in last, best: if f.exists(): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f"\nValidating {f}...") results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, plots=plots, callbacks=callbacks, compute_loss=compute_loss, ) # val best model with plots if is_coco: callbacks.run("on_fit_epoch_end", list(mloss) + list(results) + lr, epoch, best_fitness, fi) callbacks.run("on_train_end", last, best, epoch, results) torch.cuda.empty_cache() return results def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path") parser.add_argument("--cfg", type=str, default="A_dataset/yolov5s.yaml", help="model.yaml path") parser.add_argument("--data", type=str, default=ROOT / "A_dataset/dataset.yaml", help="dataset.yaml path") parser.add_argument("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path") parser.add_argument("--epochs", type=int, default=100, help="total training epochs") parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch") 
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)") parser.add_argument("--rect", action="store_true", help="rectangular training") parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training") parser.add_argument("--nosave", action="store_true", help="only save final checkpoint") parser.add_argument("--noval", action="store_true", help="only validate final epoch") parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor") parser.add_argument("--noplots", action="store_true", help="save no plot files") parser.add_argument("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations") parser.add_argument( "--evolve_population", type=str, default=ROOT / "data/hyps", help="location for loading population" ) parser.add_argument("--resume_evolve", type=str, default=None, help="resume evolve from last generation") parser.add_argument("--bucket", type=str, default="", help="gsutil bucket") parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk") parser.add_argument("--image-weights", action="store_true", help="use weighted image selection for training") parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu") parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%") parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class") parser.add_argument("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer") parser.add_argument("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode") parser.add_argument("--workers", type=int, default=0, help="max dataloader workers (per RANK in DDP mode)") parser.add_argument("--project", default=ROOT / "runs/train", help="save to project/name") parser.add_argument("--name", default="exp", help="save to project/name") parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment") parser.add_argument("--quad", action="store_true", help="quad dataloader") parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler") parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon") parser.add_argument("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)") parser.add_argument("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2") parser.add_argument("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)") parser.add_argument("--seed", type=int, default=0, help="Global training seed") parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify") # Logger arguments parser.add_argument("--entity", default=None, help="Entity") parser.add_argument("--upload_dataset", nargs="?", const=True, default=False, help='Upload data, "val" option') parser.add_argument("--bbox_interval", type=int, default=-1, help="Set bounding-box image logging interval") parser.add_argument("--artifact_alias", type=str, default="latest", help="Version of dataset artifact to use") # NDJSON logging parser.add_argument("--ndjson-console", action="store_true", help="Log ndjson to console") parser.add_argument("--ndjson-file", action="store_true", help="Log ndjson to file") 
return parser.parse_known_args()[0] if known else parser.parse_args() def main(opt, callbacks=Callbacks()): if RANK in {-1, 0}: print_args(vars(opt)) check_git_status() check_requirements(ROOT / "requirements.txt") # Resume (from specified or most recent last.pt) if opt.resume and not check_comet_resume(opt) and not opt.evolve: last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) opt_yaml = last.parent.parent / "opt.yaml" # train options yaml opt_data = opt.data # original dataset if opt_yaml.is_file(): with open(opt_yaml, errors="ignore") as f: d = yaml.safe_load(f) else: d = torch.load(last, map_location="cpu")["opt"] opt = argparse.Namespace(**d) # replace opt.cfg, opt.weights, opt.resume = "", str(last), True # reinstate if is_url(opt_data): opt.data = check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = ( check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project), ) # checks assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified" if opt.evolve: if opt.project == str(ROOT / "runs/train"): # if default project name, rename to runs/evolve opt.project = str(ROOT / "runs/evolve") opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume if opt.name == "cfg": opt.name = Path(opt.cfg).stem # use model.yaml as name opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: msg = "is not compatible with YOLOv5 Multi-GPU DDP training" assert not opt.image_weights, f"--image-weights {msg}" assert not opt.evolve, f"--evolve {msg}" assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size" assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE" assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command" torch.cuda.set_device(LOCAL_RANK) device = torch.device("cuda", LOCAL_RANK) dist.init_process_group( backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=10800) ) # Train if not opt.evolve: train(opt.hyp, opt, device, callbacks) # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (including this hyperparameter True-False, lower_limit, upper_limit) meta = { "lr0": (False, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) "lrf": (False, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) "momentum": (False, 0.6, 0.98), # SGD momentum/Adam beta1 "weight_decay": (False, 0.0, 0.001), # optimizer weight decay "warmup_epochs": (False, 0.0, 5.0), # warmup epochs (fractions ok) "warmup_momentum": (False, 0.0, 0.95), # warmup initial momentum "warmup_bias_lr": (False, 0.0, 0.2), # warmup initial bias lr "box": (False, 0.02, 0.2), # box loss gain "cls": (False, 0.2, 4.0), # cls loss gain "cls_pw": (False, 0.5, 2.0), # cls BCELoss positive_weight "obj": (False, 0.2, 4.0), # obj loss gain (scale with pixels) "obj_pw": (False, 0.5, 2.0), # obj BCELoss positive_weight "iou_t": (False, 0.1, 0.7), # IoU training threshold "anchor_t": (False, 2.0, 8.0), # anchor-multiple threshold "anchors": (False, 2.0, 10.0), # anchors per output grid (0 to ignore) "fl_gamma": (False, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) "hsv_h": (True, 0.0, 0.1), # image HSV-Hue augmentation (fraction) "hsv_s": 
(True, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) "hsv_v": (True, 0.0, 0.9), # image HSV-Value augmentation (fraction) "degrees": (True, 0.0, 45.0), # image rotation (+/- deg) "translate": (True, 0.0, 0.9), # image translation (+/- fraction) "scale": (True, 0.0, 0.9), # image scale (+/- gain) "shear": (True, 0.0, 10.0), # image shear (+/- deg) "perspective": (True, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 "flipud": (True, 0.0, 1.0), # image flip up-down (probability) "fliplr": (True, 0.0, 1.0), # image flip left-right (probability) "mosaic": (True, 0.0, 1.0), # image mosaic (probability) "mixup": (True, 0.0, 1.0), # image mixup (probability) "copy_paste": (True, 0.0, 1.0), # segment copy-paste (probability) } # GA configs pop_size = 50 mutation_rate_min = 0.01 mutation_rate_max = 0.5 crossover_rate_min = 0.5 crossover_rate_max = 1 min_elite_size = 2 max_elite_size = 5 tournament_size_min = 2 tournament_size_max = 10 with open(opt.hyp, errors="ignore") as f: hyp = yaml.safe_load(f) # load hyps dict if "anchors" not in hyp: # anchors commented in hyp.yaml hyp["anchors"] = 3 if opt.noautoanchor: del hyp["anchors"], meta["anchors"] opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv" if opt.bucket: # download evolve.csv if exists subprocess.run( [ "gsutil", "cp", f"gs://{opt.bucket}/evolve.csv", str(evolve_csv), ] ) # Delete the items in meta dictionary whose first value is False del_ = [item for item, value_ in meta.items() if value_[0] is False] hyp_GA = hyp.copy() # Make a copy of hyp dictionary for item in del_: del meta[item] # Remove the item from meta dictionary del hyp_GA[item] # Remove the item from hyp_GA dictionary # Set lower_limit and upper_limit arrays to hold the search space boundaries lower_limit = np.array([meta[k][1] for k in hyp_GA.keys()]) upper_limit = np.array([meta[k][2] for k in hyp_GA.keys()]) # Create gene_ranges list to hold the range of values for each gene in the population gene_ranges = [(lower_limit[i], upper_limit[i]) for i in range(len(upper_limit))] # Initialize the population with initial_values or random values initial_values = [] # If resuming evolution from a previous checkpoint if opt.resume_evolve is not None: assert os.path.isfile(ROOT / opt.resume_evolve), "evolve population path is wrong!" 
with open(ROOT / opt.resume_evolve, errors="ignore") as f: evolve_population = yaml.safe_load(f) for value in evolve_population.values(): value = np.array([value[k] for k in hyp_GA.keys()]) initial_values.append(list(value)) # If not resuming from a previous checkpoint, generate initial values from .yaml files in opt.evolve_population else: yaml_files = [f for f in os.listdir(opt.evolve_population) if f.endswith(".yaml")] for file_name in yaml_files: with open(os.path.join(opt.evolve_population, file_name)) as yaml_file: value = yaml.safe_load(yaml_file) value = np.array([value[k] for k in hyp_GA.keys()]) initial_values.append(list(value)) # Generate random values within the search space for the rest of the population if initial_values is None: population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size)] elif pop_size > 1: population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size - len(initial_values))] for initial_value in initial_values: population = [initial_value] + population # Run the genetic algorithm for a fixed number of generations list_keys = list(hyp_GA.keys()) for generation in range(opt.evolve): if generation >= 1: save_dict = {} for i in range(len(population)): little_dict = {list_keys[j]: float(population[i][j]) for j in range(len(population[i]))} save_dict[f"gen{str(generation)}number{str(i)}"] = little_dict with open(save_dir / "evolve_population.yaml", "w") as outfile: yaml.dump(save_dict, outfile, default_flow_style=False) # Adaptive elite size elite_size = min_elite_size + int((max_elite_size - min_elite_size) * (generation / opt.evolve)) # Evaluate the fitness of each individual in the population fitness_scores = [] for individual in population: for key, value in zip(hyp_GA.keys(), individual): hyp_GA[key] = value hyp.update(hyp_GA) results = train(hyp.copy(), opt, device, callbacks) callbacks = Callbacks() # Write mutation results keys = ( "metrics/precision", "metrics/recall", "metrics/mAP_0.5", "metrics/mAP_0.5:0.95", "val/box_loss", "val/obj_loss", "val/cls_loss", ) print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket) fitness_scores.append(results[2]) # Select the fittest individuals for reproduction using adaptive tournament selection selected_indices = [] for _ in range(pop_size - elite_size): # Adaptive tournament size tournament_size = max( max(2, tournament_size_min), int(min(tournament_size_max, pop_size) - (generation / (opt.evolve / 10))), ) # Perform tournament selection to choose the best individual tournament_indices = random.sample(range(pop_size), tournament_size) tournament_fitness = [fitness_scores[j] for j in tournament_indices] winner_index = tournament_indices[tournament_fitness.index(max(tournament_fitness))] selected_indices.append(winner_index) # Add the elite individuals to the selected indices elite_indices = [i for i in range(pop_size) if fitness_scores[i] in sorted(fitness_scores)[-elite_size:]] selected_indices.extend(elite_indices) # Create the next generation through crossover and mutation next_generation = [] for _ in range(pop_size): parent1_index = selected_indices[random.randint(0, pop_size - 1)] parent2_index = selected_indices[random.randint(0, pop_size - 1)] # Adaptive crossover rate crossover_rate = max( crossover_rate_min, min(crossover_rate_max, crossover_rate_max - (generation / opt.evolve)) ) if random.uniform(0, 1) < crossover_rate: crossover_point = random.randint(1, len(hyp_GA) - 1) child = population[parent1_index][:crossover_point] + 
                    population[parent2_index][crossover_point:]
            else:
                child = population[parent1_index]
            # Adaptive mutation rate
            mutation_rate = max(
                mutation_rate_min, min(mutation_rate_max, mutation_rate_max - (generation / opt.evolve))
            )
            for j in range(len(hyp_GA)):
                if random.uniform(0, 1) < mutation_rate:
                    child[j] += random.uniform(-0.1, 0.1)
                    child[j] = min(max(child[j], gene_ranges[j][0]), gene_ranges[j][1])
            next_generation.append(child)
        # Replace the old population with the new generation
        population = next_generation
    # Print the best solution found
    best_index = fitness_scores.index(max(fitness_scores))
    best_individual = population[best_index]
    print("Best solution found:", best_individual)
    # Plot results
    plot_evolve(evolve_csv)
    LOGGER.info(
        f"Hyperparameter evolution finished {opt.evolve} generations\n"
        f"Results saved to {colorstr('bold', save_dir)}\n"
        f"Usage example: $ python train.py --hyp {evolve_yaml}"
    )


def generate_individual(input_ranges, individual_length):
    individual = []
    for i in range(individual_length):
        lower_bound, upper_bound = input_ranges[i]
        individual.append(random.uniform(lower_bound, upper_bound))
    return individual


def run(**kwargs):
    opt = parse_opt(True)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)

This is the training script that ships with YOLOv5. I want to modify it so the code is more concise while still being able to complete training.
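If the goal is simply a leaner way to launch training, note that the evolution machinery above only runs when --evolve is passed, so one low-risk option is to leave train.py untouched and drive it from a tiny wrapper through the run() helper defined at the bottom of the snippet. A minimal sketch, assuming the wrapper sits next to the YOLOv5 repository's train.py; the dataset, weights, and device values are placeholders:

# minimal_train.py -- thin driver around YOLOv5's train.py (placeholder paths and settings)
import train  # the train.py shown above


if __name__ == "__main__":
    # run() copies the parse_opt() defaults into an opt namespace, overrides them with
    # the keyword arguments via setattr, then calls main(opt), so one call trains normally.
    train.run(
        data="coco128.yaml",   # dataset yaml (placeholder)
        weights="yolov5s.pt",  # pretrained checkpoint (placeholder)
        imgsz=640,
        epochs=100,
        batch_size=16,
        device="0",            # GPU index, or "cpu"
    )

Keyword names must match the argparse destinations in parse_opt() (for example batch_size and imgsz), because run() applies them with setattr on the opt namespace.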


#!/bin/bash # GPCR 分析流程 - 最新数据库版 # 工作目录: /home/cm/GPCR_project/He/ # CPU 核心数: 8 # 使用最新版数据库 # ===== 配置参数 ===== WORK_DIR="/media/edsb3/disk1/cm/GPCR_project/He" INPUT_FASTA="${WORK_DIR}/proteins.fasta" OUTPUT_DIR="${WORK_DIR}/GPCR_results" CPU=8 # 步骤控制参数 - 设置从哪个步骤开始 (1-6) # 设置为 1 表示从头开始,设置为 2 表示从步骤2开始,以此类推 START_STEP=4 # 初始化 Conda source /home/cm/miniconda3/etc/profile.d/conda.sh conda activate gpcr_analysis # ===== 准备目录 ===== mkdir -p ${OUTPUT_DIR}/{hmmer_results,tmhmm_results,gpcrdb_results,final_results} DB_DIR="${WORK_DIR}/bio_dbs" mkdir -p ${DB_DIR} cd ${WORK_DIR} # ===== 1. Pfam 结构域扫描 ===== if [ $START_STEP -le 1 ]; then echo "=== 步骤1: Pfam 数据库扫描 (使用 ${CPU} 核心) ===" DOMAINS=("PF00001" "PF00002" "PF00003" "PF10324" "PF10328" "PF01534" "PF06814") # 下载最新版 Pfam 数据库 if [ ! -f "${DB_DIR}/Pfam-A.hmm" ]; then echo "下载最新版 Pfam 数据库..." wget -P ${DB_DIR} https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.gz gunzip ${DB_DIR}/Pfam-A.hmm.gz hmmpress ${DB_DIR}/Pfam-A.hmm fi # 创建GPCR结构域子集(增加错误检查) echo "创建GPCR结构域子集..." rm -f ${DB_DIR}/GPCR_domains.hmm* 2>/dev/null for domain in "${DOMAINS[@]}"; do echo "提取结构域: $domain" hmmfetch ${DB_DIR}/Pfam-A.hmm $domain > ${DB_DIR}/${domain}.hmm # 检查是否提取成功 if [ ! -s "${DB_DIR}/${domain}.hmm" ]; then echo "警告: 无法提取结构域 $domain,尝试完整数据库扫描" # 如果提取失败,直接使用完整数据库 cp ${DB_DIR}/Pfam-A.hmm ${DB_DIR}/GPCR_domains.hmm break fi done # 如果所有结构域都提取成功,则合并它们 if [ ! -f "${DB_DIR}/GPCR_domains.hmm" ]; then cat ${DB_DIR}/PF*.hmm > ${DB_DIR}/GPCR_domains.hmm fi # 创建索引 hmmpress ${DB_DIR}/GPCR_domains.hmm # 运行hmmscan echo "扫描GPCR结构域 (使用 ${CPU} 核心)..." hmmscan --cpu ${CPU} \ --tblout ${OUTPUT_DIR}/hmmer_results/pfam.tblout \ --domtblout ${OUTPUT_DIR}/hmmer_results/pfam.domtblout \ ${DB_DIR}/GPCR_domains.hmm \ ${INPUT_FASTA} # 检查hmmscan是否成功 if [ ! -s "${OUTPUT_DIR}/hmmer_results/pfam.tblout" ]; then echo "警告: hmmscan 未生成有效输出,尝试使用完整Pfam数据库" hmmscan --cpu ${CPU} \ --tblout ${OUTPUT_DIR}/hmmer_results/pfam.tblout \ --domtblout ${OUTPUT_DIR}/hmmer_results/pfam.domtblout \ ${DB_DIR}/Pfam-A.hmm \ ${INPUT_FASTA} fi fi # ===== 2. InterPro 远程注释 ===== if [ $START_STEP -le 2 ]; then echo "=== 步骤2: 使用本地InterProScan进行注释 ===" # 确保在正确的conda环境 source /home/cm/miniconda3/etc/profile.d/conda.sh conda activate gpcr_analysis # 1. 设置本地InterProScan路径 IPRSCAN_DIR="/media/edsb3/disk1/cm/GPCR_project/He/interproscan-5.75-106.0" IPRSCAN_SCRIPT="${IPRSCAN_DIR}/interproscan.sh" # 2. 验证安装 if [ ! -f "${IPRSCAN_SCRIPT}" ]; then echo "错误: 找不到interproscan.sh脚本! 请检查路径: ${IPRSCAN_SCRIPT}" exit 1 fi echo "使用本地InterProScan: ${IPRSCAN_SCRIPT}" # 3. 确保Java环境 echo "配置Java环境..." if ! command -v java &> /dev/null; then echo "安装Java..." conda install -c conda-forge -y openjdk=17 fi echo "Java版本: $(java -version 2>&1 | head -1)" # 4. 初始化InterProScan(如果未初始化) if [ ! -f "${IPRSCAN_DIR}/interproscan.properties" ]; then echo "初始化InterProScan..." cd ${IPRSCAN_DIR} python3 setup.py interproscan.properties cd ${WORK_DIR} fi # 5. 准备输出目录 mkdir -p ${OUTPUT_DIR}/hmmer_results # 6. 创建清洁的FASTA文件(移除星号) echo "创建清洁的FASTA文件(移除星号)..." CLEAN_FASTA="${OUTPUT_DIR}/hmmer_results/clean_proteins.fasta" # 使用AWK移除序列中的星号 awk '{ if (/^>/) { # 标题行直接打印 print } else { # 序列行移除星号 gsub(/\*/, "", $0) print } }' ${INPUT_FASTA} > ${CLEAN_FASTA} # 验证清洁文件 if [ ! -s "${CLEAN_FASTA}" ]; then echo "错误: 清洁FASTA文件创建失败!" exit 1 fi # 7. 运行Interpro注释 echo "运行Interpro注释..." 
INTERPRO_OUTPUT="${OUTPUT_DIR}/hmmer_results/interpro_results.xml" LOG_FILE="${OUTPUT_DIR}/hmmer_results/interpro_scan.log" # 设置Java内存参数 export JAVA_OPTS="-Xmx8G" # 运行命令 ${IPRSCAN_SCRIPT} \ -i ${CLEAN_FASTA} \ -f XML \ -o ${INTERPRO_OUTPUT} \ -appl SUPERFAMILY \ -goterms \ -pa \ -iprlookup \ -verbose 2>&1 | tee ${LOG_FILE} # 8. 提取包含SSF81321结构域的蛋白质名称 echo "提取GPCR相关结构域 (SSF81321)..." python3 <<EOF import xml.etree.ElementTree as ET import re # 定义命名空间 ns = {'ipr': 'https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/schemas'} try: # 解析XML文件 tree = ET.parse('${INTERPRO_OUTPUT}') root = tree.getroot() # 存储结果 gpcr_proteins = set() # 遍历所有蛋白质元素 for protein in root.findall('ipr:protein', ns): # 获取蛋白质名称(来自<xref>标签的name属性) xref = protein.find('ipr:xref', ns) if xref is None: continue protein_name = xref.get('name') if protein_name is None: continue # 检查匹配项 matches = protein.find('ipr:matches', ns) if matches is None: continue # 遍历所有匹配 for match in matches.findall('ipr:superfamilyhmmer3-match', ns): signature = match.find('ipr:signature', ns) if signature is None: continue # 检查是否为SSF81321 if signature.get('ac') == 'SSF81321': # 提取蛋白质名称的第一部分 name_parts = protein_name.split() if name_parts: gpcr_proteins.add(name_parts[0]) else: gpcr_proteins.add(protein_name) break # 找到一个匹配就足够 # 保存结果 with open('${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt', 'w') as f: for protein in sorted(gpcr_proteins): f.write(protein + '\n') print(f'找到 {len(gpcr_proteins)} 个包含SSF81321结构域的蛋白质') except Exception as e: print(f"XML解析错误: {str(e)}") print("使用备用方法...") # 备用方法:正则表达式解析 content = open('${INTERPRO_OUTPUT}', 'r').read() # 查找所有蛋白质块 protein_blocks = re.findall(r'<protein\b[^>]*>(.*?)</protein>', content, re.DOTALL) results = set() for block in protein_blocks: # 检查是否有SSF81321 if 'ac="SSF81321"' not in block: continue # 提取蛋白质名称 name_match = re.search(r'<xref\s[^>]*name="([^"]+)"', block) if name_match: name = name_match.group(1).split()[0] # 取第一部分 results.add(name) # 保存结果 with open('${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt', 'w') as f: for protein in sorted(results): f.write(protein + '\n') print(f'使用备用方法找到 {len(results)} 个包含SSF81321结构域的蛋白质') EOF # 9. 验证结果 if [ -s "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" ]; then COUNT=$(wc -l < "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt") echo "Interpro注释完成! 找到 ${COUNT} 个GPCR相关蛋白" echo "前5个候选蛋白:" head -n 5 "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" else echo "警告: 未找到包含SSF81321结构域的蛋白质" # 直接使用grep提取 grep -B 5 'signature_ac="SSF81321"' "${INTERPRO_OUTPUT}" | grep 'protein name=' | awk -F'"' '{print $2}' | sort -u > "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" if [ -s "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" ]; then COUNT=$(wc -l < "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt") echo "使用grep找到 ${COUNT} 个候选" else touch "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" fi fi fi # ===== 3. 合并候选分子 ===== if [ $START_STEP -le 3 ]; then echo "=== 步骤3: 合并候选GPCR分子 ====" if [ -f "${OUTPUT_DIR}/hmmer_results/pfam.tblout" ]; then awk '! 
/^#/ {print $1}' ${OUTPUT_DIR}/hmmer_results/pfam.tblout | sort -u \ > ${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt else echo "警告: Pfam结果文件缺失" touch ${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt fi cat ${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt \ ${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt \ | sort -u > ${OUTPUT_DIR}/hmmer_results/all_candidates.txt # 提取候选序列 if [ -s "${OUTPUT_DIR}/hmmer_results/all_candidates.txt" ]; then seqkit grep -f ${OUTPUT_DIR}/hmmer_results/all_candidates.txt \ ${INPUT_FASTA} > ${OUTPUT_DIR}/final_results/candidate_sequences.fasta else echo "警告: 无候选序列" touch ${OUTPUT_DIR}/final_results/candidate_sequences.fasta fi fi # ===== 4. DeepTMHMM 预测 (独立 Python 实现) ===== if [ $START_STEP -le 4 ]; then echo "=== 步骤4: 直接运行 DeepTMHMM Python 实现 ===" INPUT_FILE="${OUTPUT_DIR}/final_results/candidate_sequences.fasta" OUTPUT_DIR_TMHMM="${OUTPUT_DIR}/tmhmm_results" # 检查是否有候选序列 if [ ! -s "$INPUT_FILE" ]; then echo "警告: 没有候选序列,跳过 DeepTMHMM 分析" exit 0 fi # 创建输出目录 mkdir -p "${OUTPUT_DIR_TMHMM}" # 激活分析环境 source /home/cm/miniconda3/etc/profile.d/conda.sh conda activate gpcr_analysis # 1. 下载 DeepTMHMM 代码 echo "下载 DeepTMHMM 代码..." DEEPTMHMM_DIR="${WORK_DIR}/DeepTMHMM" DEEPTMHMM_ZIP="${WORK_DIR}/DeepTMHMM.zip" if [ ! -d "${DEEPTMHMM_DIR}" ]; then wget -O "${DEEPTMHMM_ZIP}" https://github.com/ElofssonLab/DeepTMHMM/archive/refs/heads/main.zip unzip -q -d "${WORK_DIR}" "${DEEPTMHMM_ZIP}" mv "${WORK_DIR}/DeepTMHMM-main" "${DEEPTMHMM_DIR}" rm "${DEEPTMHMM_ZIP}" else echo "使用已有的 DeepTMHMM 目录" fi # 2. 安装依赖 echo "安装依赖..." pip install -q tensorflow==2.15.0 protobuf==3.20.3 # 3. 运行预测 echo "运行 DeepTMHMM 预测..." cd "${DEEPTMHMM_DIR}" # 创建运行脚本 cat > run_deeptmhmm.py <<EOF import os import sys import argparse from predict import predict def main(): parser = argparse.ArgumentParser(description='Run DeepTMHMM') parser.add_argument('--fasta', required=True, help='Input FASTA file') parser.add_argument('--out', required=True, help='Output directory') args = parser.parse_args() # 创建命名空间对象 class Args: pass args_obj = Args() args_obj.fasta = args.fasta args_obj.out = args.out args_obj.batch_size = 1 args_obj.cpu = True args_obj.gpu = False # 确保输出目录存在 os.makedirs(args_obj.out, exist_ok=True) # 运行预测 predict(args_obj) if __name__ == "__main__": main() EOF # 运行脚本 python run_deeptmhmm.py \ --fasta "${INPUT_FILE}" \ --out "${OUTPUT_DIR_TMHMM}" # 4. 处理结果 cd "${WORK_DIR}" if [ -f "${OUTPUT_DIR_TMHMM}/predicted_topologies.gff3" ]; then # 重命名结果文件 mv "${OUTPUT_DIR_TMHMM}/predicted_topologies.gff3" "${OUTPUT_DIR_TMHMM}/combined.gff3" # 统计跨膜螺旋 COUNT=$(grep -c "TMhelix" "${OUTPUT_DIR_TMHMM}/combined.gff3" || echo 0) echo "DeepTMHMM 预测完成! 检测到 ${COUNT} 个跨膜螺旋" echo "结果文件: ${OUTPUT_DIR_TMHMM}/combined.gff3" else echo "错误: 未生成预测结果文件" echo "请尝试手动运行:" echo "cd ${DEEPTMHMM_DIR}" echo "python predict.py --fasta ${INPUT_FILE} --out ${OUTPUT_DIR_TMHMM} --cpu" exit 1 fi fi # ===== 5. 
合并结果并筛选 ===== if [ $START_STEP -le 5 ]; then echo "=== 步骤5: 合并结果并筛选 ====" # 合并所有GFF3文件 if ls ${OUTPUT_DIR}/tmhmm_results/*_result.gff3 1> /dev/null 2>&1; then cat ${OUTPUT_DIR}/tmhmm_results/*_result.gff3 > ${OUTPUT_DIR}/tmhmm_results/combined.gff3 else echo "警告: 未找到任何GFF3结果文件" touch ${OUTPUT_DIR}/tmhmm_results/combined.gff3 fi # 提取有效跨膜蛋白 if [ -s "${OUTPUT_DIR}/tmhmm_results/combined.gff3" ]; then awk '$7=="TMhelix" {print $1}' ${OUTPUT_DIR}/tmhmm_results/combined.gff3 \ | sort | uniq -c | awk '$1>=3 && $1<=8 {print $2}' \ > ${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt # 提取有效候选序列 if [ -s "${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt" ]; then seqkit grep -f ${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt \ ${OUTPUT_DIR}/final_results/candidate_sequences.fasta \ > ${OUTPUT_DIR}/final_results/valid_tm_candidates.fasta else echo "警告: 未找到有效跨膜蛋白" touch ${OUTPUT_DIR}/final_results/valid_tm_candidates.fasta fi else echo "警告: combined.gff3文件为空,跳过跨膜筛选" touch ${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt touch ${OUTPUT_DIR}/final_results/valid_tm_candidates.fasta fi fi # ===== 6. 生成最终报告 ===== if [ $START_STEP -le 6 ]; then echo "=== 步骤6: 生成最终报告 ====" # 生成报告 { echo "GPCR 分析最终报告" echo "======================" echo "分析时间: $(date)" echo "工作目录: ${WORK_DIR}" echo "输入文件: ${INPUT_FASTA}" echo "总蛋白数: $(grep -c '>' ${INPUT_FASTA} || echo 0)" # Pfam结果 echo -e "\n[Pfam 结果]" if [ -f "${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt" ] && [ -s "${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt" ]; then echo "候选数: $(wc -l < ${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt)" echo "前5个候选蛋白:" head -n 5 ${OUTPUT_DIR}/hmmer_results/pfam_candidates.txt else echo "无Pfam候选或结果文件缺失" fi # Superfamily结果 echo -e "\n[Superfamily 结果]" if [ -f "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" ] && [ -s "${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt" ]; then echo "候选数: $(wc -l < ${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt)" echo "前5个候选蛋白:" head -n 5 ${OUTPUT_DIR}/hmmer_results/superfamily_gpcrs.txt else echo "无Superfamily候选或结果文件缺失" fi # 合并结果 echo -e "\n[合并候选]" if [ -f "${OUTPUT_DIR}/hmmer_results/all_candidates.txt" ] && [ -s "${OUTPUT_DIR}/hmmer_results/all_candidates.txt" ]; then echo "总数: $(wc -l < ${OUTPUT_DIR}/hmmer_results/all_candidates.txt)" else echo "无合并候选" fi # 跨膜结果 echo -e "\n[跨膜蛋白筛选]" if [ -f "${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt" ] && [ -s "${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt" ]; then echo "有效跨膜蛋白(3-8TM): $(wc -l < ${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt)" echo "前5个有效蛋白:" head -n 5 ${OUTPUT_DIR}/tmhmm_results/valid_tm_proteins.txt else echo "无有效跨膜蛋白" fi # 结果文件列表 echo -e "\n[结果文件]" find ${OUTPUT_DIR} -type f | sed "s|${WORK_DIR}/||" } > ${OUTPUT_DIR}/final_results/summary_report.txt echo "分析完成!结果保存在: ${OUTPUT_DIR}" echo "最终报告: ${OUTPUT_DIR}/final_results/summary_report.txt" fi fi 这是我主要工作的代码,其中的deeptmhmm分析要怎么改进
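On the closing question about improving the DeepTMHMM step: as far as I know, DeepTMHMM is distributed by DTU through BioLib rather than as a plain GitHub checkout, so a more robust variant of step 4 is to call it through the pybiolib client instead of downloading and patching a repository. The sketch below is an outline under stated assumptions, not a drop-in replacement: it requires pip install pybiolib and network access, the DTU/DeepTMHMM app id and the cli()/save_files() calls follow BioLib's documented usage, the paths mirror the script above, and the output file name TMRs.gff3 may differ between releases.

# deeptmhmm_via_biolib.py -- sketch of an alternative step 4 (assumptions noted above)
from pathlib import Path

import biolib  # pip install pybiolib

WORK_DIR = Path("/media/edsb3/disk1/cm/GPCR_project/He")
INPUT_FASTA = WORK_DIR / "GPCR_results/final_results/candidate_sequences.fasta"
OUT_DIR = WORK_DIR / "GPCR_results/tmhmm_results"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Run DeepTMHMM remotely through BioLib and download the result files.
deeptmhmm = biolib.load("DTU/DeepTMHMM")
job = deeptmhmm.cli(args=f"--fasta {INPUT_FASTA}")
job.save_files(str(OUT_DIR))

# Keep proteins with 3-8 predicted TM helices, mirroring the awk filter in step 5.
# The parser only assumes tab-separated lines whose first field is the sequence id;
# it deliberately does not rely on a fixed column index for "TMhelix".
gff3 = OUT_DIR / "TMRs.gff3"  # assumed output name; adjust if your release differs
counts = {}
for line in gff3.read_text().splitlines():
    if line.startswith(("#", "//")) or not line.strip():
        continue
    fields = line.split("\t")
    if "TMhelix" in fields:
        counts[fields[0]] = counts.get(fields[0], 0) + 1

valid = sorted(name for name, n in counts.items() if 3 <= n <= 8)
(OUT_DIR / "valid_tm_proteins.txt").write_text("\n".join(valid) + "\n")
print(f"{len(valid)} candidates with 3-8 TM helices written to valid_tm_proteins.txt")

Compared with the local TensorFlow install in the script, this keeps step 4 independent of the local GPU and Python environment; the trade-off is that the candidate sequences are processed on BioLib's servers.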


KCONFIG-CONF(1)                         October 2017                         KCONFIG-CONF(1)

NAME
    kconfig-conf - Standalone implementation of the Linux Kconfig parser

SYNOPSIS
    kconfig-conf [options] <KConfig_file>

DESCRIPTION
    The kconfig toolkit supports the Kconfig language and implements the parser and the configuration support associated with KConfig files, respecting the Linux kernel Kconfig convention.

    At configuration time:
    •   kconfig-mconf is based on ncurses (menuconfig)
    •   kconfig-conf is based on dialog (config)
    •   kconfig-gconf is based on GTK+ (gconfig)
    •   kconfig-qconf is based on QT (qconfig)

    Associated tools:
    •   kconfig-diff displays symbol diffs between .config files

OPTIONS
    --silentoldconfig
        Only for kconfig-conf: reload a given .config and regenerate the header and command files accordingly.

    --allnoconfig
        Set all boolean configuration symbols to no.

    --allyesconfig
        Set all boolean configuration symbols to yes.

    --randconfig
        Generate a random configuration.

ENVIRONMENT
  Environment variables for '*config'
    KCONFIG_CONFIG
        This environment variable can be used to specify a default kernel config file name to override the default name of ".config".

    KCONFIG_OVERWRITECONFIG
        If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not break symlinks when .config is a symlink to somewhere else.

    CONFIG_
        If you set CONFIG_ in the environment, Kconfig will prefix all symbols with its value when saving the configuration, instead of using the default, "CONFIG_".

  Environment variables for '{allyes/allmod/allno/rand}config'
    KCONFIG_ALLCONFIG
        The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also use the environment variable KCONFIG_ALLCONFIG as a flag or a filename that contains config symbols that the user requires to be set to a specific value.

        If KCONFIG_ALLCONFIG is used without a filename, i.e. KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", "make *config" checks for a file named "all{yes/mod/no/def/random}.config" (corresponding to the *config command that was used) for symbol values that are to be forced. If this file is not found, it checks for a file named "all.config" to contain forced values.

        This enables you to create "miniature" config (miniconfig) or custom config files containing just the config symbols that you are interested in. The kernel config system then generates the full .config file, including the symbols of your miniconfig file.

        This 'KCONFIG_ALLCONFIG' file is a config file which contains (usually a subset of all) preset config symbols. These variable settings are still subject to normal dependency checks.

        Examples:
            KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
        or
            KCONFIG_ALLCONFIG=mini.config make allnoconfig
        or
            make KCONFIG_ALLCONFIG=mini.config allnoconfig

        These examples will disable most options (allnoconfig) but enable or disable the options that are explicitly listed in the specified mini-config files.

  Environment variables for 'randconfig'
    KCONFIG_SEED
        You can set this to the integer value used to seed the RNG, if you want to debug the behaviour of the kconfig parser/frontends. If not set, the current time will be used.

    KCONFIG_PROBABILITY
        This variable can be used to skew the probabilities. See /usr/share/doc/kconfig-frontends/kconfig.txt.gz.

  Environment variables for 'silentoldconfig'
    KCONFIG_NOSILENTUPDATE
        If this variable has a non-blank value, it prevents silent kernel config updates (requires explicit updates).

    KCONFIG_AUTOCONFIG
        This environment variable can be set to specify the path name of the "auto.conf" file. Its default value is "include/config/auto.conf".

    KCONFIG_TRISTATE
        This environment variable can be set to specify the path name of the "tristate.conf" file. Its default value is "include/config/tristate.conf".

    KCONFIG_AUTOHEADER
        This environment variable can be set to specify the path name of the "autoconf.h" (header) file. Its default value is "include/generated/autoconf.h".

HISTORY
    June 2017, man page originally compiled by Philippe Thierry (phil at reseau-libre dot com)

Philippe Thierry                     kconfig-conf Man Page                    KCONFIG-CONF(1)
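The KCONFIG_ALLCONFIG workflow described above is straightforward to script. A minimal sketch, assuming kconfig-conf is on PATH and a Kconfig file exists in the working directory; CONFIG_FOO and CONFIG_BAR are placeholder symbol names:

# allnoconfig_with_overrides.py -- sketch of driving kconfig-conf from a script
import os
import subprocess
from pathlib import Path

build = Path("build")
build.mkdir(exist_ok=True)

# Miniature config fragment: only these symbols are forced; everything else is
# answered "no" by --allnoconfig (still subject to normal dependency checks).
mini = build / "mini.config"
mini.write_text("CONFIG_FOO=y\nCONFIG_BAR=y\n")  # placeholder symbols

env = dict(os.environ)
env["KCONFIG_ALLCONFIG"] = str(mini)            # preset symbols, as documented above
env["KCONFIG_CONFIG"] = str(build / ".config")  # write here instead of ./.config

subprocess.run(["kconfig-conf", "--allnoconfig", "Kconfig"], env=env, check=True)
print((build / ".config").read_text()[:200])

This mirrors the "KCONFIG_ALLCONFIG=mini.config make allnoconfig" example above, only with the standalone kconfig-conf binary in place of the kernel make wrappers.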

