From 1e6551a8acf347cb09f8aa2d610b60bdad0df820 Mon Sep 17 00:00:00 2001 From: Sourcery AI <> Date: Thu, 5 Oct 2023 05:16:46 +0000 Subject: [PATCH] 'Refactored by Sourcery' --- lib/core/config.py | 6 +- lib/core/loss.py | 8 ++- lib/core/trainer.py | 8 ++- lib/data_utils/feature_extractor.py | 2 +- lib/data_utils/img_utils.py | 39 ++++++------- lib/data_utils/insta_utils.py | 22 +++----- lib/data_utils/kp_utils.py | 3 +- lib/data_utils/mpii3d_utils.py | 88 ++++++++++++++--------------- lib/data_utils/penn_action_utils.py | 14 +++-- lib/data_utils/posetrack_utils.py | 28 ++++----- lib/data_utils/threedpw_utils.py | 10 ++-- lib/dataset/amass.py | 6 +- lib/dataset/dataset_2d.py | 2 +- lib/dataset/dataset_3d.py | 23 +------- lib/dataset/inference.py | 7 +-- lib/dataset/insta.py | 10 ++-- lib/models/attention.py | 27 ++++----- lib/models/motion_discriminator.py | 10 ++-- lib/models/resnet.py | 36 ++++++++---- lib/models/smpl.py | 15 ++--- lib/models/spin.py | 33 +++++------ lib/smplify/prior.py | 12 ++-- lib/utils/demo_utils.py | 51 ++++++++++------- lib/utils/eval_utils.py | 8 +-- lib/utils/fbx_output.py | 34 ++++------- lib/utils/geometry.py | 55 +++++++++--------- lib/utils/pose_tracker.py | 12 ++-- lib/utils/smooth_pose.py | 8 +-- lib/utils/utils.py | 10 +--- lib/utils/vis.py | 49 +++++----------- tests/test_3d_datasets.py | 8 +-- 31 files changed, 293 insertions(+), 351 deletions(-) diff --git a/lib/core/config.py b/lib/core/config.py index 469ba4f..3e95561 100644 --- a/lib/core/config.py +++ b/lib/core/config.py @@ -125,9 +125,5 @@ def parse_args(): print(args, end='\n\n') cfg_file = args.cfg - if args.cfg is not None: - cfg = update_cfg(args.cfg) - else: - cfg = get_cfg_defaults() - + cfg = update_cfg(args.cfg) if args.cfg is not None else get_cfg_defaults() return cfg, cfg_file diff --git a/lib/core/loss.py b/lib/core/loss.py index 648abb4..bfc7834 100644 --- a/lib/core/loss.py +++ b/lib/core/loss.py @@ -155,8 +155,12 @@ def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, openpose_weight, gt_ conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone() conf[:, :25] *= openpose_weight conf[:, 25:] *= gt_weight - loss = (conf * self.criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean() - return loss + return ( + conf + * self.criterion_keypoints( + pred_keypoints_2d, gt_keypoints_2d[:, :, :-1] + ) + ).mean() def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d): """ diff --git a/lib/core/trainer.py b/lib/core/trainer.py index 1715e18..913dc33 100644 --- a/lib/core/trainer.py +++ b/lib/core/trainer.py @@ -217,11 +217,13 @@ def train(self): start = time.time() summary_string = f'({i + 1}/{self.num_iters_per_epoch}) | Total: {bar.elapsed_td} | ' \ - f'ETA: {bar.eta_td:} | loss: {losses.avg:.4f}' + f'ETA: {bar.eta_td:} | loss: {losses.avg:.4f}' for k, v in loss_dict.items(): summary_string += f' | {k}: {v:.2f}' - self.writer.add_scalar('train_loss/'+k, v, global_step=self.train_global_step) + self.writer.add_scalar( + f'train_loss/{k}', v, global_step=self.train_global_step + ) for k,v in timer.items(): summary_string += f' | {k}: {v:.2f}' @@ -243,7 +245,7 @@ def train(self): if torch.isnan(total_loss): exit('Nan value in loss, exiting!...') - # =======> + # =======> bar.finish() diff --git a/lib/data_utils/feature_extractor.py b/lib/data_utils/feature_extractor.py index 4db7077..2d78830 100644 --- a/lib/data_utils/feature_extractor.py +++ b/lib/data_utils/feature_extractor.py @@ -35,7 +35,7 @@ def extract_features(model, video, bbox, debug=False, batch_size=200, kp_2d=None ''' device = 'cuda' - if isinstance(video, torch.Tensor) or isinstance(video, np.ndarray): + if isinstance(video, (torch.Tensor, np.ndarray)): video = video elif isinstance(video, str): if os.path.isfile(video): diff --git a/lib/data_utils/img_utils.py b/lib/data_utils/img_utils.py index 9550586..47f8c29 100644 --- a/lib/data_utils/img_utils.py +++ b/lib/data_utils/img_utils.py @@ -40,7 +40,7 @@ def do_augmentation(scale_factor=0.3, color_factor=0.2): def trans_point2d(pt_2d, trans): src_pt = np.array([pt_2d[0], pt_2d[1], 1.]).T dst_pt = np.dot(trans, src_pt) - return dst_pt[0:2] + return dst_pt[:2] def rotate_2d(pt_2d, rot_rad): x = pt_2d[0] @@ -78,12 +78,11 @@ def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_heig dst[1, :] = dst_center + dst_downdir dst[2, :] = dst_center + dst_rightdir - if inv: - trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) - else: - trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) - - return trans + return ( + cv2.getAffineTransform(np.float32(dst), np.float32(src)) + if inv + else cv2.getAffineTransform(np.float32(src), np.float32(dst)) + ) def generate_patch_image_cv(cvimg, c_x, c_y, bb_width, bb_height, patch_width, patch_height, do_flip, scale, rot): img = cvimg.copy() @@ -174,8 +173,7 @@ def get_image_crops(image_file, bboxes): crop_image = convert_cvimg_to_tensor(crop_image) crop_images.append(crop_image) - batch_image = torch.cat([x.unsqueeze(0) for x in crop_images]) - return batch_image + return torch.cat([x.unsqueeze(0) for x in crop_images]) def get_single_image_crop(image, bbox, scale=1.3): if isinstance(image, str): @@ -295,29 +293,24 @@ def get_bbox_from_kp2d(kp_2d): w = h = np.where(w / h > 1, w, h) w = h = h * 1.1 - bbox = np.array([c_x, c_y, w, h]) # shape = (4,N) - return bbox + return np.array([c_x, c_y, w, h]) def normalize_2d_kp(kp_2d, crop_size=224, inv=False): + ratio = 1.0 / crop_size # Normalize keypoints between -1, 1 - if not inv: - ratio = 1.0 / crop_size - kp_2d = 2.0 * kp_2d * ratio - 1.0 - else: - ratio = 1.0 / crop_size - kp_2d = (kp_2d + 1.0)/(2*ratio) - + kp_2d = 2.0 * kp_2d * ratio - 1.0 if not inv else (kp_2d + 1.0)/(2*ratio) return kp_2d def get_default_transform(): normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) - transform = transforms.Compose([ - transforms.ToTensor(), - normalize, - ]) - return transform + return transforms.Compose( + [ + transforms.ToTensor(), + normalize, + ] + ) def split_into_chunks(vid_names, seqlen, stride): video_start_end_indices = [] diff --git a/lib/data_utils/insta_utils.py b/lib/data_utils/insta_utils.py index a53939a..27c6a4e 100644 --- a/lib/data_utils/insta_utils.py +++ b/lib/data_utils/insta_utils.py @@ -76,18 +76,14 @@ def decode_jpeg(self, image_data): return image def encode_jpeg(self, image): - image_data = self._sess.run( - self._encode_jpeg, feed_dict={ - self._encode_jpeg_data: image - }) - return image_data + return self._sess.run( + self._encode_jpeg, feed_dict={self._encode_jpeg_data: image} + ) def encode_png(self, image): - image_data = self._sess.run( - self._encode_png, feed_dict={ - self._encode_png_data: image - }) - return image_data + return self._sess.run( + self._encode_png, feed_dict={self._encode_png_data: image} + ) def decode_png(self, image_data): image = self._sess.run( @@ -326,7 +322,7 @@ def read_single_record(fname): print(features.shape) assert features.shape[0] == N - for k in dataset.keys(): + for k in dataset: dataset[k] = np.concatenate(dataset[k]) for k,v in dataset.items(): @@ -355,10 +351,10 @@ def concatenate_annotations(): filename = osp.join(VIBE_DB_DIR, 'insta_parts', f'insta_train_part_{i}.h5') print(filename) with h5py.File(filename, 'r') as f: - for k in ds.keys(): + for k in ds: ds[k].append(f[k].value) - for k in ds.keys(): + for k in ds: ds[k] = np.concatenate(ds[k]) print('Saving Insta Variety dataset!..') diff --git a/lib/data_utils/kp_utils.py b/lib/data_utils/kp_utils.py index 3788222..4e10cfc 100644 --- a/lib/data_utils/kp_utils.py +++ b/lib/data_utils/kp_utils.py @@ -40,8 +40,7 @@ def convert_kps(joints2d, src, dst): def get_perm_idxs(src, dst): src_names = eval(f'get_{src}_joint_names')() dst_names = eval(f'get_{dst}_joint_names')() - idxs = [src_names.index(h) for h in dst_names if h in src_names] - return idxs + return [src_names.index(h) for h in dst_names if h in src_names] def get_mpii3d_test_joint_names(): return [ diff --git a/lib/data_utils/mpii3d_utils.py b/lib/data_utils/mpii3d_utils.py index 333d4e7..e174b35 100644 --- a/lib/data_utils/mpii3d_utils.py +++ b/lib/data_utils/mpii3d_utils.py @@ -19,43 +19,42 @@ def read_openpose(json_file, gt_part, dataset): - # get only the arms/legs joints - op_to_12 = [11, 10, 9, 12, 13, 14, 4, 3, 2, 5, 6, 7] # read the openpose detection json_data = json.load(open(json_file, 'r')) people = json_data['people'] if len(people) == 0: # no openpose detection - keyp25 = np.zeros([25,3]) - else: - # size of person in pixels - scale = max(max(gt_part[:,0])-min(gt_part[:,0]),max(gt_part[:,1])-min(gt_part[:,1])) - # go through all people and find a match - dist_conf = np.inf*np.ones(len(people)) - for i, person in enumerate(people): - # openpose keypoints - op_keyp25 = np.reshape(person['pose_keypoints_2d'], [25,3]) - op_keyp12 = op_keyp25[op_to_12, :2] - op_conf12 = op_keyp25[op_to_12, 2:3] > 0 + return np.zeros([25,3]) + # size of person in pixels + scale = max(max(gt_part[:,0])-min(gt_part[:,0]),max(gt_part[:,1])-min(gt_part[:,1])) + # go through all people and find a match + dist_conf = np.inf*np.ones(len(people)) + # get only the arms/legs joints + op_to_12 = [11, 10, 9, 12, 13, 14, 4, 3, 2, 5, 6, 7] + for i, person in enumerate(people): + # openpose keypoints + op_keyp25 = np.reshape(person['pose_keypoints_2d'], [25,3]) + op_conf12 = op_keyp25[op_to_12, 2:3] > 0 # all the relevant joints should be detected - if min(op_conf12) > 0: - # weighted distance of keypoints - dist_conf[i] = np.mean(np.sqrt(np.sum(op_conf12*(op_keyp12 - gt_part[:12, :2])**2, axis=1))) - # closest match - p_sel = np.argmin(dist_conf) + if min(op_conf12) > 0: + op_keyp12 = op_keyp25[op_to_12, :2] + # weighted distance of keypoints + dist_conf[i] = np.mean(np.sqrt(np.sum(op_conf12*(op_keyp12 - gt_part[:12, :2])**2, axis=1))) + # closest match + p_sel = np.argmin(dist_conf) # the exact threshold is not super important but these are the values we used - if dataset == 'mpii': - thresh = 30 - elif dataset == 'coco': - thresh = 10 - else: - thresh = 0 + if dataset == 'coco': + thresh = 10 + elif dataset == 'mpii': + thresh = 30 + else: + thresh = 0 # dataset-specific thresholding based on pixel size of person - if min(dist_conf)/scale > 0.1 and min(dist_conf) < thresh: - keyp25 = np.zeros([25,3]) - else: - keyp25 = np.reshape(people[p_sel]['pose_keypoints_2d'], [25,3]) - return keyp25 + return ( + np.zeros([25, 3]) + if min(dist_conf) / scale > 0.1 and min(dist_conf) < thresh + else np.reshape(people[p_sel]['pose_keypoints_2d'], [25, 3]) + ) def read_calibration(calib_file, vid_list): @@ -99,9 +98,7 @@ def read_data_train(dataset_path, debug=False): for user_i in user_list: for seq_i in seq_list: - seq_path = os.path.join(dataset_path, - 'S' + str(user_i), - 'Seq' + str(seq_i)) + seq_path = os.path.join(dataset_path, f'S{str(user_i)}', f'Seq{str(seq_i)}') # mat file with annotations annot_file = os.path.join(seq_path, 'annot.mat') annot2 = sio.loadmat(annot_file)['annot2'] @@ -109,8 +106,7 @@ def read_data_train(dataset_path, debug=False): # calibration file and camera parameters for j, vid_i in enumerate(vid_list): # image folder - imgs_path = os.path.join(seq_path, - 'video_' + str(vid_i)) + imgs_path = os.path.join(seq_path, f'video_{str(vid_i)}') # per frame pattern = os.path.join(imgs_path, '*.jpg') img_list = sorted(glob.glob(pattern)) @@ -118,7 +114,7 @@ def read_data_train(dataset_path, debug=False): vid_used_joints = [] vid_used_bbox = [] vid_segments = [] - vid_uniq_id = "subj" + str(user_i) + '_seq' + str(seq_i) + "_vid" + str(vid_i) + "_seg0" + vid_uniq_id = f"subj{str(user_i)}_seq{str(seq_i)}_vid{str(vid_i)}_seg0" for i, img_i in tqdm_enumerate(img_list): # for each image we store the relevant annotations @@ -191,7 +187,7 @@ def read_data_train(dataset_path, debug=False): dataset='spin', debug=False) dataset['features'].append(features) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.array(dataset[k]) dataset['features'] = np.concatenate(dataset['features']) @@ -217,9 +213,9 @@ def read_test_data(dataset_path): for user_i in user_list: print('Subject', user_i) - seq_path = os.path.join(dataset_path, - 'mpi_inf_3dhp_test_set', - 'TS' + str(user_i)) + seq_path = os.path.join( + dataset_path, 'mpi_inf_3dhp_test_set', f'TS{str(user_i)}' + ) # mat file with annotations annot_file = os.path.join(seq_path, 'annot_data.mat') mat_as_h5 = h5py.File(annot_file, 'r') @@ -231,15 +227,17 @@ def read_test_data(dataset_path): vid_used_joints = [] vid_used_bbox = [] vid_segments = [] - vid_uniq_id = "subj" + str(user_i) + "_seg0" + vid_uniq_id = f"subj{str(user_i)}_seg0" for frame_i, valid_i in tqdm(enumerate(valid)): - img_i = os.path.join('mpi_inf_3dhp_test_set', - 'TS' + str(user_i), - 'imageSequence', - 'img_' + str(frame_i + 1).zfill(6) + '.jpg') + img_i = os.path.join( + 'mpi_inf_3dhp_test_set', + f'TS{str(user_i)}', + 'imageSequence', + f'img_{str(frame_i + 1).zfill(6)}.jpg', + ) joints_2d_raw = np.expand_dims(annot2[frame_i, 0, :, :], axis = 0) joints_2d_raw = np.append(joints_2d_raw, np.ones((1, 17, 1)), axis=2) @@ -317,7 +315,7 @@ def read_test_data(dataset_path): dataset='spin', debug=False) dataset['features'].append(features) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.array(dataset[k]) dataset['features'] = np.concatenate(dataset['features']) diff --git a/lib/data_utils/penn_action_utils.py b/lib/data_utils/penn_action_utils.py index 48d5562..863aec4 100644 --- a/lib/data_utils/penn_action_utils.py +++ b/lib/data_utils/penn_action_utils.py @@ -71,11 +71,17 @@ def read_data(folder): model = spin.get_pretrained_hmr() - file_names = sorted(glob.glob(folder + '/labels/'+'*.mat')) + file_names = sorted(glob.glob(f'{folder}/labels/*.mat')) for fname in tqdm(file_names): vid_dict=load_mat(fname) - imgs = sorted(glob.glob(folder + '/frames/'+ fname.strip().split('/')[-1].split('.')[0]+'/*.jpg')) + imgs = sorted( + glob.glob( + f'{folder}/frames/' + + fname.strip().split('/')[-1].split('.')[0] + + '/*.jpg' + ) + ) kp_2d = np.zeros((vid_dict['nframes'], 13, 3)) perm_idxs = get_perm_idxs('pennaction', 'common') @@ -109,9 +115,9 @@ def read_data(folder): features = extract_features(model, np.array(imgs) , bbox, dataset='pennaction', debug=False) dataset['features'].append(features) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.array(dataset[k]) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.concatenate(dataset[k]) return dataset diff --git a/lib/data_utils/posetrack_utils.py b/lib/data_utils/posetrack_utils.py index b075c5c..04f4d8f 100644 --- a/lib/data_utils/posetrack_utils.py +++ b/lib/data_utils/posetrack_utils.py @@ -62,8 +62,7 @@ def read_data(folder, set): num_people = -1 for x in anns['annotations']: - if num_people < x['track_id']: - num_people = x['track_id'] + num_people = max(num_people, x['track_id']) num_people += 1 posetrack_joints = get_posetrack_original_kp_names() idxs = [anns['categories'][0]['keypoints'].index(h) for h in posetrack_joints if h in anns['categories'][0]['keypoints']] @@ -75,9 +74,12 @@ def read_data(folder, set): tot_frames += num_people * num_frames for p_id in range(num_people): - annot_pid = [(item['keypoints'], item['bbox'], item['image_id']) - for item in anns['annotations'] - if item['track_id'] == p_id and not(np.count_nonzero(item['keypoints']) == 0) ] + annot_pid = [ + (item['keypoints'], item['bbox'], item['image_id']) + for item in anns['annotations'] + if item['track_id'] == p_id + and np.count_nonzero(item['keypoints']) != 0 + ] if len(annot_pid) < min_frame_number: nn_corrupted += len(annot_pid) @@ -97,7 +99,7 @@ def read_data(folder, set): img_paths[i] = image_id key2djnts[2::3] = len(key2djnts[2::3])*[1] - kp_2d[i,:] = np.array(key2djnts).reshape(int(len(key2djnts)/3),3) # [perm_idxs, :] + kp_2d[i,:] = np.array(key2djnts).reshape(len(key2djnts) // 3, 3) for kp_loc in kp_2d[i,:]: if kp_loc[0] == 0 and kp_loc[1] == 0: kp_loc[2] = 0 @@ -120,11 +122,11 @@ def read_data(folder, set): img_paths = list(img_paths) img_paths = [osp.join(folder, frame2imgname[item]) if item != 0 else 0 for item in img_paths ] - bbx_idxs = [] - for bbx_id, bbx in enumerate(bbox): - if np.count_nonzero(bbx) == 0: - bbx_idxs += [bbx_id] - + bbx_idxs = [ + bbx_id + for bbx_id, bbx in enumerate(bbox) + if np.count_nonzero(bbx) == 0 + ] kp_2d = np.delete(kp_2d, bbx_idxs, 0) img_paths = np.delete(np.array(img_paths), bbx_idxs, 0) bbox = np.delete(bbox, np.where(~bbox.any(axis=1))[0], axis=0) @@ -157,10 +159,10 @@ def read_data(folder, set): print(nn_corrupted, tot_frames) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.array(dataset[k]) - for k in dataset.keys(): + for k in dataset: dataset[k] = np.concatenate(dataset[k]) for k,v in dataset.items(): diff --git a/lib/data_utils/threedpw_utils.py b/lib/data_utils/threedpw_utils.py index a3865b5..ce0da8a 100644 --- a/lib/data_utils/threedpw_utils.py +++ b/lib/data_utils/threedpw_utils.py @@ -61,12 +61,12 @@ def read_data(folder, set, debug=False): J_regressor = None smpl = SMPL(SMPL_MODEL_DIR, batch_size=1, create_transl=False) - if set == 'test' or set == 'validation': + if set in ['test', 'validation']: J_regressor = torch.from_numpy(np.load(osp.join(VIBE_DATA_DIR, 'J_regressor_h36m.npy'))).float() for i, seq in tqdm(enumerate(sequences)): - data_file = osp.join(folder, 'sequenceFiles', set, seq + '.pkl') + data_file = osp.join(folder, 'sequenceFiles', set, f'{seq}.pkl') data = pkl.load(open(data_file, 'rb'), encoding='latin1') @@ -145,13 +145,13 @@ def read_data(folder, set, debug=False): kp_2d=j2d[time_pt1:time_pt2], debug=debug, dataset='3dpw', scale=1.2) dataset['features'].append(features) - for k in dataset.keys(): + for k, v in dataset.items(): dataset[k] = np.concatenate(dataset[k]) - print(k, dataset[k].shape) + print(k, v.shape) # Filter out keypoints indices_to_use = np.where((dataset['joints2D'][:, :, 2] > VIS_THRESH).sum(-1) > MIN_KP)[0] - for k in dataset.keys(): + for k in dataset: dataset[k] = dataset[k][indices_to_use] return dataset diff --git a/lib/dataset/amass.py b/lib/dataset/amass.py index a74ea2a..5d1ec3a 100644 --- a/lib/dataset/amass.py +++ b/lib/dataset/amass.py @@ -42,8 +42,7 @@ def __getitem__(self, index): def load_db(self): db_file = osp.join(VIBE_DB_DIR, 'amass_db.pt') - db = joblib.load(db_file) - return db + return joblib.load(db_file) def get_single_item(self, index): start_index, end_index = self.vid_indices[index] @@ -53,10 +52,9 @@ def get_single_item(self, index): cam = np.repeat(cam, thetas.shape[0], axis=0) theta = np.concatenate([cam, thetas], axis=-1) - target = { + return { 'theta': torch.from_numpy(theta).float(), # cam, pose and shape } - return target diff --git a/lib/dataset/dataset_2d.py b/lib/dataset/dataset_2d.py index fd323a5..dd505ed 100644 --- a/lib/dataset/dataset_2d.py +++ b/lib/dataset/dataset_2d.py @@ -121,7 +121,7 @@ def get_single_item(self, index): f = osp.join(self.folder, vid_folder, vid_name) video_file_list = [osp.join(f, x) for x in sorted(os.listdir(f)) if x.endswith('.jpg')] frame_idxs = self.db[img_id][start_index:end_index + 1] - if self.dataset_name == 'pennaction' or self.dataset_name == 'posetrack': + if self.dataset_name in ['pennaction', 'posetrack']: video = frame_idxs else: video = [video_file_list[i] for i in frame_idxs] diff --git a/lib/dataset/dataset_3d.py b/lib/dataset/dataset_3d.py index ea02b2e..ade1f7c 100644 --- a/lib/dataset/dataset_3d.py +++ b/lib/dataset/dataset_3d.py @@ -66,19 +66,12 @@ def get_single_item(self, index): if self.dataset_name == '3dpw': kp_2d = convert_kps(self.db['joints2D'][start_index:end_index + 1], src='common', dst='spin') kp_3d = self.db['joints3D'][start_index:end_index + 1] - elif self.dataset_name == 'mpii3d': - kp_2d = self.db['joints2D'][start_index:end_index + 1] - if is_train: - kp_3d = self.db['joints3D'][start_index:end_index + 1] - else: - kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common') - elif self.dataset_name == 'h36m': + elif self.dataset_name in ['mpii3d', 'h36m']: kp_2d = self.db['joints2D'][start_index:end_index + 1] if is_train: kp_3d = self.db['joints3D'][start_index:end_index + 1] else: kp_3d = convert_kps(self.db['joints3D'][start_index:end_index + 1], src='spin', dst='common') - kp_2d_tensor = np.ones((self.seqlen, 49, 3), dtype=np.float16) nj = 14 if not is_train else 49 kp_3d_tensor = np.zeros((self.seqlen, nj, 3), dtype=np.float16) @@ -94,12 +87,11 @@ def get_single_item(self, index): pose = np.zeros((kp_2d.shape[0], 72)) shape = np.zeros((kp_2d.shape[0], 10)) w_smpl = torch.zeros(self.seqlen).float() - w_3d = torch.ones(self.seqlen).float() else: pose = self.db['pose'][start_index:end_index + 1] shape = self.db['shape'][start_index:end_index + 1] w_smpl = torch.ones(self.seqlen).float() - w_3d = torch.ones(self.seqlen).float() + w_3d = torch.ones(self.seqlen).float() elif self.dataset_name == 'mpii3d': pose = np.zeros((kp_2d.shape[0], 72)) shape = np.zeros((kp_2d.shape[0], 10)) @@ -152,21 +144,12 @@ def get_single_item(self, index): - # if self.dataset_name == '3dpw' and not self.is_train: - # target['imgname'] = self.db['img_name'][start_index:end_index+1].tolist() - # target['imgname'] = np.array(target['imgname']) - # print(target['imgname'].dtype) - # target['center'] = self.db['bbox'][start_index:end_index+1, :2] - # target['valid'] = torch.from_numpy(self.db['valid'][start_index:end_index+1]) - if self.debug: from lib.data_utils.img_utils import get_single_image_crop - if self.dataset_name == 'mpii3d': + if self.dataset_name in ['mpii3d', 'h36m']: video = self.db['img_name'][start_index:end_index+1] # print(video) - elif self.dataset_name == 'h36m': - video = self.db['img_name'][start_index:end_index + 1] else: vid_name = self.db['vid_name'][start_index] vid_name = '_'.join(vid_name.split('_')[:-1]) diff --git a/lib/dataset/inference.py b/lib/dataset/inference.py index 318657a..a40fbfb 100644 --- a/lib/dataset/inference.py +++ b/lib/dataset/inference.py @@ -39,7 +39,7 @@ def __init__(self, image_folder, frames, bboxes=None, joints2d=None, scale=1.0, self.scale = scale self.crop_size = crop_size self.frames = frames - self.has_keypoints = True if joints2d is not None else False + self.has_keypoints = joints2d is not None self.norm_joints2d = np.zeros_like(self.joints2d) @@ -68,10 +68,7 @@ def __getitem__(self, idx): kp_2d=j2d, scale=self.scale, crop_size=self.crop_size) - if self.has_keypoints: - return norm_img, kp_2d - else: - return norm_img + return (norm_img, kp_2d) if self.has_keypoints else norm_img class ImageFolder(Dataset): diff --git a/lib/dataset/insta.py b/lib/dataset/insta.py index c14e0dd..019ed4f 100644 --- a/lib/dataset/insta.py +++ b/lib/dataset/insta.py @@ -67,10 +67,10 @@ def get_single_item(self, index): kp_2d[idx,:,:2] = normalize_2d_kp(kp_2d[idx,:,:2], 224) kp_2d_tensor[idx] = kp_2d[idx] - target = { + return { 'features': input, - 'kp_2d': torch.from_numpy(kp_2d_tensor).float(), # 2D keypoints transformed according to bbox cropping + 'kp_2d': torch.from_numpy( + kp_2d_tensor + ).float(), # 2D keypoints transformed according to bbox cropping # 'instance_id': instance_id - } - - return target \ No newline at end of file + } \ No newline at end of file diff --git a/lib/models/attention.py b/lib/models/attention.py index f97a154..463add6 100644 --- a/lib/models/attention.py +++ b/lib/models/attention.py @@ -32,24 +32,19 @@ def __init__(self, attention_size, self.batch_first = batch_first - if non_linearity == "relu": - activation = nn.ReLU() - else: - activation = nn.Tanh() - + activation = nn.ReLU() if non_linearity == "relu" else nn.Tanh() modules = [] - for i in range(layers - 1): - modules.append(nn.Linear(attention_size, attention_size)) - modules.append(activation) - modules.append(nn.Dropout(dropout)) - - # last attention layer must output 1 - modules.append(nn.Linear(attention_size, 1)) - modules.append(activation) - modules.append(nn.Dropout(dropout)) - + for _ in range(layers - 1): + modules.extend( + ( + nn.Linear(attention_size, attention_size), + activation, + nn.Dropout(dropout), + ) + ) + modules.extend((nn.Linear(attention_size, 1), activation, nn.Dropout(dropout))) self.attention = nn.Sequential(*modules) - self.attention.apply(init_weights) + self.attention.apply(init_weights) self.softmax = nn.Softmax(dim=-1) diff --git a/lib/models/motion_discriminator.py b/lib/models/motion_discriminator.py index dc15166..9c40f4b 100644 --- a/lib/models/motion_discriminator.py +++ b/lib/models/motion_discriminator.py @@ -44,7 +44,7 @@ def __init__(self, self.gru = nn.GRU(self.input_size, self.rnn_size, num_layers=num_layers) - linear_size = self.rnn_size if not feature_pool == "concat" else self.rnn_size * 2 + linear_size = self.rnn_size if feature_pool != "concat" else self.rnn_size * 2 if feature_pool == "attention" : self.attention = SelfAttention(attention_size=self.attention_size, @@ -68,12 +68,10 @@ def forward(self, sequence): outputs = F.relu(outputs) avg_pool = F.adaptive_avg_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1) max_pool = F.adaptive_max_pool1d(outputs.permute(1, 2, 0), 1).view(batchsize, -1) - output = self.fc(torch.cat([avg_pool, max_pool], dim=1)) + return self.fc(torch.cat([avg_pool, max_pool], dim=1)) elif self.feature_pool == "attention": outputs = outputs.permute(1, 0, 2) y, attentions = self.attention(outputs) - output = self.fc(y) + return self.fc(y) else: - output = self.fc(outputs[-1]) - - return output + return self.fc(outputs[-1]) diff --git a/lib/models/resnet.py b/lib/models/resnet.py index 1a7957e..c960afa 100644 --- a/lib/models/resnet.py +++ b/lib/models/resnet.py @@ -135,8 +135,9 @@ def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, # the 2x2 stride with a dilated convolution instead replace_stride_with_dilation = [False, False, False] if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + raise ValueError( + f"replace_stride_with_dilation should be None or a 3-element tuple, got {replace_stride_with_dilation}" + ) self.groups = groups self.base_width = width_per_group self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, @@ -184,15 +185,30 @@ def _make_layer(self, block, planes, blocks, stride=1, dilate=False): norm_layer(planes * block.expansion), ) - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample, self.groups, - self.base_width, previous_dilation, norm_layer)) + layers = [ + block( + self.inplanes, + planes, + stride, + downsample, + self.groups, + self.base_width, + previous_dilation, + norm_layer, + ) + ] self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes, groups=self.groups, - base_width=self.base_width, dilation=self.dilation, - norm_layer=norm_layer)) - + layers.extend( + block( + self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation, + norm_layer=norm_layer, + ) + for _ in range(1, blocks) + ) return nn.Sequential(*layers) def forward(self, x): diff --git a/lib/models/smpl.py b/lib/models/smpl.py index 17723b1..d09f6dd 100644 --- a/lib/models/smpl.py +++ b/lib/models/smpl.py @@ -74,13 +74,14 @@ def forward(self, *args, **kwargs): extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) joints = torch.cat([smpl_output.joints, extra_joints], dim=1) joints = joints[:, self.joint_map, :] - output = SMPLOutput(vertices=smpl_output.vertices, - global_orient=smpl_output.global_orient, - body_pose=smpl_output.body_pose, - joints=joints, - betas=smpl_output.betas, - full_pose=smpl_output.full_pose) - return output + return SMPLOutput( + vertices=smpl_output.vertices, + global_orient=smpl_output.global_orient, + body_pose=smpl_output.body_pose, + joints=joints, + betas=smpl_output.betas, + full_pose=smpl_output.full_pose, + ) def get_smpl_faces(): diff --git a/lib/models/spin.py b/lib/models/spin.py index 079d10b..4bd5d4d 100644 --- a/lib/models/spin.py +++ b/lib/models/spin.py @@ -116,12 +116,9 @@ def _make_layer(self, block, planes, blocks, stride=1): nn.BatchNorm2d(planes * block.expansion), ) - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) + layers = [block(self.inplanes, planes, stride, downsample)] self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - + layers.extend(block(self.inplanes, planes) for _ in range(1, blocks)) return nn.Sequential(*layers) def feature_extractor(self, x): @@ -167,7 +164,7 @@ def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3, r pred_pose = init_pose pred_shape = init_shape pred_cam = init_cam - for i in range(n_iter): + for _ in range(n_iter): xc = torch.cat([xf, pred_pose, pred_shape, pred_cam], 1) xc = self.fc1(xc) xc = self.drop1(xc) @@ -200,10 +197,7 @@ def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3, r 'kp_3d': pred_joints, }] - if return_features: - return xf, output - else: - return output + return (xf, output) if return_features else output class Regressor(nn.Module): @@ -252,7 +246,7 @@ def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3, J pred_pose = init_pose pred_shape = init_shape pred_cam = init_cam - for i in range(n_iter): + for _ in range(n_iter): xc = torch.cat([x, pred_pose, pred_shape, pred_cam], 1) xc = self.fc1(xc) xc = self.drop1(xc) @@ -283,14 +277,15 @@ def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3, J pose = rotation_matrix_to_angle_axis(pred_rotmat.reshape(-1, 3, 3)).reshape(-1, 72) - output = [{ - 'theta' : torch.cat([pred_cam, pose, pred_shape], dim=1), - 'verts' : pred_vertices, - 'kp_2d' : pred_keypoints_2d, - 'kp_3d' : pred_joints, - 'rotmat' : pred_rotmat - }] - return output + return [ + { + 'theta': torch.cat([pred_cam, pose, pred_shape], dim=1), + 'verts': pred_vertices, + 'kp_2d': pred_keypoints_2d, + 'kp_3d': pred_joints, + 'rotmat': pred_rotmat, + } + ] def hmr(smpl_mean_params=SMPL_MEAN_PARAMS, pretrained=True, **kwargs): diff --git a/lib/smplify/prior.py b/lib/smplify/prior.py index f7e8987..ab64364 100644 --- a/lib/smplify/prior.py +++ b/lib/smplify/prior.py @@ -109,7 +109,7 @@ def __init__(self, prior_folder='prior', elif dtype == torch.float64: np_dtype = np.float64 else: - print('Unknown float type {}, exiting!'.format(dtype)) + print(f'Unknown float type {dtype}, exiting!') sys.exit(-1) self.num_gaussians = num_gaussians @@ -119,8 +119,9 @@ def __init__(self, prior_folder='prior', full_gmm_fn = os.path.join(prior_folder, gmm_fn) if not os.path.exists(full_gmm_fn): - print('The path to the mixture prior "{}"'.format(full_gmm_fn) + - ' does not exist, exiting!') + print( + f'The path to the mixture prior "{full_gmm_fn}" does not exist, exiting!' + ) sys.exit(-1) with open(full_gmm_fn, 'rb') as f: @@ -135,7 +136,7 @@ def __init__(self, prior_folder='prior', covs = gmm.covars_.astype(np_dtype) weights = gmm.weights_.astype(np_dtype) else: - print('Unknown type for the prior: {}, exiting!'.format(type(gmm))) + print(f'Unknown type for the prior: {type(gmm)}, exiting!') sys.exit(-1) self.register_buffer('means', torch.tensor(means, dtype=dtype)) @@ -174,8 +175,7 @@ def __init__(self, prior_folder='prior', def get_mean(self): ''' Returns the mean of the mixture ''' - mean_pose = torch.matmul(self.weights, self.means) - return mean_pose + return torch.matmul(self.weights, self.means) def merged_log_likelihood(self, pose, betas): diff_from_mean = pose.unsqueeze(dim=1) - self.means diff --git a/lib/utils/demo_utils.py b/lib/utils/demo_utils.py index 60ba205..5421ebf 100644 --- a/lib/utils/demo_utils.py +++ b/lib/utils/demo_utils.py @@ -158,24 +158,37 @@ def smplify_runner( new_opt_betas = output['theta'][:,75:] new_opt_joints3d = output['kp_3d'] - return_val = [ - update, new_opt_vertices.cpu(), new_opt_cam_t.cpu(), - new_opt_pose.cpu(), new_opt_betas.cpu(), new_opt_joints3d.cpu(), - new_opt_joint_loss, opt_joint_loss, + return [ + update, + new_opt_vertices.cpu(), + new_opt_cam_t.cpu(), + new_opt_pose.cpu(), + new_opt_betas.cpu(), + new_opt_joints3d.cpu(), + new_opt_joint_loss, + opt_joint_loss, ] - return return_val - def trim_videos(filename, start_time, end_time, output_filename): - command = ['ffmpeg', - '-i', '"%s"' % filename, - '-ss', str(start_time), - '-t', str(end_time - start_time), - '-c:v', 'libx264', '-c:a', 'copy', - '-threads', '1', - '-loglevel', 'panic', - '"%s"' % output_filename] + command = [ + 'ffmpeg', + '-i', + f'"{filename}"', + '-ss', + str(start_time), + '-t', + str(end_time - start_time), + '-c:v', + 'libx264', + '-c:a', + 'copy', + '-threads', + '1', + '-loglevel', + 'panic', + f'"{output_filename}"', + ] # command = ' '.join(command) subprocess.call(command) @@ -216,14 +229,11 @@ def download_ckpt(outdir='data/vibe_data', use_3dpw=False): if use_3dpw: ckpt_file = 'data/vibe_data/vibe_model_w_3dpw.pth.tar' url = 'https://www.dropbox.com/s/41ozgqorcp095ja/vibe_model_w_3dpw.pth.tar' - if not os.path.isfile(ckpt_file): - download_url(url=url, outdir=outdir) else: ckpt_file = 'data/vibe_data/vibe_model_wo_3dpw.pth.tar' url = 'https://www.dropbox.com/s/amj2p8bmf6g56k6/vibe_model_wo_3dpw.pth.tar' - if not os.path.isfile(ckpt_file): - download_url(url=url, outdir=outdir) - + if not os.path.isfile(ckpt_file): + download_url(url=url, outdir=outdir) return ckpt_file @@ -255,8 +265,7 @@ def convert_crop_cam_to_orig_img(cam, bbox, img_width, img_height): sy = cam[:,0] * (1. / (img_height / h)) tx = ((cx - hw) / hw / sx) + cam[:,1] ty = ((cy - hh) / hh / sy) + cam[:,2] - orig_cam = np.stack([sx, sy, tx, ty]).T - return orig_cam + return np.stack([sx, sy, tx, ty]).T def convert_crop_coords_to_orig_img(bbox, keypoints, crop_size): diff --git a/lib/utils/eval_utils.py b/lib/utils/eval_utils.py index 59c9296..add0ca5 100644 --- a/lib/utils/eval_utils.py +++ b/lib/utils/eval_utils.py @@ -95,7 +95,7 @@ def compute_similarity_transform(S1, S2): i.e. solves the orthogonal Procrutes problem. ''' transposed = False - if S1.shape[0] != 3 and S1.shape[0] != 2: + if S1.shape[0] not in [3, 2]: S1 = S1.T S2 = S2.T transposed = True @@ -146,7 +146,7 @@ def compute_similarity_transform_torch(S1, S2): i.e. solves the orthogonal Procrutes problem. ''' transposed = False - if S1.shape[0] != 3 and S1.shape[0] != 2: + if S1.shape[0] not in [3, 2]: S1 = S1.T S2 = S2.T transposed = True @@ -204,7 +204,7 @@ def batch_compute_similarity_transform_torch(S1, S2): i.e. solves the orthogonal Procrutes problem. ''' transposed = False - if S1.shape[0] != 3 and S1.shape[0] != 2: + if S1.shape[0] not in [3, 2]: S1 = S1.permute(0,2,1) S2 = S2.permute(0,2,1) transposed = True @@ -273,7 +273,7 @@ def compute_errors(gt3ds, preds): - preds: N x 14 x 3 """ errors, errors_pa = [], [] - for i, (gt3d, pred) in enumerate(zip(gt3ds, preds)): + for gt3d, pred in zip(gt3ds, preds): gt3d = gt3d.reshape(-1, 3) # Root align. gt3d = align_by_pelvis(gt3d) diff --git a/lib/utils/fbx_output.py b/lib/utils/fbx_output.py index 37dfc1b..171ca9e 100644 --- a/lib/utils/fbx_output.py +++ b/lib/utils/fbx_output.py @@ -110,11 +110,7 @@ def setup_scene(model_path, fps_target): # Process single pose into keyframed bone orientations def process_pose(current_frame, pose, trans, pelvis_position): - if pose.shape[0] == 72: - rod_rots = pose.reshape(24, 3) - else: - rod_rots = pose.reshape(26, 3) - + rod_rots = pose.reshape(24, 3) if pose.shape[0] == 72 else pose.reshape(26, 3) mat_rots = [Rodrigues(rod_rot) for rod_rot in rod_rots] # Set the location of the Pelvis bone to the translation parameter @@ -160,7 +156,7 @@ def process_poses( person_id=1, ): - print('Processing: ' + input_path) + print(f'Processing: {input_path}') data = joblib.load(input_path) poses = data[person_id]['pose'] @@ -169,19 +165,17 @@ def process_poses( if gender == 'female': model_path = female_model_path for k,v in bone_name_from_index.items(): - bone_name_from_index[k] = 'f_avg_' + v + bone_name_from_index[k] = f'f_avg_{v}' elif gender == 'male': model_path = male_model_path for k,v in bone_name_from_index.items(): - bone_name_from_index[k] = 'm_avg_' + v + bone_name_from_index[k] = f'm_avg_{v}' else: - print('ERROR: Unsupported gender: ' + gender) + print(f'ERROR: Unsupported gender: {gender}') sys.exit(1) # Limit target fps to source fps - if fps_target > fps_source: - fps_target = fps_source - + fps_target = min(fps_target, fps_source) print(f'Gender: {gender}') print(f'Number of source poses: {str(poses.shape[0])}') print(f'Source frames-per-second: {str(fps_source)}') @@ -207,7 +201,7 @@ def process_poses( offset = np.array([0.0, 0.0, 0.0]) while source_index < poses.shape[0]: - print('Adding pose: ' + str(source_index)) + print(f'Adding pose: {source_index}') if start_origin: if source_index == 0: @@ -242,7 +236,7 @@ def export_animated_mesh(output_path): print('Exporting to FBX binary (.fbx)') bpy.ops.export_scene.fbx(filepath=output_path, use_selection=True, add_leaf_bones=False) else: - print('ERROR: Unsupported export format: ' + output_path) + print(f'ERROR: Unsupported export format: {output_path}') sys.exit(1) return @@ -298,8 +292,8 @@ def export_animated_mesh(output_path): if not output_path.startswith(os.path.sep): output_path = os.path.join(cwd, output_path) - print('Input path: ' + input_path) - print('Output path: ' + output_path) + print(f'Input path: {input_path}') + print(f'Output path: {output_path}') if not (output_path.endswith('.fbx') or output_path.endswith('.glb')): print('ERROR: Invalid output format (must be .fbx or .glb)') @@ -329,12 +323,8 @@ def export_animated_mesh(output_path): sys.exit(0) except SystemExit as ex: - if ex.code is None: - exit_status = 0 - else: - exit_status = ex.code - - print('Exiting. Exit status: ' + str(exit_status)) + exit_status = 0 if ex.code is None else ex.code + print(f'Exiting. Exit status: {str(exit_status)}') # Only exit to OS when we are not running in Blender GUI if bpy.app.background: diff --git a/lib/utils/geometry.py b/lib/utils/geometry.py index d8271e8..fa108b2 100644 --- a/lib/utils/geometry.py +++ b/lib/utils/geometry.py @@ -56,13 +56,20 @@ def quat2mat(quat): wx, wy, wz = w * x, w * y, w * z xy, xz, yz = x * y, x * z, y * z - rotMat = torch.stack([ - w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy, - w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz, - w2 - x2 - y2 + z2 - ], - dim=1).view(batch_size, 3, 3) - return rotMat + return torch.stack( + [ + w2 + x2 - y2 - z2, + 2 * xy - 2 * wz, + 2 * wy + 2 * xz, + 2 * wz + 2 * xy, + w2 - x2 + y2 - z2, + 2 * yz - 2 * wx, + 2 * xz - 2 * wy, + 2 * wx + 2 * yz, + w2 - x2 - y2 + z2, + ], + dim=1, + ).view(batch_size, 3, 3) def rotation_matrix_to_angle_axis(rotation_matrix): @@ -120,12 +127,12 @@ def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor: >>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3 """ if not torch.is_tensor(quaternion): - raise TypeError("Input type is not a torch.Tensor. Got {}".format( - type(quaternion))) + raise TypeError(f"Input type is not a torch.Tensor. Got {type(quaternion)}") - if not quaternion.shape[-1] == 4: - raise ValueError("Input must be a tensor of shape Nx4 or 4. Got {}" - .format(quaternion.shape)) + if quaternion.shape[-1] != 4: + raise ValueError( + f"Input must be a tensor of shape Nx4 or 4. Got {quaternion.shape}" + ) # unpack input and compute conversion q1: torch.Tensor = quaternion[..., 1] q2: torch.Tensor = quaternion[..., 2] @@ -174,17 +181,18 @@ def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6): >>> output = tgm.rotation_matrix_to_quaternion(input) # Nx4 """ if not torch.is_tensor(rotation_matrix): - raise TypeError("Input type is not a torch.Tensor. Got {}".format( - type(rotation_matrix))) + raise TypeError( + f"Input type is not a torch.Tensor. Got {type(rotation_matrix)}" + ) if len(rotation_matrix.shape) > 3: raise ValueError( - "Input size must be a three dimensional tensor. Got {}".format( - rotation_matrix.shape)) - if not rotation_matrix.shape[-2:] == (3, 4): + f"Input size must be a three dimensional tensor. Got {rotation_matrix.shape}" + ) + if rotation_matrix.shape[-2:] != (3, 4): raise ValueError( - "Input size must be a N x 3 x 4 tensor. Got {}".format( - rotation_matrix.shape)) + f"Input size must be a N x 3 x 4 tensor. Got {rotation_matrix.shape}" + ) rmat_t = torch.transpose(rotation_matrix, 1, 2) @@ -271,10 +279,7 @@ def estimate_translation_np(S, joints_2d, joints_conf, focal_length=5000., img_s A = np.dot(Q.T,Q) b = np.dot(Q.T,c) - # solution - trans = np.linalg.solve(A, b) - - return trans + return np.linalg.solve(A, b) def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.): @@ -339,6 +344,4 @@ def rot6d_to_rotmat(x): # Finish building the basis by taking the cross product b3 = torch.cross(b1, b2, dim=1) - rot_mats = torch.stack([b1, b2, b3], dim=-1) - - return rot_mats \ No newline at end of file + return torch.stack([b1, b2, b3], dim=-1) \ No newline at end of file diff --git a/lib/utils/pose_tracker.py b/lib/utils/pose_tracker.py index 5028bb5..edb0327 100644 --- a/lib/utils/pose_tracker.py +++ b/lib/utils/pose_tracker.py @@ -60,18 +60,14 @@ def read_posetrack_keypoints(output_folder): for person in data['people']: person_id = person['person_id'][0] joints2d = person['pose_keypoints_2d'] - if person_id in people.keys(): - people[person_id]['joints2d'].append(joints2d) - people[person_id]['frames'].append(idx) - else: + if person_id not in people.keys(): people[person_id] = { 'joints2d': [], 'frames': [], } - people[person_id]['joints2d'].append(joints2d) - people[person_id]['frames'].append(idx) - - for k in people.keys(): + people[person_id]['joints2d'].append(joints2d) + people[person_id]['frames'].append(idx) + for k in people: people[k]['joints2d'] = np.array(people[k]['joints2d']).reshape((len(people[k]['joints2d']), -1, 3)) people[k]['frames'] = np.array(people[k]['frames']) diff --git a/lib/utils/smooth_pose.py b/lib/utils/smooth_pose.py index c6b5e8a..0b48b11 100644 --- a/lib/utils/smooth_pose.py +++ b/lib/utils/smooth_pose.py @@ -39,17 +39,13 @@ def smooth_pose(pred_pose, pred_betas, min_cutoff=0.004, beta=0.7): # initialize pred_pose_hat[0] = pred_pose[0] - pred_verts_hat = [] - pred_joints3d_hat = [] - smpl_output = smpl( betas=torch.from_numpy(pred_betas[0]).unsqueeze(0), body_pose=torch.from_numpy(pred_pose[0, 1:]).unsqueeze(0), global_orient=torch.from_numpy(pred_pose[0, 0:1]).unsqueeze(0), ) - pred_verts_hat.append(smpl_output.vertices.detach().cpu().numpy()) - pred_joints3d_hat.append(smpl_output.joints.detach().cpu().numpy()) - + pred_verts_hat = [smpl_output.vertices.detach().cpu().numpy()] + pred_joints3d_hat = [smpl_output.joints.detach().cpu().numpy()] for idx, pose in enumerate(pred_pose[1:]): idx += 1 diff --git a/lib/utils/utils.py b/lib/utils/utils.py index e8ad8d6..42a3e8d 100644 --- a/lib/utils/utils.py +++ b/lib/utils/utils.py @@ -30,10 +30,7 @@ def move_dict_to_device(dict, device, tensor2float=False): for k,v in dict.items(): if isinstance(v, torch.Tensor): - if tensor2float: - dict[k] = v.float().to(device) - else: - dict[k] = v.to(device) + dict[k] = v.float().to(device) if tensor2float else v.to(device) def get_from_dict(dict, keys): @@ -41,10 +38,7 @@ def get_from_dict(dict, keys): def tqdm_enumerate(iter): - i = 0 - for y in tqdm(iter): - yield i, y - i += 1 + yield from enumerate(tqdm(iter)) def iterdict(d): diff --git a/lib/utils/vis.py b/lib/utils/vis.py index f2283fe..bb6cde2 100644 --- a/lib/utils/vis.py +++ b/lib/utils/vis.py @@ -54,7 +54,7 @@ def get_projection_matrix(self, width=None, height=None): def get_colors(): - colors = { + return { 'pink': np.array([197, 27, 125]), # L lower leg 'light_pink': np.array([233, 163, 201]), # L upper leg 'light_green': np.array([161, 215, 106]), # L lower arm @@ -69,7 +69,6 @@ def get_colors(): 'gray': np.array([130, 130, 130]), # 'white': np.array([255, 255, 255]), # } - return colors def render_image(img, verts, cam, faces=None, angle=None, axis=None, resolution=224, output_fn=None): @@ -271,12 +270,11 @@ def visualize_preds(image, preds, target=None, target_exists=True, dataset='comm axis=[0,1,0] ) - if target_exists: - result_image = np.hstack([image, pred_image, target_image, render, render_side]) - else: - result_image = np.hstack([image, pred_image, render, render_side]) - - return result_image + return ( + np.hstack([image, pred_image, target_image, render, render_side]) + if target_exists + else np.hstack([image, pred_image, render, render_side]) + ) def batch_visualize_preds(images, preds, target=None, max_images=16, idxs=None, @@ -301,14 +299,9 @@ def batch_visualize_preds(images, preds, target=None, max_images=16, idxs=None, indexes = range(max_images) if idxs is None else idxs for idx in indexes: - single_pred = {} - for k, v in preds.items(): - single_pred[k] = v[idx] - + single_pred = {k: v[idx] for k, v in preds.items()} if target_exists: - single_target = {} - for k, v in target.items(): - single_target[k] = v[idx] + single_target = {k: v[idx] for k, v in target.items()} else: single_target = None @@ -316,9 +309,7 @@ def batch_visualize_preds(images, preds, target=None, max_images=16, idxs=None, dataset=dataset) result_images.append(img) - result_image = np.vstack(result_images) - - return result_image + return np.vstack(result_images) def batch_visualize_vid_preds(video, preds, target, max_video=4, vis_hmr=False, dataset='common'): @@ -353,14 +344,8 @@ def batch_visualize_vid_preds(video, preds, target, max_video=4, vis_hmr=False, for t_id in range(tsize): image = video[batch_id, t_id] - single_pred = {} - single_target = {} - for k, v in preds.items(): - single_pred[k] = v[batch_id, t_id] - - for k, v in target.items(): - single_target[k] = v[batch_id, t_id] - + single_pred = {k: v[batch_id, t_id] for k, v in preds.items()} + single_target = {k: v[batch_id, t_id] for k, v in target.items()} img = visualize_preds(image, single_pred, single_target, vis_hmr=vis_hmr, dataset=dataset) @@ -389,7 +374,7 @@ def draw_skeleton(image, kp_2d, dataset='common', unnormalize=True, thickness=2) skeleton = eval(f'kp_utils.get_{dataset}_skeleton')() common_lr = [0,0,1,1,0,0,0,0,1,0,0,1,1,1,0] - for idx,pt in enumerate(kp_2d): + for pt in kp_2d: if pt[2] > 0: # if visible cv2.circle(image, (pt[0], pt[1]), 4, pcolor, -1) # cv2.putText(image, f'{idx}', (pt[0]+1, pt[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0)) @@ -420,19 +405,13 @@ def batch_draw_skeleton(images, target, max_images=8, dataset='common'): result_images = [] for idx in range(max_images): - single_target = {} - - for k, v in target.items(): - single_target[k] = v[idx] - + single_target = {k: v[idx] for k, v in target.items()} img = torch2numpy(images[idx]) img = draw_skeleton(img.copy(), single_target['kp_2d'], dataset=dataset) result_images.append(img) - result_image = np.vstack(result_images) - - return result_image + return np.vstack(result_images) def get_regressor_output(features): diff --git a/tests/test_3d_datasets.py b/tests/test_3d_datasets.py index 5742e48..084a950 100644 --- a/tests/test_3d_datasets.py +++ b/tests/test_3d_datasets.py @@ -37,8 +37,6 @@ if dataset == 'MPII3D': images = batch_draw_skeleton(input, single_target, dataset='spin', max_images=4) - plt.imshow(images) - plt.show() else: theta = single_target['theta'] pose, shape = theta[:, 3:75], theta[:, 75:] @@ -50,9 +48,7 @@ single_target['verts'] = pred_output.vertices images = batch_visualize_preds(input, single_target, single_target, max_images=4, dataset='spin') - # images = batch_draw_skeleton(input, single_target, dataset='common', max_images=10) - plt.imshow(images) - plt.show() - + plt.imshow(images) + plt.show() if i == 100: break \ No newline at end of file