
Commit e6cc440

Merge pull request #8 from tomrunia/regression
UCF-101 dataset mean and normalization changes
2 parents 89f89d4 + d2a9797 commit e6cc440

File tree: 11 files changed, +169 / -76 lines

config.py

Lines changed: 7 additions & 2 deletions
@@ -14,7 +14,10 @@ def parse_opts():
     parser.add_argument('--dataset', type=str, required=True, help='Dataset string (kinetics | activitynet | ucf101 | blender)')
     parser.add_argument('--num_val_samples', type=int, default=1, help='Number of validation samples for each activity')
     parser.add_argument('--norm_value', default=255, type=int, help='Divide inputs by 255 or 1')
+    parser.add_argument('--no_dataset_mean', action='store_true', help='Dont use the dataset mean but normalize to zero mean')
+    parser.add_argument('--no_dataset_std', action='store_true', help='Dont use the dataset std but normalize to unity std')
     parser.add_argument('--num_classes', default=400, type=int, help='Number of classes (activitynet: 200, kinetics: 400, ucf101: 101, hmdb51: 51)')
+    parser.set_defaults(no_dataset_std=True)

     # Preprocessing pipeline
     parser.add_argument('--spatial_size', default=224, type=int, help='Height and width of inputs')

@@ -38,7 +41,8 @@ def parse_opts():
     parser.add_argument('--checkpoint_path', default='', type=str, help='Checkpoint file (.pth) of previous training')
     parser.add_argument('--finetune_num_classes', default=36, type=int, help='Number of classes for fine-tuning. num_classes is set to the number when pretraining.')
     parser.add_argument('--finetune_prefixes', default='logits,Mixed_5', type=str, help='Prefixes of layers to finetune, comma seperated (only used by I3D).')
-    parser.add_argument('--finetune_begin_index', default=0, type=int, help='Begin block index of fine-tuning (not used by I3D).')
+    parser.add_argument('--finetune_begin_index', default=4, type=int, help='Begin block index of fine-tuning (not used by I3D).')
+    parser.add_argument('--finetune_restore_optimizer', action='store_true', help='Whether to restore optimizer state')

     # Optimization
     parser.add_argument('--optimizer', default='adam', type=str, help='Which optimizer to use (SGD | adam | rmsprop)')

@@ -58,12 +62,13 @@ def parse_opts():
     parser.add_argument('--checkpoint_frequency', type=int, default=1, help='Save checkpoint after this number of epochs')
     parser.add_argument('--checkpoints_num_keep', type=int, default=5, help='Number of checkpoints to keep')
     parser.add_argument('--log_frequency', type=int, default=5, help='Logging frequency in number of steps')
+    parser.add_argument('--log_image_frequency', type=int, default=200, help='Logging images frequency in number of steps')
     parser.add_argument('--no_tensorboard', action='store_true', default=False, help='Disable the use of TensorboardX')

     # Misc
     parser.add_argument('--device', default='cuda:0', help='Device string cpu | cuda:0')
     parser.add_argument('--history_steps', default=25, type=int, help='History of running average meters')
-    parser.add_argument('--num_workers', default=4, type=int, help='Number of threads for multi-thread loading')
+    parser.add_argument('--num_workers', default=6, type=int, help='Number of threads for multi-thread loading')
     parser.add_argument('--no_eval', action='store_true', default=False, help='Disable evaluation')

     return parser.parse_args()
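For reference, a minimal sketch of how the new --no_dataset_mean and --no_dataset_std flags could be combined with --norm_value into per-channel normalization statistics. The helper name and the UCF-101 mean/std numbers below are illustrative placeholders, not values taken from this repository.

# Hypothetical helper, not part of this commit: pick normalization statistics
# from the parsed options. The numeric means/stds are placeholders.
def get_normalization_stats(config):
    if config.no_dataset_mean:
        mean = [0.0, 0.0, 0.0]                                            # normalize to zero mean
    else:
        mean = [m / config.norm_value for m in (110.6, 103.2, 96.3)]      # per-channel dataset mean (placeholder)

    if config.no_dataset_std:
        std = [1.0, 1.0, 1.0]                                             # unity std, leave the scale untouched
    else:
        std = [s / config.norm_value for s in (38.8, 37.5, 40.0)]         # per-channel dataset std (placeholder)

    return mean, std

Note that parser.set_defaults(no_dataset_std=True) makes unity std the default, so out of the box only the dataset mean is applied.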

datasets/ucf101.py

Lines changed: 6 additions & 9 deletions
@@ -11,14 +11,15 @@

 from utils.utils import load_value_file

+##########################################################################################
+##########################################################################################

 def pil_loader(path):
     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
     with open(path, 'rb') as f:
         with Image.open(f) as img:
             return img.convert('RGB')

-
 def accimage_loader(path):
     try:
         import accimage

@@ -27,15 +28,13 @@ def accimage_loader(path):
         # Potentially a decoding problem, fall back to PIL.Image
         return pil_loader(path)

-
 def get_default_image_loader():
     from torchvision import get_image_backend
     if get_image_backend() == 'accimage':
         return accimage_loader
     else:
         return pil_loader

-
 def video_loader(video_dir_path, frame_indices, image_loader):
     video = []
     for i in frame_indices:

@@ -47,17 +46,14 @@ def video_loader(video_dir_path, frame_indices, image_loader):

     return video

-
 def get_default_video_loader():
     image_loader = get_default_image_loader()
     return functools.partial(video_loader, image_loader=image_loader)

-
 def load_annotation_data(data_file_path):
     with open(data_file_path, 'r') as data_file:
         return json.load(data_file)

-
 def get_class_labels(data):
     class_labels_map = {}
     index = 0

@@ -66,7 +62,6 @@ def get_class_labels(data):
         index += 1
     return class_labels_map

-
 def get_video_names_and_annotations(data, subset):
     video_names = []
     annotations = []

@@ -80,6 +75,8 @@ def get_video_names_and_annotations(data, subset):

     return video_names, annotations

+##########################################################################################
+##########################################################################################

 def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,
                  sample_duration):

@@ -143,8 +140,8 @@ def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,

     return dataset, idx_to_class

-############################################################################
-############################################################################
+##########################################################################################
+##########################################################################################

 class UCF101(data.Dataset):
     """

epoch_iterators.py

Lines changed: 21 additions & 1 deletion
@@ -45,7 +45,7 @@ def train_epoch(config, model, criterion, optimizer, device,
        optimizer.zero_grad()

        # Move inputs to GPU memory
-        clips = clips.to(device)
+        clips = clips.to(device)
        targets = targets.to(device)
        if config.model == 'i3d':
            targets = torch.unsqueeze(targets, -1)

@@ -97,6 +97,16 @@ def train_epoch(config, model, criterion, optimizer, device,
            summary_writer.add_scalar('train/learning_rate', current_learning_rate(optimizer), global_step)
            summary_writer.add_scalar('train/weight_decay', current_weight_decay(optimizer), global_step)

+        if summary_writer and step % config.log_image_frequency == 0:
+            # TensorboardX video summary
+            for example_idx in range(4):
+                clip_for_display = clips[example_idx].clone().cpu()
+                min_val = float(clip_for_display.min())
+                max_val = float(clip_for_display.max())
+                clip_for_display.clamp_(min=min_val, max=max_val)
+                clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
+                summary_writer.add_video('train_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), global_step)
+
    # Epoch statistics
    epoch_duration = float(time.time() - epoch_start_time)
    epoch_avg_loss = np.mean(losses)

@@ -159,6 +169,16 @@ def validation_epoch(config, model, criterion, device, data_loader, epoch, summary_writer
                step, steps_in_epoch, examples_per_second,
                accuracies[step], losses[step]))

+        if summary_writer and step == 0:
+            # TensorboardX video summary
+            for example_idx in range(4):
+                clip_for_display = clips[example_idx].clone().cpu()
+                min_val = float(clip_for_display.min())
+                max_val = float(clip_for_display.max())
+                clip_for_display.clamp_(min=min_val, max=max_val)
+                clip_for_display.add_(-min_val).div_(max_val - min_val + 1e-5)
+                summary_writer.add_video('validation_clips/{:04d}'.format(example_idx), clip_for_display.unsqueeze(0), epoch*steps_in_epoch)
+
    # Epoch statistics
    epoch_duration = float(time.time() - epoch_start_time)
    epoch_avg_loss = np.mean(losses)
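The two video-summary blocks added above rescale each clip into [0, 1] before handing it to add_video; the clamp to the clip's own min and max is effectively a no-op, so the add_/div_ line does the actual work. Below is a standalone sketch of that rescaling; the tensor layout and the tensorboardX call shown in the comment are assumptions, not checked against this repository.

import torch

def rescale_for_display(clip, eps=1e-5):
    # Min-max rescale a clip tensor into [0, 1] for TensorBoard display,
    # mirroring the normalization used in the hunks above.
    clip = clip.clone().float().cpu()
    min_val = float(clip.min())
    max_val = float(clip.max())
    return clip.add_(-min_val).div_(max_val - min_val + eps)

# Assumed usage: tensorboardX expects a batched float tensor with values in [0, 1].
# summary_writer.add_video('train_clips/0000', rescale_for_display(clips[0]).unsqueeze(0), global_step)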

factory/data_factory.py

Lines changed: 2 additions & 2 deletions
@@ -209,8 +209,8 @@ def get_data_loaders(config, train_transforms, validation_transforms=None):
    if not config.no_eval and validation_transforms:

        dataset_validation = get_validation_set(
-            config, train_transforms['spatial'],
-            train_transforms['temporal'], train_transforms['target'])
+            config, validation_transforms['spatial'],
+            validation_transforms['temporal'], validation_transforms['target'])

        print('Found {} validation examples'.format(len(dataset_validation)))

factory/model_factory.py

Lines changed: 8 additions & 21 deletions
@@ -177,7 +177,9 @@ def get_model(config):
        print('Moving model to CUDA device...')
        # Move model to the GPU
        model = model.cuda()
-        #model = nn.DataParallel(model, device_ids=None)
+
+        if config.model != 'i3d':
+            model = nn.DataParallel(model, device_ids=None)

        if config.checkpoint_path:

@@ -194,6 +196,8 @@ def get_model(config):

            # Setup finetuning layer for different number of classes
            # Note: the DataParallel adds 'module' dict to complicate things...
+            print('Replacing model logits with {} output classes.'.format(config.finetune_num_classes))
+
            if config.model == 'i3d':
                model.replace_logits(config.finetune_num_classes)
            elif config.model == 'densenet':

@@ -204,29 +208,12 @@ def get_model(config):
                model.module.fc = model.module.fc.cuda()

            # Setup which layers to train
-            finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
+            assert config.model in ('i3d', 'resnet'), 'finetune params not implemented...'
+            finetune_criterion = config.finetune_prefixes if config.model in ('i3d', 'resnet') else config.finetune_begin_index
            parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)

            return model, parameters_to_train
    else:
-
-        if config.checkpoint_path:
-
-            print('Loading pretrained model {}'.format(config.checkpoint_path))
-            assert os.path.isfile(config.checkpoint_path)
-
-            checkpoint = torch.load(config.checkpoint_path)
-            model.load_state_dict(checkpoint['state_dict'])
-
-            if config.model == 'densenet':
-                model.classifier = nn.Linear(model.classifier.in_features, config.finetune_num_classes)
-            else:
-                model.fc = nn.Linear(model.fc.in_features, config.finetune_num_classes)
-
-            # Setup which layers to train
-            finetune_criterion = config.finetune_prefixes if config.model == 'i3d' else config.finetune_begin_index
-            parameters_to_train = get_fine_tuning_parameters(model, finetune_criterion)
-
-            return model, parameters_to_train
+        raise ValueError('CPU training not supported.')

    return model, model.parameters()
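The non-I3D models are now wrapped in nn.DataParallel, which is why the finetuning code above goes through model.module and why the comment warns that DataParallel "adds 'module' dict". A small illustration of that behaviour, using a 2D torchvision ResNet as a stand-in for this repo's 3D models:

import torch.nn as nn
import torchvision

net = torchvision.models.resnet18(num_classes=101)
print(next(iter(net.state_dict())))            # 'conv1.weight'

net = nn.DataParallel(net, device_ids=None)    # same wrapping as in get_model() for non-I3D models
print(next(iter(net.state_dict())))            # 'module.conv1.weight'

# Replacing the classification head therefore goes through .module,
# just like the model.module.fc assignment in the hunk above.
net.module.fc = nn.Linear(net.module.fc.in_features, 36)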

models/densenet.py

Lines changed: 9 additions & 0 deletions
@@ -197,6 +197,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

    assert isinstance(ft_begin_index, int)
    if ft_begin_index == 0:
+        print('WARNING: training full network because --finetune_begin_index=0')
        return model.parameters()

    ft_module_names = []

@@ -207,12 +208,20 @@ def get_fine_tuning_parameters(model, ft_begin_index):
    ft_module_names.append('classifier')

    parameters = []
+    param_names_to_finetune = []
+
    for k, v in model.named_parameters():
        for ft_module in ft_module_names:
            if ft_module in k:
                parameters.append({'params': v})
+                param_names_to_finetune.append(k)
                break
        else:
+            param_names_to_finetune.append(k)
            parameters.append({'params': v, 'lr': 0.0})

+    for k, v in model.named_parameters():
+        if k not in param_names_to_finetune:
+            v.requires_grad = False
+
    return parameters
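For intuition, a toy restatement of the pattern this hunk is after: parameters whose names match the finetuned modules go into the optimizer at the normal learning rate, while the rest are either given a 0.0 learning rate or have requires_grad switched off so autograd skips them entirely (the latter is what saves GPU memory). The tiny nn.Sequential below is a stand-in, not the repo's DenseNet.

import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))   # toy stand-in for the DenseNet
ft_module_names = ['1']                                    # e.g. 'denseblock4', 'classifier' in the real model

parameters, names_to_finetune = [], []
for name, param in model.named_parameters():
    if any(m in name for m in ft_module_names):
        parameters.append({'params': param})               # trained at the normal learning rate
        names_to_finetune.append(name)
    else:
        parameters.append({'params': param, 'lr': 0.0})    # kept in the optimizer but not updated

for name, param in model.named_parameters():
    if name not in names_to_finetune:
        param.requires_grad = False                        # autograd skips these weights entirely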

models/i3d.py

Lines changed: 0 additions & 1 deletion
@@ -343,7 +343,6 @@ def trainable_params(self):
        return params

    def replace_logits(self, num_classes, device='cuda:0'):
-        print('Replacing I3D logits to {} output classes.'.format(num_classes))
        self._num_classes = num_classes
        self.layers['logits'] = Unit3D(
            in_channels=384+384+128+128, output_channels=num_classes,

models/resnet.py

Lines changed: 68 additions & 14 deletions
@@ -193,28 +193,82 @@ def forward(self, x):
 ##########################################################################################
 ##########################################################################################

-def get_fine_tuning_parameters(model, ft_begin_index):

-    assert isinstance(ft_begin_index, int)
-    if ft_begin_index == 0:
+def get_fine_tuning_parameters(model, ft_prefixes):
+
+    assert isinstance(ft_prefixes, str)
+
+    if ft_prefixes == '':
+        print('WARNING: training full network because --ft_predixes=None')
        return model.parameters()

-    ft_module_names = []
-    for i in range(ft_begin_index, 5):
-        ft_module_names.append('layer{}'.format(i))
-    ft_module_names.append('fc')
+    print('#'*60)
+    print('Setting finetuning layer prefixes: {}'.format(ft_prefixes))

+    ft_prefixes = ft_prefixes.split(',')
    parameters = []
-    for k, v in model.named_parameters():
-        for ft_module in ft_module_names:
-            if ft_module in k:
-                parameters.append({'params': v})
-                break
-        else:
-            parameters.append({'params': v, 'lr': 0.0})
+    param_names = []
+    for param_name, param in model.named_parameters():
+        for prefix in ft_prefixes:
+            if prefix in param_name:
+                print('  Finetuning parameter: {}'.format(param_name))
+                parameters.append({'params': param, 'name': param_name})
+                param_names.append(param_name)
+
+    for param_name, param in model.named_parameters():
+        if param_name not in param_names:
+            # This sames a lot of GPU memory...
+            print('disabling gradient for: {}'.format(param_name))
+            param.requires_grad = False

    return parameters

+
+
+# def get_fine_tuning_parameters(model, ft_begin_index):
+#
+#     assert isinstance(ft_begin_index, int)
+#     if ft_begin_index == 0:
+#         print('WARNING: training full network because --finetune_begin_index=0')
+#         return model.parameters()
+#
+#     for param_name, param in model.named_modules():
+#         print(param_name)
+#
+#
+#     ft_module_names = []
+#     for i in range(ft_begin_index, 5):
+#         ft_module_names.append('layer{}'.format(i))
+#     ft_module_names.append('fc')
+#
+#     print('Modules to finetune: {}'.format(ft_module_names))
+#
+#     parameters = []
+#     param_names_to_finetune = []
+#     for k, v in model.named_parameters():
+#         for ft_module in ft_module_names:
+#             if ft_module in k:
+#                 parameters.append({'params': v, 'name': k})
+#                 param_names_to_finetune.append(k)
+#                 break
+#         else:
+#             parameters.append({'params': v, 'lr': 0.0, 'name': k})
+#             param_names_to_finetune.append(k)
+#
+#     # Disabling gradients for frozen weights (hacky...)
+#     frozen_module_names = []
+#     for i in range(0, ft_begin_index):
+#         frozen_module_names.append('layer{}'.format(i))
+#     for k, v in model.named_parameters():
+#         for frozen_module in frozen_module_names:
+#             if frozen_module in k:
+#                 print('disabling grad for: {}'.format(k))
+#                 v.requires_grad = False
+#     model.module.conv1.requires_grad = False
+#     model.module.bn1.requires_grad = False
+#
+#     return parameters
+

 ##########################################################################################
 ##########################################################################################
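The rewritten get_fine_tuning_parameters selects parameters by comma-separated name prefixes (the same format as --finetune_prefixes, e.g. 'layer4,fc') and freezes everything else. A runnable toy version of that pattern, using a 2D torchvision ResNet as a stand-in for this repo's 3D ResNet:

import torch
import torchvision

model = torchvision.models.resnet18(num_classes=101)        # stand-in for the 3D ResNet
ft_prefixes = 'layer4,fc'.split(',')                         # same format as --finetune_prefixes

parameters, selected = [], []
for name, param in model.named_parameters():
    if any(prefix in name for prefix in ft_prefixes):
        parameters.append({'params': param, 'name': name})   # goes into the optimizer
        selected.append(name)
    else:
        param.requires_grad = False                          # frozen: no gradients, less GPU memory

optimizer = torch.optim.Adam(parameters, lr=1e-4)
print('Finetuning {} of {} parameter tensors'.format(len(selected), len(list(model.parameters()))))

Because the matching is substring-based, a prefix such as 'layer1' would also match 'layer10' in deeper architectures, so prefixes should be chosen to be unambiguous.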

models/resnext.py

Lines changed: 1 addition & 0 deletions
@@ -180,6 +180,7 @@ def get_fine_tuning_parameters(model, ft_begin_index):

    assert isinstance(ft_begin_index, int)
    if ft_begin_index == 0:
+        print('WARNING: training full network because --finetune_begin_index=0')
        return model.parameters()

    ft_module_names = []
