diff --git a/codes/train.py b/codes/train.py
index 1447236b..aed20554 100644
--- a/codes/train.py
+++ b/codes/train.py
@@ -186,9 +186,10 @@ class Trainer:
         #### training
         if self._profile:
             print("Update LR: %f" % (time() - _t))
-            _t = time()
+        _t = time()
         self.model.feed_data(train_data, self.current_step)
         gradient_norms_dict = self.model.optimize_parameters(self.current_step, return_grad_norms=will_log)
+        iteration_rate = (time() - _t) / batch_size
         if self._profile:
             print("Model feed + step: %f" % (time() - _t))
             _t = time()
@@ -202,7 +203,8 @@ class Trainer:
         if will_log and self.rank <= 0:
             logs = {'step': self.current_step,
                     'samples': self.total_training_data_encountered,
-                    'megasamples': self.total_training_data_encountered / 1000000}
+                    'megasamples': self.total_training_data_encountered / 1000000,
+                    'iteration_rate': iteration_rate}
             logs.update(current_model_logs)
             if self.dataset_debugger is not None:
                 logs.update(self.dataset_debugger.get_debugging_map())