add some comments, and fix use of g_val

2023-03-22 09:22:19 -07:00 · 2023-03-22 09:22:19 -07:00 · aa9b939edd
commit aa9b939edd
parent a43cd2008d
1 changed file with 3 additions and 1 deletion
--- a/csrc/kernels.cu
+++ b/csrc/kernels.cu
@@ -1708,7 +1708,9 @@ kOptimizerStatic8bit1StateBlockwise(T* p, T* __restrict__ const g, unsigned char
 											s1_vals[j] = (s1_vals[j]*beta1) + g_val;
 										break;
 									case LION:
-                    g_vals[j] = lr*sgn(((float)s1_vals[j])*beta1 + ((1.0f-beta1)*((float)g_vals[j])));
+                    // g_vals[j] is reused to hold the update: lr * sign(beta1*state1 + (1-beta1)*g_val)
+                    // state1 (momentum) is updated with beta2 only afterwards, so the update above reads the old state1
+                    g_vals[j] = lr*sgn(((float)s1_vals[j])*beta1 + ((1.0f-beta1)*g_val));
 										s1_vals[j] = s1_vals[j]*beta2 + ((1.0f-beta2)*g_val);
 										break;
 									case RMSPROP: