{ "best_metric": null, "best_model_checkpoint": null, "epoch": 400.0, "eval_steps": 200.0, "global_step": 6800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.058823529411764705, "grad_norm": 51.63768768310547, "learning_rate": 9.803921568627452e-08, "loss": 6.8164, "step": 1 }, { "epoch": 0.11764705882352941, "grad_norm": 53.6297607421875, "learning_rate": 1.9607843137254904e-07, "loss": 6.584, "step": 2 }, { "epoch": 0.17647058823529413, "grad_norm": 52.695533752441406, "learning_rate": 2.9411764705882356e-07, "loss": 6.6211, "step": 3 }, { "epoch": 0.23529411764705882, "grad_norm": 48.62294006347656, "learning_rate": 3.921568627450981e-07, "loss": 6.8555, "step": 4 }, { "epoch": 0.29411764705882354, "grad_norm": 58.69572067260742, "learning_rate": 4.901960784313725e-07, "loss": 6.8301, "step": 5 }, { "epoch": 0.35294117647058826, "grad_norm": 63.30489730834961, "learning_rate": 5.882352941176471e-07, "loss": 6.9004, "step": 6 }, { "epoch": 0.4117647058823529, "grad_norm": 53.24351501464844, "learning_rate": 6.862745098039217e-07, "loss": 6.7266, "step": 7 }, { "epoch": 0.47058823529411764, "grad_norm": 72.76138305664062, "learning_rate": 7.843137254901962e-07, "loss": 6.8281, "step": 8 }, { "epoch": 0.5294117647058824, "grad_norm": 83.228759765625, "learning_rate": 8.823529411764707e-07, "loss": 6.8496, "step": 9 }, { "epoch": 0.5882352941176471, "grad_norm": 79.47531127929688, "learning_rate": 9.80392156862745e-07, "loss": 6.8027, "step": 10 }, { "epoch": 0.6470588235294118, "grad_norm": 59.54706573486328, "learning_rate": 1.0784313725490197e-06, "loss": 6.6367, "step": 11 }, { "epoch": 0.7058823529411765, "grad_norm": 56.5369987487793, "learning_rate": 1.1764705882352942e-06, "loss": 6.666, "step": 12 }, { "epoch": 0.7647058823529411, "grad_norm": 63.2001953125, "learning_rate": 1.2745098039215686e-06, "loss": 6.7617, "step": 13 }, { "epoch": 0.8235294117647058, "grad_norm": 60.77675247192383, "learning_rate": 1.3725490196078434e-06, "loss": 6.6973, "step": 14 }, { "epoch": 0.8823529411764706, "grad_norm": 50.811553955078125, "learning_rate": 1.4705882352941177e-06, "loss": 6.4902, "step": 15 }, { "epoch": 0.9411764705882353, "grad_norm": 55.82879638671875, "learning_rate": 1.5686274509803923e-06, "loss": 6.5195, "step": 16 }, { "epoch": 1.0, "grad_norm": 49.42478942871094, "learning_rate": 1.6666666666666667e-06, "loss": 6.6152, "step": 17 }, { "epoch": 1.0588235294117647, "grad_norm": 37.404422760009766, "learning_rate": 1.7647058823529414e-06, "loss": 6.416, "step": 18 }, { "epoch": 1.1176470588235294, "grad_norm": 36.0550537109375, "learning_rate": 1.8627450980392158e-06, "loss": 6.2051, "step": 19 }, { "epoch": 1.1764705882352942, "grad_norm": 28.087570190429688, "learning_rate": 1.96078431372549e-06, "loss": 6.1074, "step": 20 }, { "epoch": 1.2352941176470589, "grad_norm": 23.727338790893555, "learning_rate": 2.058823529411765e-06, "loss": 6.3516, "step": 21 }, { "epoch": 1.2941176470588236, "grad_norm": 26.307571411132812, "learning_rate": 2.1568627450980393e-06, "loss": 6.0488, "step": 22 }, { "epoch": 1.3529411764705883, "grad_norm": 24.419462203979492, "learning_rate": 2.254901960784314e-06, "loss": 6.0879, "step": 23 }, { "epoch": 1.4117647058823528, "grad_norm": 21.19923210144043, "learning_rate": 2.3529411764705885e-06, "loss": 5.9668, "step": 24 }, { "epoch": 1.4705882352941178, "grad_norm": 22.968204498291016, "learning_rate": 2.450980392156863e-06, "loss": 5.7969, "step": 25 }, { "epoch": 1.5294117647058822, "grad_norm": 25.94234275817871, "learning_rate": 2.549019607843137e-06, "loss": 6.1016, "step": 26 }, { "epoch": 1.5882352941176472, "grad_norm": 19.601388931274414, "learning_rate": 2.647058823529412e-06, "loss": 6.0137, "step": 27 }, { "epoch": 1.6470588235294117, "grad_norm": 16.89910888671875, "learning_rate": 2.7450980392156867e-06, "loss": 5.6758, "step": 28 }, { "epoch": 1.7058823529411766, "grad_norm": 17.549100875854492, "learning_rate": 2.843137254901961e-06, "loss": 5.543, "step": 29 }, { "epoch": 1.7647058823529411, "grad_norm": 15.629093170166016, "learning_rate": 2.9411764705882355e-06, "loss": 5.3574, "step": 30 }, { "epoch": 1.8235294117647058, "grad_norm": 15.545809745788574, "learning_rate": 3.03921568627451e-06, "loss": 5.4551, "step": 31 }, { "epoch": 1.8823529411764706, "grad_norm": 18.034942626953125, "learning_rate": 3.1372549019607846e-06, "loss": 5.4473, "step": 32 }, { "epoch": 1.9411764705882353, "grad_norm": 18.402727127075195, "learning_rate": 3.2352941176470594e-06, "loss": 5.3789, "step": 33 }, { "epoch": 2.0, "grad_norm": 15.360274314880371, "learning_rate": 3.3333333333333333e-06, "loss": 5.2188, "step": 34 }, { "epoch": 2.0588235294117645, "grad_norm": 12.713061332702637, "learning_rate": 3.431372549019608e-06, "loss": 5.3633, "step": 35 }, { "epoch": 2.1176470588235294, "grad_norm": 13.014906883239746, "learning_rate": 3.529411764705883e-06, "loss": 5.2637, "step": 36 }, { "epoch": 2.176470588235294, "grad_norm": 13.670853614807129, "learning_rate": 3.6274509803921573e-06, "loss": 5.1895, "step": 37 }, { "epoch": 2.235294117647059, "grad_norm": 11.823801040649414, "learning_rate": 3.7254901960784316e-06, "loss": 5.1836, "step": 38 }, { "epoch": 2.2941176470588234, "grad_norm": 11.235871315002441, "learning_rate": 3.8235294117647055e-06, "loss": 4.9902, "step": 39 }, { "epoch": 2.3529411764705883, "grad_norm": 14.860069274902344, "learning_rate": 3.92156862745098e-06, "loss": 4.9062, "step": 40 }, { "epoch": 2.411764705882353, "grad_norm": 12.556838989257812, "learning_rate": 4.019607843137255e-06, "loss": 4.9043, "step": 41 }, { "epoch": 2.4705882352941178, "grad_norm": 10.711292266845703, "learning_rate": 4.11764705882353e-06, "loss": 4.8418, "step": 42 }, { "epoch": 2.5294117647058822, "grad_norm": 13.674238204956055, "learning_rate": 4.215686274509805e-06, "loss": 4.8301, "step": 43 }, { "epoch": 2.588235294117647, "grad_norm": 15.542411804199219, "learning_rate": 4.313725490196079e-06, "loss": 4.7988, "step": 44 }, { "epoch": 2.6470588235294117, "grad_norm": 13.242746353149414, "learning_rate": 4.411764705882353e-06, "loss": 4.8672, "step": 45 }, { "epoch": 2.7058823529411766, "grad_norm": 10.837019920349121, "learning_rate": 4.509803921568628e-06, "loss": 4.7969, "step": 46 }, { "epoch": 2.764705882352941, "grad_norm": 10.251729965209961, "learning_rate": 4.607843137254902e-06, "loss": 4.5977, "step": 47 }, { "epoch": 2.8235294117647056, "grad_norm": 11.639378547668457, "learning_rate": 4.705882352941177e-06, "loss": 4.7783, "step": 48 }, { "epoch": 2.8823529411764706, "grad_norm": 12.576868057250977, "learning_rate": 4.803921568627452e-06, "loss": 4.6562, "step": 49 }, { "epoch": 2.9411764705882355, "grad_norm": 10.43549919128418, "learning_rate": 4.901960784313726e-06, "loss": 4.6553, "step": 50 }, { "epoch": 3.0, "grad_norm": 10.10483169555664, "learning_rate": 5e-06, "loss": 4.6523, "step": 51 }, { "epoch": 3.0588235294117645, "grad_norm": 11.683320045471191, "learning_rate": 5.098039215686274e-06, "loss": 4.7109, "step": 52 }, { "epoch": 3.1176470588235294, "grad_norm": 16.418821334838867, "learning_rate": 5.19607843137255e-06, "loss": 4.5078, "step": 53 }, { "epoch": 3.176470588235294, "grad_norm": 10.852066993713379, "learning_rate": 5.294117647058824e-06, "loss": 4.4746, "step": 54 }, { "epoch": 3.235294117647059, "grad_norm": 13.366440773010254, "learning_rate": 5.392156862745098e-06, "loss": 4.5391, "step": 55 }, { "epoch": 3.2941176470588234, "grad_norm": 20.226062774658203, "learning_rate": 5.4901960784313735e-06, "loss": 4.3613, "step": 56 }, { "epoch": 3.3529411764705883, "grad_norm": 12.828580856323242, "learning_rate": 5.588235294117647e-06, "loss": 4.3301, "step": 57 }, { "epoch": 3.411764705882353, "grad_norm": 10.56985855102539, "learning_rate": 5.686274509803922e-06, "loss": 4.4297, "step": 58 }, { "epoch": 3.4705882352941178, "grad_norm": 11.738313674926758, "learning_rate": 5.784313725490197e-06, "loss": 4.1553, "step": 59 }, { "epoch": 3.5294117647058822, "grad_norm": 11.248602867126465, "learning_rate": 5.882352941176471e-06, "loss": 4.3076, "step": 60 }, { "epoch": 3.588235294117647, "grad_norm": 11.386012077331543, "learning_rate": 5.980392156862746e-06, "loss": 4.2568, "step": 61 }, { "epoch": 3.6470588235294117, "grad_norm": 18.26079559326172, "learning_rate": 6.07843137254902e-06, "loss": 4.1689, "step": 62 }, { "epoch": 3.7058823529411766, "grad_norm": 11.267820358276367, "learning_rate": 6.176470588235295e-06, "loss": 4.3125, "step": 63 }, { "epoch": 3.764705882352941, "grad_norm": 11.417197227478027, "learning_rate": 6.274509803921569e-06, "loss": 4.1113, "step": 64 }, { "epoch": 3.8235294117647056, "grad_norm": 10.317519187927246, "learning_rate": 6.372549019607843e-06, "loss": 4.0879, "step": 65 }, { "epoch": 3.8823529411764706, "grad_norm": 10.77283000946045, "learning_rate": 6.470588235294119e-06, "loss": 4.0312, "step": 66 }, { "epoch": 3.9411764705882355, "grad_norm": 10.12507438659668, "learning_rate": 6.568627450980393e-06, "loss": 4.0195, "step": 67 }, { "epoch": 4.0, "grad_norm": 12.873411178588867, "learning_rate": 6.666666666666667e-06, "loss": 3.9814, "step": 68 }, { "epoch": 4.0588235294117645, "grad_norm": 12.144936561584473, "learning_rate": 6.764705882352942e-06, "loss": 3.9248, "step": 69 }, { "epoch": 4.117647058823529, "grad_norm": 12.12427806854248, "learning_rate": 6.862745098039216e-06, "loss": 3.8066, "step": 70 }, { "epoch": 4.176470588235294, "grad_norm": 12.182165145874023, "learning_rate": 6.96078431372549e-06, "loss": 3.7109, "step": 71 }, { "epoch": 4.235294117647059, "grad_norm": 10.87298583984375, "learning_rate": 7.058823529411766e-06, "loss": 3.8291, "step": 72 }, { "epoch": 4.294117647058823, "grad_norm": 12.313884735107422, "learning_rate": 7.15686274509804e-06, "loss": 3.7949, "step": 73 }, { "epoch": 4.352941176470588, "grad_norm": 11.95705795288086, "learning_rate": 7.2549019607843145e-06, "loss": 3.8418, "step": 74 }, { "epoch": 4.411764705882353, "grad_norm": 9.335436820983887, "learning_rate": 7.352941176470589e-06, "loss": 3.7139, "step": 75 }, { "epoch": 4.470588235294118, "grad_norm": 10.223672866821289, "learning_rate": 7.450980392156863e-06, "loss": 3.7773, "step": 76 }, { "epoch": 4.529411764705882, "grad_norm": 12.767525672912598, "learning_rate": 7.549019607843138e-06, "loss": 3.7275, "step": 77 }, { "epoch": 4.588235294117647, "grad_norm": 10.596083641052246, "learning_rate": 7.647058823529411e-06, "loss": 3.5977, "step": 78 }, { "epoch": 4.647058823529412, "grad_norm": 11.530255317687988, "learning_rate": 7.745098039215687e-06, "loss": 3.5322, "step": 79 }, { "epoch": 4.705882352941177, "grad_norm": 10.752220153808594, "learning_rate": 7.84313725490196e-06, "loss": 3.6074, "step": 80 }, { "epoch": 4.764705882352941, "grad_norm": 14.33499813079834, "learning_rate": 7.941176470588236e-06, "loss": 3.5781, "step": 81 }, { "epoch": 4.823529411764706, "grad_norm": 11.895805358886719, "learning_rate": 8.03921568627451e-06, "loss": 3.6445, "step": 82 }, { "epoch": 4.882352941176471, "grad_norm": 11.475525856018066, "learning_rate": 8.137254901960784e-06, "loss": 3.6309, "step": 83 }, { "epoch": 4.9411764705882355, "grad_norm": 10.733941078186035, "learning_rate": 8.23529411764706e-06, "loss": 3.5254, "step": 84 }, { "epoch": 5.0, "grad_norm": 9.971414566040039, "learning_rate": 8.333333333333334e-06, "loss": 3.4873, "step": 85 }, { "epoch": 5.0588235294117645, "grad_norm": 11.416383743286133, "learning_rate": 8.43137254901961e-06, "loss": 3.3916, "step": 86 }, { "epoch": 5.117647058823529, "grad_norm": 10.809455871582031, "learning_rate": 8.529411764705883e-06, "loss": 3.4766, "step": 87 }, { "epoch": 5.176470588235294, "grad_norm": 11.244071006774902, "learning_rate": 8.627450980392157e-06, "loss": 3.4004, "step": 88 }, { "epoch": 5.235294117647059, "grad_norm": 10.760682106018066, "learning_rate": 8.725490196078433e-06, "loss": 3.5518, "step": 89 }, { "epoch": 5.294117647058823, "grad_norm": 11.62124252319336, "learning_rate": 8.823529411764707e-06, "loss": 3.3525, "step": 90 }, { "epoch": 5.352941176470588, "grad_norm": 11.17910099029541, "learning_rate": 8.921568627450982e-06, "loss": 3.4873, "step": 91 }, { "epoch": 5.411764705882353, "grad_norm": 10.203570365905762, "learning_rate": 9.019607843137256e-06, "loss": 3.292, "step": 92 }, { "epoch": 5.470588235294118, "grad_norm": 9.820199012756348, "learning_rate": 9.11764705882353e-06, "loss": 3.29, "step": 93 }, { "epoch": 5.529411764705882, "grad_norm": 10.270584106445312, "learning_rate": 9.215686274509804e-06, "loss": 3.332, "step": 94 }, { "epoch": 5.588235294117647, "grad_norm": 11.277420043945312, "learning_rate": 9.31372549019608e-06, "loss": 3.3047, "step": 95 }, { "epoch": 5.647058823529412, "grad_norm": 11.050536155700684, "learning_rate": 9.411764705882354e-06, "loss": 3.2559, "step": 96 }, { "epoch": 5.705882352941177, "grad_norm": 9.607011795043945, "learning_rate": 9.509803921568628e-06, "loss": 3.291, "step": 97 }, { "epoch": 5.764705882352941, "grad_norm": 9.275640487670898, "learning_rate": 9.607843137254903e-06, "loss": 3.3125, "step": 98 }, { "epoch": 5.823529411764706, "grad_norm": 9.94192886352539, "learning_rate": 9.705882352941177e-06, "loss": 3.3604, "step": 99 }, { "epoch": 5.882352941176471, "grad_norm": 11.351509094238281, "learning_rate": 9.803921568627451e-06, "loss": 3.2002, "step": 100 }, { "epoch": 5.9411764705882355, "grad_norm": 10.830763816833496, "learning_rate": 9.901960784313727e-06, "loss": 3.1992, "step": 101 }, { "epoch": 6.0, "grad_norm": 11.266824722290039, "learning_rate": 1e-05, "loss": 3.2236, "step": 102 }, { "epoch": 6.0588235294117645, "grad_norm": 10.818594932556152, "learning_rate": 1.0098039215686275e-05, "loss": 3.0488, "step": 103 }, { "epoch": 6.117647058823529, "grad_norm": 11.288885116577148, "learning_rate": 1.0196078431372549e-05, "loss": 3.2051, "step": 104 }, { "epoch": 6.176470588235294, "grad_norm": 9.862954139709473, "learning_rate": 1.0294117647058823e-05, "loss": 3.1299, "step": 105 }, { "epoch": 6.235294117647059, "grad_norm": 12.023382186889648, "learning_rate": 1.03921568627451e-05, "loss": 3.2109, "step": 106 }, { "epoch": 6.294117647058823, "grad_norm": 10.659722328186035, "learning_rate": 1.0490196078431374e-05, "loss": 3.0557, "step": 107 }, { "epoch": 6.352941176470588, "grad_norm": 9.081803321838379, "learning_rate": 1.0588235294117648e-05, "loss": 3.2344, "step": 108 }, { "epoch": 6.411764705882353, "grad_norm": 14.717130661010742, "learning_rate": 1.0686274509803922e-05, "loss": 3.1963, "step": 109 }, { "epoch": 6.470588235294118, "grad_norm": 10.27243423461914, "learning_rate": 1.0784313725490196e-05, "loss": 3.1318, "step": 110 }, { "epoch": 6.529411764705882, "grad_norm": 12.24393081665039, "learning_rate": 1.0882352941176471e-05, "loss": 3.1377, "step": 111 }, { "epoch": 6.588235294117647, "grad_norm": 11.39543342590332, "learning_rate": 1.0980392156862747e-05, "loss": 3.1211, "step": 112 }, { "epoch": 6.647058823529412, "grad_norm": 12.057255744934082, "learning_rate": 1.1078431372549021e-05, "loss": 3.083, "step": 113 }, { "epoch": 6.705882352941177, "grad_norm": 12.226395606994629, "learning_rate": 1.1176470588235295e-05, "loss": 3.1016, "step": 114 }, { "epoch": 6.764705882352941, "grad_norm": 11.01719856262207, "learning_rate": 1.1274509803921569e-05, "loss": 2.9482, "step": 115 }, { "epoch": 6.823529411764706, "grad_norm": 9.661253929138184, "learning_rate": 1.1372549019607844e-05, "loss": 3.0605, "step": 116 }, { "epoch": 6.882352941176471, "grad_norm": 12.23996639251709, "learning_rate": 1.1470588235294118e-05, "loss": 3.1504, "step": 117 }, { "epoch": 6.9411764705882355, "grad_norm": 15.322983741760254, "learning_rate": 1.1568627450980394e-05, "loss": 3.0283, "step": 118 }, { "epoch": 7.0, "grad_norm": 10.78109073638916, "learning_rate": 1.1666666666666668e-05, "loss": 3.0293, "step": 119 }, { "epoch": 7.0588235294117645, "grad_norm": 10.770563125610352, "learning_rate": 1.1764705882352942e-05, "loss": 2.9805, "step": 120 }, { "epoch": 7.117647058823529, "grad_norm": 11.851059913635254, "learning_rate": 1.1862745098039217e-05, "loss": 2.96, "step": 121 }, { "epoch": 7.176470588235294, "grad_norm": 12.949153900146484, "learning_rate": 1.1960784313725491e-05, "loss": 3.0869, "step": 122 }, { "epoch": 7.235294117647059, "grad_norm": 16.55706024169922, "learning_rate": 1.2058823529411765e-05, "loss": 2.9971, "step": 123 }, { "epoch": 7.294117647058823, "grad_norm": 15.319601058959961, "learning_rate": 1.215686274509804e-05, "loss": 2.8789, "step": 124 }, { "epoch": 7.352941176470588, "grad_norm": 12.268057823181152, "learning_rate": 1.2254901960784315e-05, "loss": 2.8623, "step": 125 }, { "epoch": 7.411764705882353, "grad_norm": 8.98075008392334, "learning_rate": 1.235294117647059e-05, "loss": 2.9385, "step": 126 }, { "epoch": 7.470588235294118, "grad_norm": 11.286323547363281, "learning_rate": 1.2450980392156864e-05, "loss": 3.0049, "step": 127 }, { "epoch": 7.529411764705882, "grad_norm": 11.966099739074707, "learning_rate": 1.2549019607843138e-05, "loss": 2.9297, "step": 128 }, { "epoch": 7.588235294117647, "grad_norm": 12.40942096710205, "learning_rate": 1.2647058823529412e-05, "loss": 2.917, "step": 129 }, { "epoch": 7.647058823529412, "grad_norm": 11.721186637878418, "learning_rate": 1.2745098039215686e-05, "loss": 2.9873, "step": 130 }, { "epoch": 7.705882352941177, "grad_norm": 19.976049423217773, "learning_rate": 1.2843137254901964e-05, "loss": 3.0518, "step": 131 }, { "epoch": 7.764705882352941, "grad_norm": 14.835821151733398, "learning_rate": 1.2941176470588238e-05, "loss": 2.9756, "step": 132 }, { "epoch": 7.823529411764706, "grad_norm": 25.713090896606445, "learning_rate": 1.3039215686274511e-05, "loss": 2.8232, "step": 133 }, { "epoch": 7.882352941176471, "grad_norm": 10.94117259979248, "learning_rate": 1.3137254901960785e-05, "loss": 2.8809, "step": 134 }, { "epoch": 7.9411764705882355, "grad_norm": 12.795940399169922, "learning_rate": 1.323529411764706e-05, "loss": 2.8711, "step": 135 }, { "epoch": 8.0, "grad_norm": 13.388744354248047, "learning_rate": 1.3333333333333333e-05, "loss": 2.8516, "step": 136 }, { "epoch": 8.058823529411764, "grad_norm": 14.354214668273926, "learning_rate": 1.3431372549019607e-05, "loss": 2.751, "step": 137 }, { "epoch": 8.117647058823529, "grad_norm": 20.77850341796875, "learning_rate": 1.3529411764705885e-05, "loss": 2.8936, "step": 138 }, { "epoch": 8.176470588235293, "grad_norm": 15.04226303100586, "learning_rate": 1.3627450980392158e-05, "loss": 2.8838, "step": 139 }, { "epoch": 8.235294117647058, "grad_norm": 11.744118690490723, "learning_rate": 1.3725490196078432e-05, "loss": 2.9014, "step": 140 }, { "epoch": 8.294117647058824, "grad_norm": 14.26008415222168, "learning_rate": 1.3823529411764706e-05, "loss": 2.7432, "step": 141 }, { "epoch": 8.352941176470589, "grad_norm": 11.286965370178223, "learning_rate": 1.392156862745098e-05, "loss": 2.6846, "step": 142 }, { "epoch": 8.411764705882353, "grad_norm": 12.249191284179688, "learning_rate": 1.4019607843137256e-05, "loss": 2.7988, "step": 143 }, { "epoch": 8.470588235294118, "grad_norm": 14.083580017089844, "learning_rate": 1.4117647058823532e-05, "loss": 2.8486, "step": 144 }, { "epoch": 8.529411764705882, "grad_norm": 10.49722957611084, "learning_rate": 1.4215686274509805e-05, "loss": 2.9395, "step": 145 }, { "epoch": 8.588235294117647, "grad_norm": 17.97847557067871, "learning_rate": 1.431372549019608e-05, "loss": 2.8291, "step": 146 }, { "epoch": 8.647058823529411, "grad_norm": 11.866440773010254, "learning_rate": 1.4411764705882353e-05, "loss": 2.7949, "step": 147 }, { "epoch": 8.705882352941176, "grad_norm": 15.376677513122559, "learning_rate": 1.4509803921568629e-05, "loss": 2.7393, "step": 148 }, { "epoch": 8.764705882352942, "grad_norm": 11.954102516174316, "learning_rate": 1.4607843137254903e-05, "loss": 2.8584, "step": 149 }, { "epoch": 8.823529411764707, "grad_norm": 13.41263198852539, "learning_rate": 1.4705882352941179e-05, "loss": 2.7344, "step": 150 }, { "epoch": 8.882352941176471, "grad_norm": 14.156145095825195, "learning_rate": 1.4803921568627453e-05, "loss": 2.6543, "step": 151 }, { "epoch": 8.941176470588236, "grad_norm": 14.093667984008789, "learning_rate": 1.4901960784313726e-05, "loss": 2.6406, "step": 152 }, { "epoch": 9.0, "grad_norm": 13.45874309539795, "learning_rate": 1.5000000000000002e-05, "loss": 2.5996, "step": 153 }, { "epoch": 9.058823529411764, "grad_norm": 14.479905128479004, "learning_rate": 1.5098039215686276e-05, "loss": 2.7339, "step": 154 }, { "epoch": 9.117647058823529, "grad_norm": 18.811214447021484, "learning_rate": 1.519607843137255e-05, "loss": 2.667, "step": 155 }, { "epoch": 9.176470588235293, "grad_norm": 15.144575119018555, "learning_rate": 1.5294117647058822e-05, "loss": 2.7031, "step": 156 }, { "epoch": 9.235294117647058, "grad_norm": 16.8763370513916, "learning_rate": 1.53921568627451e-05, "loss": 2.7295, "step": 157 }, { "epoch": 9.294117647058824, "grad_norm": 15.400444030761719, "learning_rate": 1.5490196078431373e-05, "loss": 2.7329, "step": 158 }, { "epoch": 9.352941176470589, "grad_norm": 21.350221633911133, "learning_rate": 1.558823529411765e-05, "loss": 2.6338, "step": 159 }, { "epoch": 9.411764705882353, "grad_norm": 16.61676788330078, "learning_rate": 1.568627450980392e-05, "loss": 2.5488, "step": 160 }, { "epoch": 9.470588235294118, "grad_norm": 15.97496509552002, "learning_rate": 1.5784313725490197e-05, "loss": 2.707, "step": 161 }, { "epoch": 9.529411764705882, "grad_norm": 17.508052825927734, "learning_rate": 1.5882352941176473e-05, "loss": 2.7515, "step": 162 }, { "epoch": 9.588235294117647, "grad_norm": 18.627103805541992, "learning_rate": 1.5980392156862748e-05, "loss": 2.5576, "step": 163 }, { "epoch": 9.647058823529411, "grad_norm": 14.976521492004395, "learning_rate": 1.607843137254902e-05, "loss": 2.6875, "step": 164 }, { "epoch": 9.705882352941176, "grad_norm": 12.28699779510498, "learning_rate": 1.6176470588235296e-05, "loss": 2.6855, "step": 165 }, { "epoch": 9.764705882352942, "grad_norm": 12.343031883239746, "learning_rate": 1.627450980392157e-05, "loss": 2.6553, "step": 166 }, { "epoch": 9.823529411764707, "grad_norm": 11.254836082458496, "learning_rate": 1.6372549019607844e-05, "loss": 2.5903, "step": 167 }, { "epoch": 9.882352941176471, "grad_norm": 11.342394828796387, "learning_rate": 1.647058823529412e-05, "loss": 2.4956, "step": 168 }, { "epoch": 9.941176470588236, "grad_norm": 13.43175220489502, "learning_rate": 1.6568627450980395e-05, "loss": 2.5439, "step": 169 }, { "epoch": 10.0, "grad_norm": 12.408002853393555, "learning_rate": 1.6666666666666667e-05, "loss": 2.5327, "step": 170 }, { "epoch": 10.058823529411764, "grad_norm": 14.573883056640625, "learning_rate": 1.6764705882352943e-05, "loss": 2.4771, "step": 171 }, { "epoch": 10.117647058823529, "grad_norm": 16.410829544067383, "learning_rate": 1.686274509803922e-05, "loss": 2.4985, "step": 172 }, { "epoch": 10.176470588235293, "grad_norm": 14.946598052978516, "learning_rate": 1.696078431372549e-05, "loss": 2.3813, "step": 173 }, { "epoch": 10.235294117647058, "grad_norm": 11.938079833984375, "learning_rate": 1.7058823529411767e-05, "loss": 2.5244, "step": 174 }, { "epoch": 10.294117647058824, "grad_norm": 11.429203033447266, "learning_rate": 1.715686274509804e-05, "loss": 2.4565, "step": 175 }, { "epoch": 10.352941176470589, "grad_norm": 12.640714645385742, "learning_rate": 1.7254901960784314e-05, "loss": 2.4956, "step": 176 }, { "epoch": 10.411764705882353, "grad_norm": 12.697708129882812, "learning_rate": 1.735294117647059e-05, "loss": 2.6064, "step": 177 }, { "epoch": 10.470588235294118, "grad_norm": 14.531661987304688, "learning_rate": 1.7450980392156866e-05, "loss": 2.6299, "step": 178 }, { "epoch": 10.529411764705882, "grad_norm": 18.739961624145508, "learning_rate": 1.7549019607843138e-05, "loss": 2.4282, "step": 179 }, { "epoch": 10.588235294117647, "grad_norm": 14.924627304077148, "learning_rate": 1.7647058823529414e-05, "loss": 2.5732, "step": 180 }, { "epoch": 10.647058823529411, "grad_norm": 17.789443969726562, "learning_rate": 1.7745098039215686e-05, "loss": 2.5488, "step": 181 }, { "epoch": 10.705882352941176, "grad_norm": 17.27865219116211, "learning_rate": 1.7843137254901965e-05, "loss": 2.4316, "step": 182 }, { "epoch": 10.764705882352942, "grad_norm": 18.87969398498535, "learning_rate": 1.7941176470588237e-05, "loss": 2.5283, "step": 183 }, { "epoch": 10.823529411764707, "grad_norm": 15.205449104309082, "learning_rate": 1.8039215686274513e-05, "loss": 2.502, "step": 184 }, { "epoch": 10.882352941176471, "grad_norm": 18.467741012573242, "learning_rate": 1.8137254901960785e-05, "loss": 2.4414, "step": 185 }, { "epoch": 10.941176470588236, "grad_norm": 13.336495399475098, "learning_rate": 1.823529411764706e-05, "loss": 2.3643, "step": 186 }, { "epoch": 11.0, "grad_norm": 13.050034523010254, "learning_rate": 1.8333333333333333e-05, "loss": 2.312, "step": 187 }, { "epoch": 11.058823529411764, "grad_norm": 11.88442325592041, "learning_rate": 1.843137254901961e-05, "loss": 2.2363, "step": 188 }, { "epoch": 11.117647058823529, "grad_norm": 21.228031158447266, "learning_rate": 1.8529411764705884e-05, "loss": 2.4487, "step": 189 }, { "epoch": 11.176470588235293, "grad_norm": 11.557737350463867, "learning_rate": 1.862745098039216e-05, "loss": 2.3486, "step": 190 }, { "epoch": 11.235294117647058, "grad_norm": 17.373249053955078, "learning_rate": 1.8725490196078432e-05, "loss": 2.4326, "step": 191 }, { "epoch": 11.294117647058824, "grad_norm": 14.251124382019043, "learning_rate": 1.8823529411764708e-05, "loss": 2.3242, "step": 192 }, { "epoch": 11.352941176470589, "grad_norm": 15.840014457702637, "learning_rate": 1.892156862745098e-05, "loss": 2.3091, "step": 193 }, { "epoch": 11.411764705882353, "grad_norm": 13.188559532165527, "learning_rate": 1.9019607843137255e-05, "loss": 2.4727, "step": 194 }, { "epoch": 11.470588235294118, "grad_norm": 10.975872993469238, "learning_rate": 1.911764705882353e-05, "loss": 2.2417, "step": 195 }, { "epoch": 11.529411764705882, "grad_norm": 12.124404907226562, "learning_rate": 1.9215686274509807e-05, "loss": 2.3999, "step": 196 }, { "epoch": 11.588235294117647, "grad_norm": 13.198169708251953, "learning_rate": 1.931372549019608e-05, "loss": 2.209, "step": 197 }, { "epoch": 11.647058823529411, "grad_norm": 13.107339859008789, "learning_rate": 1.9411764705882355e-05, "loss": 2.2832, "step": 198 }, { "epoch": 11.705882352941176, "grad_norm": 14.557199478149414, "learning_rate": 1.950980392156863e-05, "loss": 2.3354, "step": 199 }, { "epoch": 11.764705882352942, "grad_norm": 15.95510482788086, "learning_rate": 1.9607843137254903e-05, "loss": 2.4004, "step": 200 }, { "epoch": 11.823529411764707, "grad_norm": 15.092738151550293, "learning_rate": 1.9705882352941178e-05, "loss": 2.3965, "step": 201 }, { "epoch": 11.882352941176471, "grad_norm": 13.062868118286133, "learning_rate": 1.9803921568627454e-05, "loss": 2.3994, "step": 202 }, { "epoch": 11.941176470588236, "grad_norm": 15.117156982421875, "learning_rate": 1.9901960784313726e-05, "loss": 2.2852, "step": 203 }, { "epoch": 12.0, "grad_norm": 11.884886741638184, "learning_rate": 2e-05, "loss": 2.2563, "step": 204 }, { "epoch": 12.058823529411764, "grad_norm": 10.823262214660645, "learning_rate": 1.999999886575089e-05, "loss": 2.2852, "step": 205 }, { "epoch": 12.117647058823529, "grad_norm": 12.69868278503418, "learning_rate": 1.9999995463003813e-05, "loss": 2.1143, "step": 206 }, { "epoch": 12.176470588235293, "grad_norm": 12.938948631286621, "learning_rate": 1.9999989791759546e-05, "loss": 2.1396, "step": 207 }, { "epoch": 12.235294117647058, "grad_norm": 14.284083366394043, "learning_rate": 1.9999981852019372e-05, "loss": 2.3076, "step": 208 }, { "epoch": 12.294117647058824, "grad_norm": 14.45789623260498, "learning_rate": 1.999997164378509e-05, "loss": 2.2451, "step": 209 }, { "epoch": 12.352941176470589, "grad_norm": 13.255826950073242, "learning_rate": 1.9999959167059024e-05, "loss": 2.04, "step": 210 }, { "epoch": 12.411764705882353, "grad_norm": 14.691210746765137, "learning_rate": 1.9999944421843992e-05, "loss": 2.2344, "step": 211 }, { "epoch": 12.470588235294118, "grad_norm": 13.276491165161133, "learning_rate": 1.9999927408143346e-05, "loss": 2.2534, "step": 212 }, { "epoch": 12.529411764705882, "grad_norm": 17.43175506591797, "learning_rate": 1.9999908125960948e-05, "loss": 2.2451, "step": 213 }, { "epoch": 12.588235294117647, "grad_norm": 16.476093292236328, "learning_rate": 1.9999886575301173e-05, "loss": 2.3696, "step": 214 }, { "epoch": 12.647058823529411, "grad_norm": 13.290589332580566, "learning_rate": 1.99998627561689e-05, "loss": 2.0254, "step": 215 }, { "epoch": 12.705882352941176, "grad_norm": 16.636625289916992, "learning_rate": 1.9999836668569542e-05, "loss": 2.1182, "step": 216 }, { "epoch": 12.764705882352942, "grad_norm": 16.673063278198242, "learning_rate": 1.9999808312509014e-05, "loss": 2.0918, "step": 217 }, { "epoch": 12.823529411764707, "grad_norm": 12.715124130249023, "learning_rate": 1.9999777687993748e-05, "loss": 2.2256, "step": 218 }, { "epoch": 12.882352941176471, "grad_norm": 12.848173141479492, "learning_rate": 1.999974479503069e-05, "loss": 2.1343, "step": 219 }, { "epoch": 12.941176470588236, "grad_norm": 14.96810531616211, "learning_rate": 1.9999709633627305e-05, "loss": 2.2568, "step": 220 }, { "epoch": 13.0, "grad_norm": 14.461625099182129, "learning_rate": 1.9999672203791564e-05, "loss": 2.1802, "step": 221 }, { "epoch": 13.058823529411764, "grad_norm": 11.544590950012207, "learning_rate": 1.999963250553196e-05, "loss": 2.2036, "step": 222 }, { "epoch": 13.117647058823529, "grad_norm": 12.375348091125488, "learning_rate": 1.9999590538857506e-05, "loss": 2.0088, "step": 223 }, { "epoch": 13.176470588235293, "grad_norm": 12.363651275634766, "learning_rate": 1.9999546303777714e-05, "loss": 1.9727, "step": 224 }, { "epoch": 13.235294117647058, "grad_norm": 13.442216873168945, "learning_rate": 1.999949980030262e-05, "loss": 2.2446, "step": 225 }, { "epoch": 13.294117647058824, "grad_norm": 13.868474960327148, "learning_rate": 1.9999451028442777e-05, "loss": 2.0117, "step": 226 }, { "epoch": 13.352941176470589, "grad_norm": 18.010934829711914, "learning_rate": 1.9999399988209244e-05, "loss": 1.9287, "step": 227 }, { "epoch": 13.411764705882353, "grad_norm": 16.336238861083984, "learning_rate": 1.9999346679613598e-05, "loss": 2.0278, "step": 228 }, { "epoch": 13.470588235294118, "grad_norm": 15.854435920715332, "learning_rate": 1.999929110266794e-05, "loss": 2.127, "step": 229 }, { "epoch": 13.529411764705882, "grad_norm": 16.545732498168945, "learning_rate": 1.999923325738487e-05, "loss": 2.0122, "step": 230 }, { "epoch": 13.588235294117647, "grad_norm": 15.338556289672852, "learning_rate": 1.9999173143777517e-05, "loss": 1.9951, "step": 231 }, { "epoch": 13.647058823529411, "grad_norm": 13.483538627624512, "learning_rate": 1.999911076185951e-05, "loss": 2.084, "step": 232 }, { "epoch": 13.705882352941176, "grad_norm": 14.507209777832031, "learning_rate": 1.9999046111645002e-05, "loss": 2.1553, "step": 233 }, { "epoch": 13.764705882352942, "grad_norm": 14.834870338439941, "learning_rate": 1.9998979193148668e-05, "loss": 1.9512, "step": 234 }, { "epoch": 13.823529411764707, "grad_norm": 14.536206245422363, "learning_rate": 1.999891000638568e-05, "loss": 1.959, "step": 235 }, { "epoch": 13.882352941176471, "grad_norm": 16.981624603271484, "learning_rate": 1.9998838551371733e-05, "loss": 1.9668, "step": 236 }, { "epoch": 13.941176470588236, "grad_norm": 13.704812049865723, "learning_rate": 1.9998764828123042e-05, "loss": 2.0808, "step": 237 }, { "epoch": 14.0, "grad_norm": 13.37675952911377, "learning_rate": 1.9998688836656322e-05, "loss": 1.8921, "step": 238 }, { "epoch": 14.058823529411764, "grad_norm": 12.974909782409668, "learning_rate": 1.999861057698882e-05, "loss": 1.8872, "step": 239 }, { "epoch": 14.117647058823529, "grad_norm": 14.376026153564453, "learning_rate": 1.9998530049138286e-05, "loss": 1.8843, "step": 240 }, { "epoch": 14.176470588235293, "grad_norm": 13.164725303649902, "learning_rate": 1.999844725312299e-05, "loss": 1.9429, "step": 241 }, { "epoch": 14.235294117647058, "grad_norm": 11.941705703735352, "learning_rate": 1.9998362188961708e-05, "loss": 1.8672, "step": 242 }, { "epoch": 14.294117647058824, "grad_norm": 14.394044876098633, "learning_rate": 1.9998274856673744e-05, "loss": 1.894, "step": 243 }, { "epoch": 14.352941176470589, "grad_norm": 10.736377716064453, "learning_rate": 1.9998185256278905e-05, "loss": 2.062, "step": 244 }, { "epoch": 14.411764705882353, "grad_norm": 11.28419017791748, "learning_rate": 1.999809338779752e-05, "loss": 1.8501, "step": 245 }, { "epoch": 14.470588235294118, "grad_norm": 11.78189468383789, "learning_rate": 1.9997999251250428e-05, "loss": 1.8691, "step": 246 }, { "epoch": 14.529411764705882, "grad_norm": 13.027113914489746, "learning_rate": 1.999790284665898e-05, "loss": 2.0859, "step": 247 }, { "epoch": 14.588235294117647, "grad_norm": 13.774324417114258, "learning_rate": 1.999780417404505e-05, "loss": 1.8989, "step": 248 }, { "epoch": 14.647058823529411, "grad_norm": 16.810232162475586, "learning_rate": 1.999770323343102e-05, "loss": 1.8486, "step": 249 }, { "epoch": 14.705882352941176, "grad_norm": 11.063785552978516, "learning_rate": 1.9997600024839793e-05, "loss": 2.0093, "step": 250 }, { "epoch": 14.764705882352942, "grad_norm": 13.596749305725098, "learning_rate": 1.9997494548294774e-05, "loss": 2.0405, "step": 251 }, { "epoch": 14.823529411764707, "grad_norm": 11.78172492980957, "learning_rate": 1.9997386803819892e-05, "loss": 1.9155, "step": 252 }, { "epoch": 14.882352941176471, "grad_norm": 13.585885047912598, "learning_rate": 1.9997276791439593e-05, "loss": 1.7529, "step": 253 }, { "epoch": 14.941176470588236, "grad_norm": 13.251992225646973, "learning_rate": 1.9997164511178834e-05, "loss": 1.8384, "step": 254 }, { "epoch": 15.0, "grad_norm": 12.57100772857666, "learning_rate": 1.999704996306308e-05, "loss": 1.8472, "step": 255 }, { "epoch": 15.058823529411764, "grad_norm": 13.918010711669922, "learning_rate": 1.9996933147118322e-05, "loss": 1.9292, "step": 256 }, { "epoch": 15.117647058823529, "grad_norm": 12.65568733215332, "learning_rate": 1.9996814063371058e-05, "loss": 1.791, "step": 257 }, { "epoch": 15.176470588235293, "grad_norm": 14.7772855758667, "learning_rate": 1.9996692711848297e-05, "loss": 1.79, "step": 258 }, { "epoch": 15.235294117647058, "grad_norm": 14.07065200805664, "learning_rate": 1.999656909257757e-05, "loss": 1.6777, "step": 259 }, { "epoch": 15.294117647058824, "grad_norm": 13.427538871765137, "learning_rate": 1.9996443205586926e-05, "loss": 1.8257, "step": 260 }, { "epoch": 15.352941176470589, "grad_norm": 15.106050491333008, "learning_rate": 1.9996315050904916e-05, "loss": 1.8867, "step": 261 }, { "epoch": 15.411764705882353, "grad_norm": 14.101187705993652, "learning_rate": 1.9996184628560615e-05, "loss": 1.6567, "step": 262 }, { "epoch": 15.470588235294118, "grad_norm": 11.546265602111816, "learning_rate": 1.9996051938583608e-05, "loss": 1.7356, "step": 263 }, { "epoch": 15.529411764705882, "grad_norm": 16.897659301757812, "learning_rate": 1.9995916981003995e-05, "loss": 1.7485, "step": 264 }, { "epoch": 15.588235294117647, "grad_norm": 19.297094345092773, "learning_rate": 1.9995779755852393e-05, "loss": 1.7441, "step": 265 }, { "epoch": 15.647058823529411, "grad_norm": 11.41850471496582, "learning_rate": 1.999564026315993e-05, "loss": 1.6831, "step": 266 }, { "epoch": 15.705882352941176, "grad_norm": 15.145292282104492, "learning_rate": 1.9995498502958254e-05, "loss": 1.8735, "step": 267 }, { "epoch": 15.764705882352942, "grad_norm": 12.099532127380371, "learning_rate": 1.9995354475279517e-05, "loss": 1.7715, "step": 268 }, { "epoch": 15.823529411764707, "grad_norm": 12.588370323181152, "learning_rate": 1.9995208180156393e-05, "loss": 1.8345, "step": 269 }, { "epoch": 15.882352941176471, "grad_norm": 21.68567657470703, "learning_rate": 1.9995059617622072e-05, "loss": 1.8359, "step": 270 }, { "epoch": 15.941176470588236, "grad_norm": 21.75311851501465, "learning_rate": 1.9994908787710252e-05, "loss": 1.688, "step": 271 }, { "epoch": 16.0, "grad_norm": 21.852815628051758, "learning_rate": 1.9994755690455154e-05, "loss": 1.8481, "step": 272 }, { "epoch": 16.058823529411764, "grad_norm": 21.176721572875977, "learning_rate": 1.9994600325891498e-05, "loss": 1.6592, "step": 273 }, { "epoch": 16.11764705882353, "grad_norm": 15.565642356872559, "learning_rate": 1.9994442694054543e-05, "loss": 1.5659, "step": 274 }, { "epoch": 16.176470588235293, "grad_norm": 14.27005672454834, "learning_rate": 1.9994282794980032e-05, "loss": 1.7336, "step": 275 }, { "epoch": 16.235294117647058, "grad_norm": 15.819165229797363, "learning_rate": 1.999412062870425e-05, "loss": 1.6655, "step": 276 }, { "epoch": 16.294117647058822, "grad_norm": 17.24355125427246, "learning_rate": 1.999395619526398e-05, "loss": 1.7432, "step": 277 }, { "epoch": 16.352941176470587, "grad_norm": 11.833491325378418, "learning_rate": 1.9993789494696526e-05, "loss": 1.6121, "step": 278 }, { "epoch": 16.41176470588235, "grad_norm": 16.953298568725586, "learning_rate": 1.9993620527039698e-05, "loss": 1.8169, "step": 279 }, { "epoch": 16.470588235294116, "grad_norm": 15.286576271057129, "learning_rate": 1.999344929233183e-05, "loss": 1.6069, "step": 280 }, { "epoch": 16.529411764705884, "grad_norm": 11.435842514038086, "learning_rate": 1.999327579061177e-05, "loss": 1.7305, "step": 281 }, { "epoch": 16.58823529411765, "grad_norm": 11.69426441192627, "learning_rate": 1.999310002191887e-05, "loss": 1.7156, "step": 282 }, { "epoch": 16.647058823529413, "grad_norm": 12.389381408691406, "learning_rate": 1.999292198629301e-05, "loss": 1.6584, "step": 283 }, { "epoch": 16.705882352941178, "grad_norm": 12.719099044799805, "learning_rate": 1.999274168377457e-05, "loss": 1.553, "step": 284 }, { "epoch": 16.764705882352942, "grad_norm": 14.520657539367676, "learning_rate": 1.999255911440446e-05, "loss": 1.6465, "step": 285 }, { "epoch": 16.823529411764707, "grad_norm": 13.950552940368652, "learning_rate": 1.9992374278224093e-05, "loss": 1.7378, "step": 286 }, { "epoch": 16.88235294117647, "grad_norm": 12.431747436523438, "learning_rate": 1.9992187175275392e-05, "loss": 1.6267, "step": 287 }, { "epoch": 16.941176470588236, "grad_norm": 14.061230659484863, "learning_rate": 1.999199780560081e-05, "loss": 1.5669, "step": 288 }, { "epoch": 17.0, "grad_norm": 13.04484748840332, "learning_rate": 1.9991806169243302e-05, "loss": 1.6753, "step": 289 }, { "epoch": 17.058823529411764, "grad_norm": 20.169523239135742, "learning_rate": 1.9991612266246338e-05, "loss": 1.6133, "step": 290 }, { "epoch": 17.11764705882353, "grad_norm": 13.820265769958496, "learning_rate": 1.9991416096653913e-05, "loss": 1.6147, "step": 291 }, { "epoch": 17.176470588235293, "grad_norm": 14.276350975036621, "learning_rate": 1.9991217660510518e-05, "loss": 1.7104, "step": 292 }, { "epoch": 17.235294117647058, "grad_norm": 11.697235107421875, "learning_rate": 1.9991016957861177e-05, "loss": 1.5879, "step": 293 }, { "epoch": 17.294117647058822, "grad_norm": 15.01540470123291, "learning_rate": 1.9990813988751413e-05, "loss": 1.4504, "step": 294 }, { "epoch": 17.352941176470587, "grad_norm": 13.68730640411377, "learning_rate": 1.9990608753227274e-05, "loss": 1.5842, "step": 295 }, { "epoch": 17.41176470588235, "grad_norm": 14.246511459350586, "learning_rate": 1.9990401251335313e-05, "loss": 1.479, "step": 296 }, { "epoch": 17.470588235294116, "grad_norm": 12.295178413391113, "learning_rate": 1.9990191483122605e-05, "loss": 1.4897, "step": 297 }, { "epoch": 17.529411764705884, "grad_norm": 12.429271697998047, "learning_rate": 1.9989979448636732e-05, "loss": 1.5071, "step": 298 }, { "epoch": 17.58823529411765, "grad_norm": 12.495901107788086, "learning_rate": 1.9989765147925804e-05, "loss": 1.5588, "step": 299 }, { "epoch": 17.647058823529413, "grad_norm": 13.559638023376465, "learning_rate": 1.9989548581038425e-05, "loss": 1.6187, "step": 300 }, { "epoch": 17.705882352941178, "grad_norm": 11.611136436462402, "learning_rate": 1.9989329748023728e-05, "loss": 1.5698, "step": 301 }, { "epoch": 17.764705882352942, "grad_norm": 11.063663482666016, "learning_rate": 1.998910864893135e-05, "loss": 1.6516, "step": 302 }, { "epoch": 17.823529411764707, "grad_norm": 13.846134185791016, "learning_rate": 1.9988885283811454e-05, "loss": 1.4236, "step": 303 }, { "epoch": 17.88235294117647, "grad_norm": 13.088607788085938, "learning_rate": 1.9988659652714704e-05, "loss": 1.6934, "step": 304 }, { "epoch": 17.941176470588236, "grad_norm": 13.29516315460205, "learning_rate": 1.998843175569229e-05, "loss": 1.657, "step": 305 }, { "epoch": 18.0, "grad_norm": 12.520183563232422, "learning_rate": 1.998820159279591e-05, "loss": 1.5022, "step": 306 }, { "epoch": 18.058823529411764, "grad_norm": 12.421347618103027, "learning_rate": 1.998796916407777e-05, "loss": 1.2981, "step": 307 }, { "epoch": 18.11764705882353, "grad_norm": 12.45820426940918, "learning_rate": 1.9987734469590605e-05, "loss": 1.6768, "step": 308 }, { "epoch": 18.176470588235293, "grad_norm": 12.358576774597168, "learning_rate": 1.9987497509387647e-05, "loss": 1.5652, "step": 309 }, { "epoch": 18.235294117647058, "grad_norm": 10.482483863830566, "learning_rate": 1.998725828352266e-05, "loss": 1.3344, "step": 310 }, { "epoch": 18.294117647058822, "grad_norm": 15.997809410095215, "learning_rate": 1.99870167920499e-05, "loss": 1.606, "step": 311 }, { "epoch": 18.352941176470587, "grad_norm": 11.957833290100098, "learning_rate": 1.9986773035024164e-05, "loss": 1.3264, "step": 312 }, { "epoch": 18.41176470588235, "grad_norm": 11.897160530090332, "learning_rate": 1.9986527012500736e-05, "loss": 1.5795, "step": 313 }, { "epoch": 18.470588235294116, "grad_norm": 11.501913070678711, "learning_rate": 1.9986278724535434e-05, "loss": 1.3929, "step": 314 }, { "epoch": 18.529411764705884, "grad_norm": 12.001955032348633, "learning_rate": 1.9986028171184574e-05, "loss": 1.446, "step": 315 }, { "epoch": 18.58823529411765, "grad_norm": 16.223054885864258, "learning_rate": 1.9985775352505e-05, "loss": 1.3689, "step": 316 }, { "epoch": 18.647058823529413, "grad_norm": 15.16612720489502, "learning_rate": 1.9985520268554068e-05, "loss": 1.3259, "step": 317 }, { "epoch": 18.705882352941178, "grad_norm": 15.817391395568848, "learning_rate": 1.9985262919389635e-05, "loss": 1.5105, "step": 318 }, { "epoch": 18.764705882352942, "grad_norm": 10.394479751586914, "learning_rate": 1.998500330507008e-05, "loss": 1.55, "step": 319 }, { "epoch": 18.823529411764707, "grad_norm": 12.577038764953613, "learning_rate": 1.9984741425654308e-05, "loss": 1.4501, "step": 320 }, { "epoch": 18.88235294117647, "grad_norm": 13.078625679016113, "learning_rate": 1.998447728120171e-05, "loss": 1.4355, "step": 321 }, { "epoch": 18.941176470588236, "grad_norm": 10.724760055541992, "learning_rate": 1.998421087177222e-05, "loss": 1.5815, "step": 322 }, { "epoch": 19.0, "grad_norm": 12.352790832519531, "learning_rate": 1.9983942197426272e-05, "loss": 1.2498, "step": 323 }, { "epoch": 19.058823529411764, "grad_norm": 14.190204620361328, "learning_rate": 1.9983671258224808e-05, "loss": 1.408, "step": 324 }, { "epoch": 19.11764705882353, "grad_norm": 11.368410110473633, "learning_rate": 1.9983398054229296e-05, "loss": 1.4183, "step": 325 }, { "epoch": 19.176470588235293, "grad_norm": 10.733102798461914, "learning_rate": 1.9983122585501708e-05, "loss": 1.2156, "step": 326 }, { "epoch": 19.235294117647058, "grad_norm": 13.40896987915039, "learning_rate": 1.9982844852104538e-05, "loss": 1.311, "step": 327 }, { "epoch": 19.294117647058822, "grad_norm": 17.589664459228516, "learning_rate": 1.9982564854100786e-05, "loss": 1.2715, "step": 328 }, { "epoch": 19.352941176470587, "grad_norm": 12.28177261352539, "learning_rate": 1.9982282591553974e-05, "loss": 1.2876, "step": 329 }, { "epoch": 19.41176470588235, "grad_norm": 12.943239212036133, "learning_rate": 1.9981998064528133e-05, "loss": 1.4236, "step": 330 }, { "epoch": 19.470588235294116, "grad_norm": 10.253822326660156, "learning_rate": 1.9981711273087802e-05, "loss": 1.5972, "step": 331 }, { "epoch": 19.529411764705884, "grad_norm": 15.528768539428711, "learning_rate": 1.998142221729804e-05, "loss": 1.4919, "step": 332 }, { "epoch": 19.58823529411765, "grad_norm": 12.978276252746582, "learning_rate": 1.998113089722443e-05, "loss": 1.3457, "step": 333 }, { "epoch": 19.647058823529413, "grad_norm": 11.223001480102539, "learning_rate": 1.9980837312933044e-05, "loss": 1.3945, "step": 334 }, { "epoch": 19.705882352941178, "grad_norm": 13.505941390991211, "learning_rate": 1.998054146449049e-05, "loss": 1.2375, "step": 335 }, { "epoch": 19.764705882352942, "grad_norm": 11.05418872833252, "learning_rate": 1.9980243351963887e-05, "loss": 1.3405, "step": 336 }, { "epoch": 19.823529411764707, "grad_norm": 10.88160228729248, "learning_rate": 1.9979942975420845e-05, "loss": 1.3569, "step": 337 }, { "epoch": 19.88235294117647, "grad_norm": 13.959572792053223, "learning_rate": 1.9979640334929516e-05, "loss": 1.3564, "step": 338 }, { "epoch": 19.941176470588236, "grad_norm": 10.794942855834961, "learning_rate": 1.9979335430558552e-05, "loss": 1.2727, "step": 339 }, { "epoch": 20.0, "grad_norm": 12.409996032714844, "learning_rate": 1.997902826237712e-05, "loss": 1.4034, "step": 340 }, { "epoch": 20.058823529411764, "grad_norm": 15.122093200683594, "learning_rate": 1.9978718830454898e-05, "loss": 1.3135, "step": 341 }, { "epoch": 20.11764705882353, "grad_norm": 12.474552154541016, "learning_rate": 1.9978407134862086e-05, "loss": 1.3555, "step": 342 }, { "epoch": 20.176470588235293, "grad_norm": 11.723274230957031, "learning_rate": 1.9978093175669387e-05, "loss": 1.0212, "step": 343 }, { "epoch": 20.235294117647058, "grad_norm": 15.4851713180542, "learning_rate": 1.997777695294803e-05, "loss": 1.2568, "step": 344 }, { "epoch": 20.294117647058822, "grad_norm": 13.079642295837402, "learning_rate": 1.997745846676974e-05, "loss": 1.0637, "step": 345 }, { "epoch": 20.352941176470587, "grad_norm": 10.898942947387695, "learning_rate": 1.9977137717206774e-05, "loss": 1.3628, "step": 346 }, { "epoch": 20.41176470588235, "grad_norm": 14.524182319641113, "learning_rate": 1.9976814704331887e-05, "loss": 1.1697, "step": 347 }, { "epoch": 20.470588235294116, "grad_norm": 13.013614654541016, "learning_rate": 1.997648942821836e-05, "loss": 1.2429, "step": 348 }, { "epoch": 20.529411764705884, "grad_norm": 14.292510986328125, "learning_rate": 1.9976161888939984e-05, "loss": 1.5352, "step": 349 }, { "epoch": 20.58823529411765, "grad_norm": 13.172117233276367, "learning_rate": 1.9975832086571053e-05, "loss": 1.145, "step": 350 }, { "epoch": 20.647058823529413, "grad_norm": 11.645720481872559, "learning_rate": 1.9975500021186387e-05, "loss": 1.3458, "step": 351 }, { "epoch": 20.705882352941178, "grad_norm": 13.429648399353027, "learning_rate": 1.9975165692861315e-05, "loss": 1.3508, "step": 352 }, { "epoch": 20.764705882352942, "grad_norm": 9.450648307800293, "learning_rate": 1.997482910167168e-05, "loss": 1.3591, "step": 353 }, { "epoch": 20.823529411764707, "grad_norm": 10.992084503173828, "learning_rate": 1.9974490247693832e-05, "loss": 1.3064, "step": 354 }, { "epoch": 20.88235294117647, "grad_norm": 13.32996654510498, "learning_rate": 1.997414913100465e-05, "loss": 1.2429, "step": 355 }, { "epoch": 20.941176470588236, "grad_norm": 11.370253562927246, "learning_rate": 1.997380575168151e-05, "loss": 1.0391, "step": 356 }, { "epoch": 21.0, "grad_norm": 9.864896774291992, "learning_rate": 1.9973460109802306e-05, "loss": 1.2563, "step": 357 }, { "epoch": 21.058823529411764, "grad_norm": 11.51588249206543, "learning_rate": 1.9973112205445452e-05, "loss": 1.3486, "step": 358 }, { "epoch": 21.11764705882353, "grad_norm": 11.28585433959961, "learning_rate": 1.9972762038689865e-05, "loss": 1.0596, "step": 359 }, { "epoch": 21.176470588235293, "grad_norm": 10.13066291809082, "learning_rate": 1.9972409609614986e-05, "loss": 1.407, "step": 360 }, { "epoch": 21.235294117647058, "grad_norm": 12.727922439575195, "learning_rate": 1.9972054918300757e-05, "loss": 1.2767, "step": 361 }, { "epoch": 21.294117647058822, "grad_norm": 13.442667961120605, "learning_rate": 1.9971697964827643e-05, "loss": 1.2253, "step": 362 }, { "epoch": 21.352941176470587, "grad_norm": 9.988303184509277, "learning_rate": 1.9971338749276618e-05, "loss": 1.0627, "step": 363 }, { "epoch": 21.41176470588235, "grad_norm": 13.16825008392334, "learning_rate": 1.9970977271729172e-05, "loss": 1.1302, "step": 364 }, { "epoch": 21.470588235294116, "grad_norm": 16.701465606689453, "learning_rate": 1.9970613532267305e-05, "loss": 1.1345, "step": 365 }, { "epoch": 21.529411764705884, "grad_norm": 13.941383361816406, "learning_rate": 1.997024753097353e-05, "loss": 1.1628, "step": 366 }, { "epoch": 21.58823529411765, "grad_norm": 11.873706817626953, "learning_rate": 1.9969879267930876e-05, "loss": 1.1028, "step": 367 }, { "epoch": 21.647058823529413, "grad_norm": 12.428821563720703, "learning_rate": 1.996950874322288e-05, "loss": 1.2887, "step": 368 }, { "epoch": 21.705882352941178, "grad_norm": 9.995906829833984, "learning_rate": 1.9969135956933603e-05, "loss": 1.1285, "step": 369 }, { "epoch": 21.764705882352942, "grad_norm": 10.408707618713379, "learning_rate": 1.9968760909147603e-05, "loss": 1.1982, "step": 370 }, { "epoch": 21.823529411764707, "grad_norm": 13.146899223327637, "learning_rate": 1.9968383599949964e-05, "loss": 0.9257, "step": 371 }, { "epoch": 21.88235294117647, "grad_norm": 9.925361633300781, "learning_rate": 1.9968004029426277e-05, "loss": 1.3275, "step": 372 }, { "epoch": 21.941176470588236, "grad_norm": 9.511711120605469, "learning_rate": 1.9967622197662648e-05, "loss": 1.019, "step": 373 }, { "epoch": 22.0, "grad_norm": 11.32809829711914, "learning_rate": 1.9967238104745695e-05, "loss": 1.3127, "step": 374 }, { "epoch": 22.058823529411764, "grad_norm": 8.903733253479004, "learning_rate": 1.9966851750762553e-05, "loss": 1.2256, "step": 375 }, { "epoch": 22.11764705882353, "grad_norm": 9.838861465454102, "learning_rate": 1.996646313580086e-05, "loss": 1.124, "step": 376 }, { "epoch": 22.176470588235293, "grad_norm": 9.71605396270752, "learning_rate": 1.9966072259948776e-05, "loss": 1.0092, "step": 377 }, { "epoch": 22.235294117647058, "grad_norm": 9.887864112854004, "learning_rate": 1.9965679123294977e-05, "loss": 1.0057, "step": 378 }, { "epoch": 22.294117647058822, "grad_norm": 9.346569061279297, "learning_rate": 1.9965283725928635e-05, "loss": 1.0779, "step": 379 }, { "epoch": 22.352941176470587, "grad_norm": 9.403078079223633, "learning_rate": 1.9964886067939453e-05, "loss": 1.1166, "step": 380 }, { "epoch": 22.41176470588235, "grad_norm": 10.20677375793457, "learning_rate": 1.996448614941764e-05, "loss": 1.2169, "step": 381 }, { "epoch": 22.470588235294116, "grad_norm": 8.65866470336914, "learning_rate": 1.9964083970453916e-05, "loss": 1.0653, "step": 382 }, { "epoch": 22.529411764705884, "grad_norm": 9.778243064880371, "learning_rate": 1.9963679531139513e-05, "loss": 1.1052, "step": 383 }, { "epoch": 22.58823529411765, "grad_norm": 9.050769805908203, "learning_rate": 1.996327283156618e-05, "loss": 1.2169, "step": 384 }, { "epoch": 22.647058823529413, "grad_norm": 8.876376152038574, "learning_rate": 1.9962863871826178e-05, "loss": 0.9745, "step": 385 }, { "epoch": 22.705882352941178, "grad_norm": 9.806035041809082, "learning_rate": 1.9962452652012274e-05, "loss": 1.1194, "step": 386 }, { "epoch": 22.764705882352942, "grad_norm": 10.212801933288574, "learning_rate": 1.996203917221776e-05, "loss": 1.0471, "step": 387 }, { "epoch": 22.823529411764707, "grad_norm": 10.90985107421875, "learning_rate": 1.996162343253643e-05, "loss": 1.0916, "step": 388 }, { "epoch": 22.88235294117647, "grad_norm": 10.819242477416992, "learning_rate": 1.9961205433062598e-05, "loss": 1.0284, "step": 389 }, { "epoch": 22.941176470588236, "grad_norm": 12.506490707397461, "learning_rate": 1.996078517389108e-05, "loss": 0.9978, "step": 390 }, { "epoch": 23.0, "grad_norm": 9.14217472076416, "learning_rate": 1.996036265511722e-05, "loss": 0.9127, "step": 391 }, { "epoch": 23.058823529411764, "grad_norm": 9.007110595703125, "learning_rate": 1.9959937876836858e-05, "loss": 1.1738, "step": 392 }, { "epoch": 23.11764705882353, "grad_norm": 10.070932388305664, "learning_rate": 1.9959510839146364e-05, "loss": 0.902, "step": 393 }, { "epoch": 23.176470588235293, "grad_norm": 8.211676597595215, "learning_rate": 1.9959081542142606e-05, "loss": 0.8916, "step": 394 }, { "epoch": 23.235294117647058, "grad_norm": 11.396098136901855, "learning_rate": 1.9958649985922972e-05, "loss": 1.2354, "step": 395 }, { "epoch": 23.294117647058822, "grad_norm": 9.86749267578125, "learning_rate": 1.9958216170585356e-05, "loss": 1.0033, "step": 396 }, { "epoch": 23.352941176470587, "grad_norm": 8.67390251159668, "learning_rate": 1.9957780096228173e-05, "loss": 0.8306, "step": 397 }, { "epoch": 23.41176470588235, "grad_norm": 8.445151329040527, "learning_rate": 1.9957341762950346e-05, "loss": 0.9283, "step": 398 }, { "epoch": 23.470588235294116, "grad_norm": 9.473772048950195, "learning_rate": 1.9956901170851314e-05, "loss": 0.7494, "step": 399 }, { "epoch": 23.529411764705884, "grad_norm": 9.651832580566406, "learning_rate": 1.9956458320031016e-05, "loss": 1.1848, "step": 400 }, { "epoch": 23.58823529411765, "grad_norm": 9.225592613220215, "learning_rate": 1.995601321058992e-05, "loss": 0.9349, "step": 401 }, { "epoch": 23.647058823529413, "grad_norm": 9.724512100219727, "learning_rate": 1.9955565842628996e-05, "loss": 1.0385, "step": 402 }, { "epoch": 23.705882352941178, "grad_norm": 11.14598560333252, "learning_rate": 1.9955116216249736e-05, "loss": 1.1036, "step": 403 }, { "epoch": 23.764705882352942, "grad_norm": 12.257299423217773, "learning_rate": 1.9954664331554126e-05, "loss": 0.9392, "step": 404 }, { "epoch": 23.823529411764707, "grad_norm": 8.725815773010254, "learning_rate": 1.9954210188644687e-05, "loss": 1.3721, "step": 405 }, { "epoch": 23.88235294117647, "grad_norm": 8.42628002166748, "learning_rate": 1.995375378762443e-05, "loss": 0.8777, "step": 406 }, { "epoch": 23.941176470588236, "grad_norm": 8.630249977111816, "learning_rate": 1.9953295128596905e-05, "loss": 0.8802, "step": 407 }, { "epoch": 24.0, "grad_norm": 8.559574127197266, "learning_rate": 1.995283421166614e-05, "loss": 1.0759, "step": 408 }, { "epoch": 24.058823529411764, "grad_norm": 9.38712215423584, "learning_rate": 1.995237103693671e-05, "loss": 0.98, "step": 409 }, { "epoch": 24.11764705882353, "grad_norm": 9.595390319824219, "learning_rate": 1.9951905604513677e-05, "loss": 0.8521, "step": 410 }, { "epoch": 24.176470588235293, "grad_norm": 8.509419441223145, "learning_rate": 1.995143791450263e-05, "loss": 0.7998, "step": 411 }, { "epoch": 24.235294117647058, "grad_norm": 10.477494239807129, "learning_rate": 1.9950967967009657e-05, "loss": 0.7798, "step": 412 }, { "epoch": 24.294117647058822, "grad_norm": 10.087718963623047, "learning_rate": 1.9950495762141372e-05, "loss": 0.9608, "step": 413 }, { "epoch": 24.352941176470587, "grad_norm": 8.06483268737793, "learning_rate": 1.9950021300004895e-05, "loss": 0.967, "step": 414 }, { "epoch": 24.41176470588235, "grad_norm": 9.75458812713623, "learning_rate": 1.994954458070785e-05, "loss": 0.9332, "step": 415 }, { "epoch": 24.470588235294116, "grad_norm": 10.130102157592773, "learning_rate": 1.994906560435839e-05, "loss": 0.9685, "step": 416 }, { "epoch": 24.529411764705884, "grad_norm": 9.894362449645996, "learning_rate": 1.9948584371065165e-05, "loss": 0.9434, "step": 417 }, { "epoch": 24.58823529411765, "grad_norm": 12.085103034973145, "learning_rate": 1.9948100880937345e-05, "loss": 0.9722, "step": 418 }, { "epoch": 24.647058823529413, "grad_norm": 9.014267921447754, "learning_rate": 1.9947615134084607e-05, "loss": 1.0511, "step": 419 }, { "epoch": 24.705882352941178, "grad_norm": 9.02726936340332, "learning_rate": 1.9947127130617144e-05, "loss": 1.0548, "step": 420 }, { "epoch": 24.764705882352942, "grad_norm": 9.800994873046875, "learning_rate": 1.9946636870645663e-05, "loss": 1.0779, "step": 421 }, { "epoch": 24.823529411764707, "grad_norm": 9.839630126953125, "learning_rate": 1.9946144354281375e-05, "loss": 0.851, "step": 422 }, { "epoch": 24.88235294117647, "grad_norm": 11.427772521972656, "learning_rate": 1.9945649581636005e-05, "loss": 1.0225, "step": 423 }, { "epoch": 24.941176470588236, "grad_norm": 10.385010719299316, "learning_rate": 1.99451525528218e-05, "loss": 0.842, "step": 424 }, { "epoch": 25.0, "grad_norm": 9.20156478881836, "learning_rate": 1.9944653267951507e-05, "loss": 1.0435, "step": 425 }, { "epoch": 25.058823529411764, "grad_norm": 8.465832710266113, "learning_rate": 1.9944151727138385e-05, "loss": 0.8011, "step": 426 }, { "epoch": 25.11764705882353, "grad_norm": 8.551667213439941, "learning_rate": 1.994364793049621e-05, "loss": 0.7866, "step": 427 }, { "epoch": 25.176470588235293, "grad_norm": 7.380870342254639, "learning_rate": 1.9943141878139274e-05, "loss": 0.9948, "step": 428 }, { "epoch": 25.235294117647058, "grad_norm": 8.842757225036621, "learning_rate": 1.994263357018237e-05, "loss": 0.8337, "step": 429 }, { "epoch": 25.294117647058822, "grad_norm": 8.249530792236328, "learning_rate": 1.994212300674081e-05, "loss": 0.8853, "step": 430 }, { "epoch": 25.352941176470587, "grad_norm": 8.749493598937988, "learning_rate": 1.994161018793041e-05, "loss": 0.9282, "step": 431 }, { "epoch": 25.41176470588235, "grad_norm": 9.60055923461914, "learning_rate": 1.9941095113867507e-05, "loss": 0.8837, "step": 432 }, { "epoch": 25.470588235294116, "grad_norm": 8.89350414276123, "learning_rate": 1.9940577784668948e-05, "loss": 0.8476, "step": 433 }, { "epoch": 25.529411764705884, "grad_norm": 10.504905700683594, "learning_rate": 1.9940058200452083e-05, "loss": 0.8626, "step": 434 }, { "epoch": 25.58823529411765, "grad_norm": 8.1026029586792, "learning_rate": 1.9939536361334785e-05, "loss": 0.817, "step": 435 }, { "epoch": 25.647058823529413, "grad_norm": 8.504571914672852, "learning_rate": 1.9939012267435426e-05, "loss": 0.8915, "step": 436 }, { "epoch": 25.705882352941178, "grad_norm": 8.535932540893555, "learning_rate": 1.9938485918872903e-05, "loss": 0.8114, "step": 437 }, { "epoch": 25.764705882352942, "grad_norm": 9.950298309326172, "learning_rate": 1.993795731576662e-05, "loss": 0.8104, "step": 438 }, { "epoch": 25.823529411764707, "grad_norm": 10.134858131408691, "learning_rate": 1.993742645823648e-05, "loss": 1.0039, "step": 439 }, { "epoch": 25.88235294117647, "grad_norm": 9.84769344329834, "learning_rate": 1.993689334640292e-05, "loss": 0.8204, "step": 440 }, { "epoch": 25.941176470588236, "grad_norm": 8.630704879760742, "learning_rate": 1.993635798038687e-05, "loss": 0.9629, "step": 441 }, { "epoch": 26.0, "grad_norm": 10.74541187286377, "learning_rate": 1.993582036030978e-05, "loss": 0.8712, "step": 442 }, { "epoch": 26.058823529411764, "grad_norm": 10.440545082092285, "learning_rate": 1.9935280486293603e-05, "loss": 1.0154, "step": 443 }, { "epoch": 26.11764705882353, "grad_norm": 9.343551635742188, "learning_rate": 1.9934738358460818e-05, "loss": 0.8425, "step": 444 }, { "epoch": 26.176470588235293, "grad_norm": 7.248894214630127, "learning_rate": 1.99341939769344e-05, "loss": 0.7526, "step": 445 }, { "epoch": 26.235294117647058, "grad_norm": 8.376642227172852, "learning_rate": 1.9933647341837846e-05, "loss": 0.7814, "step": 446 }, { "epoch": 26.294117647058822, "grad_norm": 10.648490905761719, "learning_rate": 1.9933098453295162e-05, "loss": 0.9952, "step": 447 }, { "epoch": 26.352941176470587, "grad_norm": 8.688141822814941, "learning_rate": 1.9932547311430858e-05, "loss": 0.7867, "step": 448 }, { "epoch": 26.41176470588235, "grad_norm": 8.420398712158203, "learning_rate": 1.993199391636996e-05, "loss": 0.635, "step": 449 }, { "epoch": 26.470588235294116, "grad_norm": 7.840493202209473, "learning_rate": 1.9931438268238005e-05, "loss": 0.6466, "step": 450 }, { "epoch": 26.529411764705884, "grad_norm": 8.49230670928955, "learning_rate": 1.9930880367161052e-05, "loss": 0.7792, "step": 451 }, { "epoch": 26.58823529411765, "grad_norm": 12.778702735900879, "learning_rate": 1.993032021326565e-05, "loss": 0.8466, "step": 452 }, { "epoch": 26.647058823529413, "grad_norm": 8.074623107910156, "learning_rate": 1.992975780667887e-05, "loss": 0.7574, "step": 453 }, { "epoch": 26.705882352941178, "grad_norm": 10.056559562683105, "learning_rate": 1.99291931475283e-05, "loss": 0.6577, "step": 454 }, { "epoch": 26.764705882352942, "grad_norm": 7.490635871887207, "learning_rate": 1.992862623594203e-05, "loss": 0.7845, "step": 455 }, { "epoch": 26.823529411764707, "grad_norm": 9.720763206481934, "learning_rate": 1.9928057072048662e-05, "loss": 0.9535, "step": 456 }, { "epoch": 26.88235294117647, "grad_norm": 8.183370590209961, "learning_rate": 1.9927485655977318e-05, "loss": 0.7652, "step": 457 }, { "epoch": 26.941176470588236, "grad_norm": 8.541788101196289, "learning_rate": 1.992691198785761e-05, "loss": 0.8101, "step": 458 }, { "epoch": 27.0, "grad_norm": 9.022465705871582, "learning_rate": 1.9926336067819686e-05, "loss": 0.9492, "step": 459 }, { "epoch": 27.058823529411764, "grad_norm": 7.792491912841797, "learning_rate": 1.992575789599419e-05, "loss": 0.8368, "step": 460 }, { "epoch": 27.11764705882353, "grad_norm": 9.212955474853516, "learning_rate": 1.9925177472512282e-05, "loss": 0.6404, "step": 461 }, { "epoch": 27.176470588235293, "grad_norm": 7.8712382316589355, "learning_rate": 1.9924594797505622e-05, "loss": 0.8965, "step": 462 }, { "epoch": 27.235294117647058, "grad_norm": 7.67419958114624, "learning_rate": 1.9924009871106405e-05, "loss": 0.5638, "step": 463 }, { "epoch": 27.294117647058822, "grad_norm": 8.989701271057129, "learning_rate": 1.9923422693447307e-05, "loss": 0.8002, "step": 464 }, { "epoch": 27.352941176470587, "grad_norm": 9.678376197814941, "learning_rate": 1.9922833264661538e-05, "loss": 0.808, "step": 465 }, { "epoch": 27.41176470588235, "grad_norm": 6.8461127281188965, "learning_rate": 1.9922241584882805e-05, "loss": 0.8602, "step": 466 }, { "epoch": 27.470588235294116, "grad_norm": 8.264897346496582, "learning_rate": 1.9921647654245336e-05, "loss": 0.8262, "step": 467 }, { "epoch": 27.529411764705884, "grad_norm": 7.637362957000732, "learning_rate": 1.992105147288386e-05, "loss": 0.6861, "step": 468 }, { "epoch": 27.58823529411765, "grad_norm": 10.429255485534668, "learning_rate": 1.9920453040933618e-05, "loss": 0.9442, "step": 469 }, { "epoch": 27.647058823529413, "grad_norm": 9.251116752624512, "learning_rate": 1.991985235853037e-05, "loss": 0.7321, "step": 470 }, { "epoch": 27.705882352941178, "grad_norm": 6.5098981857299805, "learning_rate": 1.991924942581038e-05, "loss": 0.6644, "step": 471 }, { "epoch": 27.764705882352942, "grad_norm": 7.670507907867432, "learning_rate": 1.9918644242910418e-05, "loss": 0.9241, "step": 472 }, { "epoch": 27.823529411764707, "grad_norm": 8.245807647705078, "learning_rate": 1.9918036809967775e-05, "loss": 0.7057, "step": 473 }, { "epoch": 27.88235294117647, "grad_norm": 6.92917013168335, "learning_rate": 1.9917427127120242e-05, "loss": 0.6173, "step": 474 }, { "epoch": 27.941176470588236, "grad_norm": 8.565613746643066, "learning_rate": 1.9916815194506127e-05, "loss": 0.6385, "step": 475 }, { "epoch": 28.0, "grad_norm": 7.520271301269531, "learning_rate": 1.9916201012264255e-05, "loss": 0.5112, "step": 476 }, { "epoch": 28.058823529411764, "grad_norm": 6.489360809326172, "learning_rate": 1.9915584580533938e-05, "loss": 0.5986, "step": 477 }, { "epoch": 28.11764705882353, "grad_norm": 7.243619441986084, "learning_rate": 1.991496589945503e-05, "loss": 0.6538, "step": 478 }, { "epoch": 28.176470588235293, "grad_norm": 9.745566368103027, "learning_rate": 1.9914344969167865e-05, "loss": 0.6912, "step": 479 }, { "epoch": 28.235294117647058, "grad_norm": 8.437952041625977, "learning_rate": 1.991372178981331e-05, "loss": 0.6791, "step": 480 }, { "epoch": 28.294117647058822, "grad_norm": 8.32884407043457, "learning_rate": 1.9913096361532727e-05, "loss": 0.7352, "step": 481 }, { "epoch": 28.352941176470587, "grad_norm": 7.927309989929199, "learning_rate": 1.9912468684467997e-05, "loss": 0.5862, "step": 482 }, { "epoch": 28.41176470588235, "grad_norm": 8.880215644836426, "learning_rate": 1.9911838758761508e-05, "loss": 0.7539, "step": 483 }, { "epoch": 28.470588235294116, "grad_norm": 8.67226505279541, "learning_rate": 1.991120658455616e-05, "loss": 0.7145, "step": 484 }, { "epoch": 28.529411764705884, "grad_norm": 6.978667259216309, "learning_rate": 1.991057216199536e-05, "loss": 0.6099, "step": 485 }, { "epoch": 28.58823529411765, "grad_norm": 7.177332878112793, "learning_rate": 1.990993549122303e-05, "loss": 0.7916, "step": 486 }, { "epoch": 28.647058823529413, "grad_norm": 7.401503562927246, "learning_rate": 1.9909296572383593e-05, "loss": 0.9043, "step": 487 }, { "epoch": 28.705882352941178, "grad_norm": 7.431807518005371, "learning_rate": 1.990865540562199e-05, "loss": 0.7433, "step": 488 }, { "epoch": 28.764705882352942, "grad_norm": 10.64960765838623, "learning_rate": 1.990801199108367e-05, "loss": 0.599, "step": 489 }, { "epoch": 28.823529411764707, "grad_norm": 8.302475929260254, "learning_rate": 1.990736632891459e-05, "loss": 0.6572, "step": 490 }, { "epoch": 28.88235294117647, "grad_norm": 7.8737287521362305, "learning_rate": 1.990671841926122e-05, "loss": 0.5441, "step": 491 }, { "epoch": 28.941176470588236, "grad_norm": 9.041290283203125, "learning_rate": 1.990606826227054e-05, "loss": 0.7266, "step": 492 }, { "epoch": 29.0, "grad_norm": 7.909440994262695, "learning_rate": 1.9905415858090036e-05, "loss": 0.601, "step": 493 }, { "epoch": 29.058823529411764, "grad_norm": 7.882006645202637, "learning_rate": 1.9904761206867702e-05, "loss": 0.6471, "step": 494 }, { "epoch": 29.11764705882353, "grad_norm": 6.559683799743652, "learning_rate": 1.9904104308752053e-05, "loss": 0.7682, "step": 495 }, { "epoch": 29.176470588235293, "grad_norm": 7.539209842681885, "learning_rate": 1.99034451638921e-05, "loss": 0.7015, "step": 496 }, { "epoch": 29.235294117647058, "grad_norm": 9.794404029846191, "learning_rate": 1.9902783772437377e-05, "loss": 0.8175, "step": 497 }, { "epoch": 29.294117647058822, "grad_norm": 8.505248069763184, "learning_rate": 1.990212013453791e-05, "loss": 0.7873, "step": 498 }, { "epoch": 29.352941176470587, "grad_norm": 7.9317708015441895, "learning_rate": 1.9901454250344253e-05, "loss": 0.8162, "step": 499 }, { "epoch": 29.41176470588235, "grad_norm": 7.80979061126709, "learning_rate": 1.990078612000746e-05, "loss": 0.4734, "step": 500 }, { "epoch": 29.470588235294116, "grad_norm": 7.579810619354248, "learning_rate": 1.9900115743679095e-05, "loss": 0.4464, "step": 501 }, { "epoch": 29.529411764705884, "grad_norm": 8.921381950378418, "learning_rate": 1.9899443121511236e-05, "loss": 0.594, "step": 502 }, { "epoch": 29.58823529411765, "grad_norm": 6.904248237609863, "learning_rate": 1.9898768253656463e-05, "loss": 0.6729, "step": 503 }, { "epoch": 29.647058823529413, "grad_norm": 7.601705074310303, "learning_rate": 1.989809114026787e-05, "loss": 0.643, "step": 504 }, { "epoch": 29.705882352941178, "grad_norm": 7.452754020690918, "learning_rate": 1.9897411781499066e-05, "loss": 0.5947, "step": 505 }, { "epoch": 29.764705882352942, "grad_norm": 8.998228073120117, "learning_rate": 1.9896730177504154e-05, "loss": 0.6686, "step": 506 }, { "epoch": 29.823529411764707, "grad_norm": 10.327400207519531, "learning_rate": 1.9896046328437767e-05, "loss": 0.6553, "step": 507 }, { "epoch": 29.88235294117647, "grad_norm": 7.590938568115234, "learning_rate": 1.9895360234455023e-05, "loss": 0.4666, "step": 508 }, { "epoch": 29.941176470588236, "grad_norm": 7.3427414894104, "learning_rate": 1.9894671895711573e-05, "loss": 0.6658, "step": 509 }, { "epoch": 30.0, "grad_norm": 7.877609729766846, "learning_rate": 1.9893981312363563e-05, "loss": 0.7198, "step": 510 }, { "epoch": 30.058823529411764, "grad_norm": 7.707434177398682, "learning_rate": 1.989328848456765e-05, "loss": 0.4323, "step": 511 }, { "epoch": 30.11764705882353, "grad_norm": 7.576554298400879, "learning_rate": 1.9892593412481004e-05, "loss": 0.5953, "step": 512 }, { "epoch": 30.176470588235293, "grad_norm": 7.765503406524658, "learning_rate": 1.9891896096261297e-05, "loss": 0.5317, "step": 513 }, { "epoch": 30.235294117647058, "grad_norm": 8.552412033081055, "learning_rate": 1.9891196536066725e-05, "loss": 0.6091, "step": 514 }, { "epoch": 30.294117647058822, "grad_norm": 6.832590579986572, "learning_rate": 1.9890494732055972e-05, "loss": 0.5402, "step": 515 }, { "epoch": 30.352941176470587, "grad_norm": 7.945550918579102, "learning_rate": 1.9889790684388252e-05, "loss": 0.6334, "step": 516 }, { "epoch": 30.41176470588235, "grad_norm": 7.466334819793701, "learning_rate": 1.9889084393223274e-05, "loss": 0.4444, "step": 517 }, { "epoch": 30.470588235294116, "grad_norm": 6.485209941864014, "learning_rate": 1.9888375858721256e-05, "loss": 0.7241, "step": 518 }, { "epoch": 30.529411764705884, "grad_norm": 8.160638809204102, "learning_rate": 1.9887665081042934e-05, "loss": 0.7956, "step": 519 }, { "epoch": 30.58823529411765, "grad_norm": 7.055693626403809, "learning_rate": 1.9886952060349546e-05, "loss": 0.6833, "step": 520 }, { "epoch": 30.647058823529413, "grad_norm": 8.830961227416992, "learning_rate": 1.988623679680284e-05, "loss": 0.6439, "step": 521 }, { "epoch": 30.705882352941178, "grad_norm": 7.474220275878906, "learning_rate": 1.9885519290565078e-05, "loss": 0.6745, "step": 522 }, { "epoch": 30.764705882352942, "grad_norm": 7.574878692626953, "learning_rate": 1.9884799541799018e-05, "loss": 0.733, "step": 523 }, { "epoch": 30.823529411764707, "grad_norm": 6.732949256896973, "learning_rate": 1.9884077550667942e-05, "loss": 0.5056, "step": 524 }, { "epoch": 30.88235294117647, "grad_norm": 8.1598539352417, "learning_rate": 1.9883353317335633e-05, "loss": 0.5934, "step": 525 }, { "epoch": 30.941176470588236, "grad_norm": 7.737171649932861, "learning_rate": 1.988262684196638e-05, "loss": 0.5599, "step": 526 }, { "epoch": 31.0, "grad_norm": 7.946356296539307, "learning_rate": 1.988189812472498e-05, "loss": 0.6119, "step": 527 }, { "epoch": 31.058823529411764, "grad_norm": 7.558747291564941, "learning_rate": 1.988116716577675e-05, "loss": 0.4578, "step": 528 }, { "epoch": 31.11764705882353, "grad_norm": 6.331989288330078, "learning_rate": 1.9880433965287506e-05, "loss": 0.3685, "step": 529 }, { "epoch": 31.176470588235293, "grad_norm": 8.693822860717773, "learning_rate": 1.9879698523423573e-05, "loss": 0.4949, "step": 530 }, { "epoch": 31.235294117647058, "grad_norm": 7.206444263458252, "learning_rate": 1.9878960840351786e-05, "loss": 0.6959, "step": 531 }, { "epoch": 31.294117647058822, "grad_norm": 6.96695613861084, "learning_rate": 1.987822091623949e-05, "loss": 0.7399, "step": 532 }, { "epoch": 31.352941176470587, "grad_norm": 6.860937595367432, "learning_rate": 1.9877478751254538e-05, "loss": 0.5027, "step": 533 }, { "epoch": 31.41176470588235, "grad_norm": 7.1408162117004395, "learning_rate": 1.9876734345565284e-05, "loss": 0.5573, "step": 534 }, { "epoch": 31.470588235294116, "grad_norm": 7.120467662811279, "learning_rate": 1.9875987699340598e-05, "loss": 0.6655, "step": 535 }, { "epoch": 31.529411764705884, "grad_norm": 7.138544082641602, "learning_rate": 1.987523881274986e-05, "loss": 0.7514, "step": 536 }, { "epoch": 31.58823529411765, "grad_norm": 6.173851490020752, "learning_rate": 1.9874487685962953e-05, "loss": 0.455, "step": 537 }, { "epoch": 31.647058823529413, "grad_norm": 9.013117790222168, "learning_rate": 1.9873734319150273e-05, "loss": 0.5053, "step": 538 }, { "epoch": 31.705882352941178, "grad_norm": 6.969003677368164, "learning_rate": 1.9872978712482713e-05, "loss": 0.6453, "step": 539 }, { "epoch": 31.764705882352942, "grad_norm": 6.665143013000488, "learning_rate": 1.987222086613169e-05, "loss": 0.7439, "step": 540 }, { "epoch": 31.823529411764707, "grad_norm": 8.601595878601074, "learning_rate": 1.9871460780269117e-05, "loss": 0.5374, "step": 541 }, { "epoch": 31.88235294117647, "grad_norm": 7.456819534301758, "learning_rate": 1.9870698455067425e-05, "loss": 0.6487, "step": 542 }, { "epoch": 31.941176470588236, "grad_norm": 7.926211833953857, "learning_rate": 1.9869933890699543e-05, "loss": 0.4514, "step": 543 }, { "epoch": 32.0, "grad_norm": 5.993492603302002, "learning_rate": 1.9869167087338908e-05, "loss": 0.381, "step": 544 }, { "epoch": 32.05882352941177, "grad_norm": 6.764225006103516, "learning_rate": 1.9868398045159476e-05, "loss": 0.6099, "step": 545 }, { "epoch": 32.11764705882353, "grad_norm": 5.864200592041016, "learning_rate": 1.9867626764335705e-05, "loss": 0.5998, "step": 546 }, { "epoch": 32.1764705882353, "grad_norm": 6.67226505279541, "learning_rate": 1.986685324504255e-05, "loss": 0.3718, "step": 547 }, { "epoch": 32.23529411764706, "grad_norm": 7.507357597351074, "learning_rate": 1.9866077487455497e-05, "loss": 0.4778, "step": 548 }, { "epoch": 32.294117647058826, "grad_norm": 7.1864705085754395, "learning_rate": 1.986529949175052e-05, "loss": 0.6683, "step": 549 }, { "epoch": 32.35294117647059, "grad_norm": 6.539555549621582, "learning_rate": 1.9864519258104103e-05, "loss": 0.5032, "step": 550 }, { "epoch": 32.411764705882355, "grad_norm": 6.331545352935791, "learning_rate": 1.9863736786693247e-05, "loss": 0.6132, "step": 551 }, { "epoch": 32.470588235294116, "grad_norm": 7.014053821563721, "learning_rate": 1.9862952077695457e-05, "loss": 0.6029, "step": 552 }, { "epoch": 32.529411764705884, "grad_norm": 6.894479751586914, "learning_rate": 1.986216513128874e-05, "loss": 0.5703, "step": 553 }, { "epoch": 32.588235294117645, "grad_norm": 7.693381309509277, "learning_rate": 1.986137594765162e-05, "loss": 0.7638, "step": 554 }, { "epoch": 32.64705882352941, "grad_norm": 7.672065734863281, "learning_rate": 1.9860584526963114e-05, "loss": 0.4608, "step": 555 }, { "epoch": 32.705882352941174, "grad_norm": 6.956604957580566, "learning_rate": 1.9859790869402764e-05, "loss": 0.3745, "step": 556 }, { "epoch": 32.76470588235294, "grad_norm": 6.616424083709717, "learning_rate": 1.985899497515061e-05, "loss": 0.4202, "step": 557 }, { "epoch": 32.8235294117647, "grad_norm": 7.292904853820801, "learning_rate": 1.9858196844387194e-05, "loss": 0.5689, "step": 558 }, { "epoch": 32.88235294117647, "grad_norm": 6.782787799835205, "learning_rate": 1.985739647729358e-05, "loss": 0.548, "step": 559 }, { "epoch": 32.94117647058823, "grad_norm": 8.56790542602539, "learning_rate": 1.9856593874051326e-05, "loss": 0.5427, "step": 560 }, { "epoch": 33.0, "grad_norm": 6.882591724395752, "learning_rate": 1.9855789034842504e-05, "loss": 0.6161, "step": 561 }, { "epoch": 33.05882352941177, "grad_norm": 5.58071756362915, "learning_rate": 1.9854981959849693e-05, "loss": 0.4587, "step": 562 }, { "epoch": 33.11764705882353, "grad_norm": 6.271571159362793, "learning_rate": 1.9854172649255976e-05, "loss": 0.4969, "step": 563 }, { "epoch": 33.1764705882353, "grad_norm": 6.151976585388184, "learning_rate": 1.9853361103244943e-05, "loss": 0.4388, "step": 564 }, { "epoch": 33.23529411764706, "grad_norm": 6.576608657836914, "learning_rate": 1.98525473220007e-05, "loss": 0.4969, "step": 565 }, { "epoch": 33.294117647058826, "grad_norm": 6.38634729385376, "learning_rate": 1.9851731305707848e-05, "loss": 0.271, "step": 566 }, { "epoch": 33.35294117647059, "grad_norm": 6.806992053985596, "learning_rate": 1.98509130545515e-05, "loss": 0.5169, "step": 567 }, { "epoch": 33.411764705882355, "grad_norm": 6.714258670806885, "learning_rate": 1.9850092568717273e-05, "loss": 0.4074, "step": 568 }, { "epoch": 33.470588235294116, "grad_norm": 6.400191783905029, "learning_rate": 1.9849269848391302e-05, "loss": 0.56, "step": 569 }, { "epoch": 33.529411764705884, "grad_norm": 7.295366287231445, "learning_rate": 1.9848444893760216e-05, "loss": 0.5787, "step": 570 }, { "epoch": 33.588235294117645, "grad_norm": 7.145001411437988, "learning_rate": 1.9847617705011156e-05, "loss": 0.5006, "step": 571 }, { "epoch": 33.64705882352941, "grad_norm": 9.26329231262207, "learning_rate": 1.9846788282331773e-05, "loss": 0.506, "step": 572 }, { "epoch": 33.705882352941174, "grad_norm": 6.069484710693359, "learning_rate": 1.9845956625910218e-05, "loss": 0.5106, "step": 573 }, { "epoch": 33.76470588235294, "grad_norm": 6.1048078536987305, "learning_rate": 1.984512273593515e-05, "loss": 0.5914, "step": 574 }, { "epoch": 33.8235294117647, "grad_norm": 7.659140586853027, "learning_rate": 1.9844286612595743e-05, "loss": 0.4969, "step": 575 }, { "epoch": 33.88235294117647, "grad_norm": 5.78259801864624, "learning_rate": 1.984344825608167e-05, "loss": 0.6414, "step": 576 }, { "epoch": 33.94117647058823, "grad_norm": 6.124320030212402, "learning_rate": 1.9842607666583106e-05, "loss": 0.5883, "step": 577 }, { "epoch": 34.0, "grad_norm": 6.844267845153809, "learning_rate": 1.9841764844290744e-05, "loss": 0.3629, "step": 578 }, { "epoch": 34.05882352941177, "grad_norm": 7.427149772644043, "learning_rate": 1.9840919789395778e-05, "loss": 0.5623, "step": 579 }, { "epoch": 34.11764705882353, "grad_norm": 6.023584842681885, "learning_rate": 1.9840072502089906e-05, "loss": 0.4517, "step": 580 }, { "epoch": 34.1764705882353, "grad_norm": 6.7210001945495605, "learning_rate": 1.9839222982565335e-05, "loss": 0.5648, "step": 581 }, { "epoch": 34.23529411764706, "grad_norm": 6.12278938293457, "learning_rate": 1.9838371231014782e-05, "loss": 0.4501, "step": 582 }, { "epoch": 34.294117647058826, "grad_norm": 5.9851789474487305, "learning_rate": 1.9837517247631462e-05, "loss": 0.4612, "step": 583 }, { "epoch": 34.35294117647059, "grad_norm": 5.989927768707275, "learning_rate": 1.9836661032609104e-05, "loss": 0.4286, "step": 584 }, { "epoch": 34.411764705882355, "grad_norm": 6.777127742767334, "learning_rate": 1.9835802586141944e-05, "loss": 0.375, "step": 585 }, { "epoch": 34.470588235294116, "grad_norm": 5.28027868270874, "learning_rate": 1.983494190842471e-05, "loss": 0.3598, "step": 586 }, { "epoch": 34.529411764705884, "grad_norm": 6.758330345153809, "learning_rate": 1.983407899965266e-05, "loss": 0.4495, "step": 587 }, { "epoch": 34.588235294117645, "grad_norm": 7.0873122215271, "learning_rate": 1.983321386002153e-05, "loss": 0.5293, "step": 588 }, { "epoch": 34.64705882352941, "grad_norm": 7.478053092956543, "learning_rate": 1.9832346489727588e-05, "loss": 0.462, "step": 589 }, { "epoch": 34.705882352941174, "grad_norm": 6.936275005340576, "learning_rate": 1.9831476888967593e-05, "loss": 0.3752, "step": 590 }, { "epoch": 34.76470588235294, "grad_norm": 6.503251552581787, "learning_rate": 1.983060505793881e-05, "loss": 0.435, "step": 591 }, { "epoch": 34.8235294117647, "grad_norm": 5.472530364990234, "learning_rate": 1.982973099683902e-05, "loss": 0.5882, "step": 592 }, { "epoch": 34.88235294117647, "grad_norm": 8.767402648925781, "learning_rate": 1.98288547058665e-05, "loss": 0.3609, "step": 593 }, { "epoch": 34.94117647058823, "grad_norm": 8.083714485168457, "learning_rate": 1.9827976185220035e-05, "loss": 0.6984, "step": 594 }, { "epoch": 35.0, "grad_norm": 7.081943035125732, "learning_rate": 1.9827095435098926e-05, "loss": 0.4383, "step": 595 }, { "epoch": 35.05882352941177, "grad_norm": 5.379721164703369, "learning_rate": 1.9826212455702963e-05, "loss": 0.3031, "step": 596 }, { "epoch": 35.11764705882353, "grad_norm": 5.8803229331970215, "learning_rate": 1.9825327247232446e-05, "loss": 0.5199, "step": 597 }, { "epoch": 35.1764705882353, "grad_norm": 5.960088729858398, "learning_rate": 1.9824439809888197e-05, "loss": 0.5349, "step": 598 }, { "epoch": 35.23529411764706, "grad_norm": 5.662611484527588, "learning_rate": 1.982355014387152e-05, "loss": 0.4999, "step": 599 }, { "epoch": 35.294117647058826, "grad_norm": 5.399256706237793, "learning_rate": 1.9822658249384237e-05, "loss": 0.3109, "step": 600 }, { "epoch": 35.35294117647059, "grad_norm": 6.408110618591309, "learning_rate": 1.982176412662868e-05, "loss": 0.4474, "step": 601 }, { "epoch": 35.411764705882355, "grad_norm": 6.265597820281982, "learning_rate": 1.9820867775807676e-05, "loss": 0.3621, "step": 602 }, { "epoch": 35.470588235294116, "grad_norm": 7.649901390075684, "learning_rate": 1.9819969197124565e-05, "loss": 0.5226, "step": 603 }, { "epoch": 35.529411764705884, "grad_norm": 6.37490701675415, "learning_rate": 1.9819068390783184e-05, "loss": 0.4442, "step": 604 }, { "epoch": 35.588235294117645, "grad_norm": 6.392105579376221, "learning_rate": 1.9818165356987887e-05, "loss": 0.4215, "step": 605 }, { "epoch": 35.64705882352941, "grad_norm": 6.5612382888793945, "learning_rate": 1.9817260095943525e-05, "loss": 0.4518, "step": 606 }, { "epoch": 35.705882352941174, "grad_norm": 5.998802185058594, "learning_rate": 1.9816352607855455e-05, "loss": 0.3069, "step": 607 }, { "epoch": 35.76470588235294, "grad_norm": 7.392580032348633, "learning_rate": 1.981544289292954e-05, "loss": 0.4238, "step": 608 }, { "epoch": 35.8235294117647, "grad_norm": 7.29630184173584, "learning_rate": 1.981453095137215e-05, "loss": 0.4816, "step": 609 }, { "epoch": 35.88235294117647, "grad_norm": 5.974979400634766, "learning_rate": 1.9813616783390158e-05, "loss": 0.4252, "step": 610 }, { "epoch": 35.94117647058823, "grad_norm": 7.240076065063477, "learning_rate": 1.9812700389190945e-05, "loss": 0.6348, "step": 611 }, { "epoch": 36.0, "grad_norm": 7.136654853820801, "learning_rate": 1.9811781768982392e-05, "loss": 0.4326, "step": 612 }, { "epoch": 36.05882352941177, "grad_norm": 7.118417263031006, "learning_rate": 1.981086092297289e-05, "loss": 0.4162, "step": 613 }, { "epoch": 36.11764705882353, "grad_norm": 7.200650691986084, "learning_rate": 1.980993785137133e-05, "loss": 0.4177, "step": 614 }, { "epoch": 36.1764705882353, "grad_norm": 6.055899143218994, "learning_rate": 1.9809012554387115e-05, "loss": 0.4032, "step": 615 }, { "epoch": 36.23529411764706, "grad_norm": 5.977092742919922, "learning_rate": 1.9808085032230146e-05, "loss": 0.4147, "step": 616 }, { "epoch": 36.294117647058826, "grad_norm": 6.758155345916748, "learning_rate": 1.9807155285110832e-05, "loss": 0.5258, "step": 617 }, { "epoch": 36.35294117647059, "grad_norm": 11.828560829162598, "learning_rate": 1.9806223313240082e-05, "loss": 0.3609, "step": 618 }, { "epoch": 36.411764705882355, "grad_norm": 8.123369216918945, "learning_rate": 1.9805289116829315e-05, "loss": 0.3362, "step": 619 }, { "epoch": 36.470588235294116, "grad_norm": 9.13419246673584, "learning_rate": 1.980435269609046e-05, "loss": 0.3996, "step": 620 }, { "epoch": 36.529411764705884, "grad_norm": 6.419029235839844, "learning_rate": 1.9803414051235938e-05, "loss": 0.4417, "step": 621 }, { "epoch": 36.588235294117645, "grad_norm": 6.7250213623046875, "learning_rate": 1.980247318247868e-05, "loss": 0.4641, "step": 622 }, { "epoch": 36.64705882352941, "grad_norm": 5.3994317054748535, "learning_rate": 1.9801530090032123e-05, "loss": 0.3131, "step": 623 }, { "epoch": 36.705882352941174, "grad_norm": 6.561947822570801, "learning_rate": 1.9800584774110208e-05, "loss": 0.4226, "step": 624 }, { "epoch": 36.76470588235294, "grad_norm": 7.568603038787842, "learning_rate": 1.9799637234927384e-05, "loss": 0.3958, "step": 625 }, { "epoch": 36.8235294117647, "grad_norm": 44.0205078125, "learning_rate": 1.979868747269859e-05, "loss": 0.427, "step": 626 }, { "epoch": 36.88235294117647, "grad_norm": 5.4676923751831055, "learning_rate": 1.9797735487639288e-05, "loss": 0.4931, "step": 627 }, { "epoch": 36.94117647058823, "grad_norm": 5.912731170654297, "learning_rate": 1.979678127996543e-05, "loss": 0.6354, "step": 628 }, { "epoch": 37.0, "grad_norm": 6.309438228607178, "learning_rate": 1.9795824849893483e-05, "loss": 0.3394, "step": 629 }, { "epoch": 37.05882352941177, "grad_norm": 9.725333213806152, "learning_rate": 1.9794866197640406e-05, "loss": 0.5375, "step": 630 }, { "epoch": 37.11764705882353, "grad_norm": 6.820096969604492, "learning_rate": 1.9793905323423675e-05, "loss": 0.4184, "step": 631 }, { "epoch": 37.1764705882353, "grad_norm": 8.483501434326172, "learning_rate": 1.9792942227461265e-05, "loss": 0.5714, "step": 632 }, { "epoch": 37.23529411764706, "grad_norm": 7.295032978057861, "learning_rate": 1.979197690997165e-05, "loss": 0.4074, "step": 633 }, { "epoch": 37.294117647058826, "grad_norm": 6.712701320648193, "learning_rate": 1.9791009371173817e-05, "loss": 0.5455, "step": 634 }, { "epoch": 37.35294117647059, "grad_norm": 8.961592674255371, "learning_rate": 1.9790039611287248e-05, "loss": 0.4317, "step": 635 }, { "epoch": 37.411764705882355, "grad_norm": 5.972690105438232, "learning_rate": 1.9789067630531932e-05, "loss": 0.3223, "step": 636 }, { "epoch": 37.470588235294116, "grad_norm": 6.982143402099609, "learning_rate": 1.9788093429128364e-05, "loss": 0.3684, "step": 637 }, { "epoch": 37.529411764705884, "grad_norm": 7.315681457519531, "learning_rate": 1.978711700729754e-05, "loss": 0.3355, "step": 638 }, { "epoch": 37.588235294117645, "grad_norm": 4.6428422927856445, "learning_rate": 1.9786138365260967e-05, "loss": 0.2018, "step": 639 }, { "epoch": 37.64705882352941, "grad_norm": 5.681668281555176, "learning_rate": 1.9785157503240645e-05, "loss": 0.338, "step": 640 }, { "epoch": 37.705882352941174, "grad_norm": 7.907359600067139, "learning_rate": 1.9784174421459083e-05, "loss": 0.3814, "step": 641 }, { "epoch": 37.76470588235294, "grad_norm": 6.451786041259766, "learning_rate": 1.978318912013929e-05, "loss": 0.351, "step": 642 }, { "epoch": 37.8235294117647, "grad_norm": 7.725088596343994, "learning_rate": 1.9782201599504788e-05, "loss": 0.3915, "step": 643 }, { "epoch": 37.88235294117647, "grad_norm": 5.743388652801514, "learning_rate": 1.9781211859779593e-05, "loss": 0.4593, "step": 644 }, { "epoch": 37.94117647058823, "grad_norm": 6.925566673278809, "learning_rate": 1.9780219901188222e-05, "loss": 0.3639, "step": 645 }, { "epoch": 38.0, "grad_norm": 7.393144130706787, "learning_rate": 1.977922572395571e-05, "loss": 0.6194, "step": 646 }, { "epoch": 38.05882352941177, "grad_norm": 6.159358501434326, "learning_rate": 1.9778229328307577e-05, "loss": 0.278, "step": 647 }, { "epoch": 38.11764705882353, "grad_norm": 8.319305419921875, "learning_rate": 1.977723071446986e-05, "loss": 0.4198, "step": 648 }, { "epoch": 38.1764705882353, "grad_norm": 6.384435176849365, "learning_rate": 1.9776229882669095e-05, "loss": 0.3905, "step": 649 }, { "epoch": 38.23529411764706, "grad_norm": 5.490656852722168, "learning_rate": 1.9775226833132318e-05, "loss": 0.4086, "step": 650 }, { "epoch": 38.294117647058826, "grad_norm": 6.4956583976745605, "learning_rate": 1.9774221566087076e-05, "loss": 0.3691, "step": 651 }, { "epoch": 38.35294117647059, "grad_norm": 6.015424728393555, "learning_rate": 1.9773214081761404e-05, "loss": 0.5046, "step": 652 }, { "epoch": 38.411764705882355, "grad_norm": 15.76922607421875, "learning_rate": 1.977220438038386e-05, "loss": 0.5834, "step": 653 }, { "epoch": 38.470588235294116, "grad_norm": 5.6438775062561035, "learning_rate": 1.9771192462183486e-05, "loss": 0.3995, "step": 654 }, { "epoch": 38.529411764705884, "grad_norm": 6.137105941772461, "learning_rate": 1.977017832738984e-05, "loss": 0.3017, "step": 655 }, { "epoch": 38.588235294117645, "grad_norm": 5.990053176879883, "learning_rate": 1.976916197623298e-05, "loss": 0.2718, "step": 656 }, { "epoch": 38.64705882352941, "grad_norm": 5.703656196594238, "learning_rate": 1.9768143408943457e-05, "loss": 0.3794, "step": 657 }, { "epoch": 38.705882352941174, "grad_norm": 7.941743850708008, "learning_rate": 1.9767122625752345e-05, "loss": 0.4844, "step": 658 }, { "epoch": 38.76470588235294, "grad_norm": 6.959040641784668, "learning_rate": 1.97660996268912e-05, "loss": 0.3944, "step": 659 }, { "epoch": 38.8235294117647, "grad_norm": 5.590529918670654, "learning_rate": 1.9765074412592087e-05, "loss": 0.3436, "step": 660 }, { "epoch": 38.88235294117647, "grad_norm": 6.473758697509766, "learning_rate": 1.9764046983087582e-05, "loss": 0.4572, "step": 661 }, { "epoch": 38.94117647058823, "grad_norm": 8.84665298461914, "learning_rate": 1.9763017338610755e-05, "loss": 0.3459, "step": 662 }, { "epoch": 39.0, "grad_norm": 6.914156436920166, "learning_rate": 1.976198547939518e-05, "loss": 0.3071, "step": 663 }, { "epoch": 39.05882352941177, "grad_norm": 5.876584529876709, "learning_rate": 1.9760951405674936e-05, "loss": 0.4439, "step": 664 }, { "epoch": 39.11764705882353, "grad_norm": 5.950438976287842, "learning_rate": 1.97599151176846e-05, "loss": 0.4364, "step": 665 }, { "epoch": 39.1764705882353, "grad_norm": 6.327906608581543, "learning_rate": 1.9758876615659256e-05, "loss": 0.3084, "step": 666 }, { "epoch": 39.23529411764706, "grad_norm": 5.011962890625, "learning_rate": 1.9757835899834486e-05, "loss": 0.4264, "step": 667 }, { "epoch": 39.294117647058826, "grad_norm": 5.518246173858643, "learning_rate": 1.9756792970446375e-05, "loss": 0.3128, "step": 668 }, { "epoch": 39.35294117647059, "grad_norm": 6.999749660491943, "learning_rate": 1.9755747827731516e-05, "loss": 0.3397, "step": 669 }, { "epoch": 39.411764705882355, "grad_norm": 5.567922592163086, "learning_rate": 1.9754700471926994e-05, "loss": 0.3517, "step": 670 }, { "epoch": 39.470588235294116, "grad_norm": 6.207893371582031, "learning_rate": 1.9753650903270406e-05, "loss": 0.3835, "step": 671 }, { "epoch": 39.529411764705884, "grad_norm": 5.828309059143066, "learning_rate": 1.9752599121999845e-05, "loss": 0.3448, "step": 672 }, { "epoch": 39.588235294117645, "grad_norm": 5.733273029327393, "learning_rate": 1.9751545128353906e-05, "loss": 0.3796, "step": 673 }, { "epoch": 39.64705882352941, "grad_norm": 7.441806793212891, "learning_rate": 1.9750488922571688e-05, "loss": 0.384, "step": 674 }, { "epoch": 39.705882352941174, "grad_norm": 5.647262096405029, "learning_rate": 1.9749430504892795e-05, "loss": 0.3941, "step": 675 }, { "epoch": 39.76470588235294, "grad_norm": 6.076281547546387, "learning_rate": 1.974836987555732e-05, "loss": 0.315, "step": 676 }, { "epoch": 39.8235294117647, "grad_norm": 5.684563159942627, "learning_rate": 1.9747307034805877e-05, "loss": 0.3779, "step": 677 }, { "epoch": 39.88235294117647, "grad_norm": 5.20861291885376, "learning_rate": 1.9746241982879562e-05, "loss": 0.4354, "step": 678 }, { "epoch": 39.94117647058823, "grad_norm": 6.435985088348389, "learning_rate": 1.9745174720019992e-05, "loss": 0.4052, "step": 679 }, { "epoch": 40.0, "grad_norm": 5.535801887512207, "learning_rate": 1.9744105246469264e-05, "loss": 0.3622, "step": 680 }, { "epoch": 40.05882352941177, "grad_norm": 5.5403265953063965, "learning_rate": 1.974303356246999e-05, "loss": 0.2997, "step": 681 }, { "epoch": 40.11764705882353, "grad_norm": 6.644796371459961, "learning_rate": 1.974195966826529e-05, "loss": 0.2674, "step": 682 }, { "epoch": 40.1764705882353, "grad_norm": 5.4411444664001465, "learning_rate": 1.974088356409877e-05, "loss": 0.5295, "step": 683 }, { "epoch": 40.23529411764706, "grad_norm": 5.739968776702881, "learning_rate": 1.9739805250214546e-05, "loss": 0.3699, "step": 684 }, { "epoch": 40.294117647058826, "grad_norm": 5.203281879425049, "learning_rate": 1.973872472685723e-05, "loss": 0.3446, "step": 685 }, { "epoch": 40.35294117647059, "grad_norm": 9.264540672302246, "learning_rate": 1.9737641994271947e-05, "loss": 0.2492, "step": 686 }, { "epoch": 40.411764705882355, "grad_norm": 6.769572734832764, "learning_rate": 1.9736557052704303e-05, "loss": 0.4929, "step": 687 }, { "epoch": 40.470588235294116, "grad_norm": 6.158898830413818, "learning_rate": 1.9735469902400424e-05, "loss": 0.3234, "step": 688 }, { "epoch": 40.529411764705884, "grad_norm": 4.837128639221191, "learning_rate": 1.9734380543606932e-05, "loss": 0.3758, "step": 689 }, { "epoch": 40.588235294117645, "grad_norm": 6.951872825622559, "learning_rate": 1.973328897657094e-05, "loss": 0.2433, "step": 690 }, { "epoch": 40.64705882352941, "grad_norm": 5.233768939971924, "learning_rate": 1.9732195201540075e-05, "loss": 0.2227, "step": 691 }, { "epoch": 40.705882352941174, "grad_norm": 5.2865495681762695, "learning_rate": 1.973109921876246e-05, "loss": 0.3152, "step": 692 }, { "epoch": 40.76470588235294, "grad_norm": 5.124616622924805, "learning_rate": 1.9730001028486716e-05, "loss": 0.2346, "step": 693 }, { "epoch": 40.8235294117647, "grad_norm": 5.021505832672119, "learning_rate": 1.9728900630961968e-05, "loss": 0.3056, "step": 694 }, { "epoch": 40.88235294117647, "grad_norm": 5.886190891265869, "learning_rate": 1.9727798026437845e-05, "loss": 0.3852, "step": 695 }, { "epoch": 40.94117647058823, "grad_norm": 5.977292060852051, "learning_rate": 1.9726693215164464e-05, "loss": 0.3169, "step": 696 }, { "epoch": 41.0, "grad_norm": 5.533013820648193, "learning_rate": 1.972558619739246e-05, "loss": 0.3204, "step": 697 }, { "epoch": 41.05882352941177, "grad_norm": 5.394664764404297, "learning_rate": 1.9724476973372958e-05, "loss": 0.3922, "step": 698 }, { "epoch": 41.11764705882353, "grad_norm": 4.889113426208496, "learning_rate": 1.972336554335758e-05, "loss": 0.2962, "step": 699 }, { "epoch": 41.1764705882353, "grad_norm": 5.084755897521973, "learning_rate": 1.9722251907598458e-05, "loss": 0.3918, "step": 700 }, { "epoch": 41.23529411764706, "grad_norm": 8.564302444458008, "learning_rate": 1.9721136066348218e-05, "loss": 0.2633, "step": 701 }, { "epoch": 41.294117647058826, "grad_norm": 7.285110950469971, "learning_rate": 1.9720018019859992e-05, "loss": 0.3326, "step": 702 }, { "epoch": 41.35294117647059, "grad_norm": 5.235429763793945, "learning_rate": 1.9718897768387406e-05, "loss": 0.388, "step": 703 }, { "epoch": 41.411764705882355, "grad_norm": 5.335225582122803, "learning_rate": 1.9717775312184592e-05, "loss": 0.233, "step": 704 }, { "epoch": 41.470588235294116, "grad_norm": 5.148421764373779, "learning_rate": 1.971665065150617e-05, "loss": 0.2935, "step": 705 }, { "epoch": 41.529411764705884, "grad_norm": 6.040323734283447, "learning_rate": 1.9715523786607278e-05, "loss": 0.3781, "step": 706 }, { "epoch": 41.588235294117645, "grad_norm": 4.980144500732422, "learning_rate": 1.971439471774354e-05, "loss": 0.2648, "step": 707 }, { "epoch": 41.64705882352941, "grad_norm": 4.739278793334961, "learning_rate": 1.971326344517109e-05, "loss": 0.3055, "step": 708 }, { "epoch": 41.705882352941174, "grad_norm": 5.467220306396484, "learning_rate": 1.9712129969146556e-05, "loss": 0.4496, "step": 709 }, { "epoch": 41.76470588235294, "grad_norm": 4.402164936065674, "learning_rate": 1.9710994289927064e-05, "loss": 0.272, "step": 710 }, { "epoch": 41.8235294117647, "grad_norm": 6.740249156951904, "learning_rate": 1.970985640777024e-05, "loss": 0.2977, "step": 711 }, { "epoch": 41.88235294117647, "grad_norm": 5.422008037567139, "learning_rate": 1.9708716322934222e-05, "loss": 0.3754, "step": 712 }, { "epoch": 41.94117647058823, "grad_norm": 4.6660895347595215, "learning_rate": 1.970757403567763e-05, "loss": 0.2731, "step": 713 }, { "epoch": 42.0, "grad_norm": 4.064938068389893, "learning_rate": 1.9706429546259592e-05, "loss": 0.2515, "step": 714 }, { "epoch": 42.05882352941177, "grad_norm": 5.851126194000244, "learning_rate": 1.970528285493974e-05, "loss": 0.4198, "step": 715 }, { "epoch": 42.11764705882353, "grad_norm": 5.669169902801514, "learning_rate": 1.9704133961978195e-05, "loss": 0.203, "step": 716 }, { "epoch": 42.1764705882353, "grad_norm": 5.353869915008545, "learning_rate": 1.970298286763559e-05, "loss": 0.2621, "step": 717 }, { "epoch": 42.23529411764706, "grad_norm": 4.394347667694092, "learning_rate": 1.970182957217304e-05, "loss": 0.2328, "step": 718 }, { "epoch": 42.294117647058826, "grad_norm": 5.184201717376709, "learning_rate": 1.9700674075852185e-05, "loss": 0.4052, "step": 719 }, { "epoch": 42.35294117647059, "grad_norm": 5.526648044586182, "learning_rate": 1.9699516378935134e-05, "loss": 0.4065, "step": 720 }, { "epoch": 42.411764705882355, "grad_norm": 4.162188529968262, "learning_rate": 1.969835648168452e-05, "loss": 0.3036, "step": 721 }, { "epoch": 42.470588235294116, "grad_norm": 5.272726535797119, "learning_rate": 1.9697194384363462e-05, "loss": 0.3288, "step": 722 }, { "epoch": 42.529411764705884, "grad_norm": 5.438912391662598, "learning_rate": 1.969603008723558e-05, "loss": 0.3004, "step": 723 }, { "epoch": 42.588235294117645, "grad_norm": 4.908586025238037, "learning_rate": 1.9694863590564997e-05, "loss": 0.2526, "step": 724 }, { "epoch": 42.64705882352941, "grad_norm": 6.337149620056152, "learning_rate": 1.9693694894616332e-05, "loss": 0.4484, "step": 725 }, { "epoch": 42.705882352941174, "grad_norm": 4.747602939605713, "learning_rate": 1.9692523999654706e-05, "loss": 0.3799, "step": 726 }, { "epoch": 42.76470588235294, "grad_norm": 4.65410041809082, "learning_rate": 1.969135090594573e-05, "loss": 0.2273, "step": 727 }, { "epoch": 42.8235294117647, "grad_norm": 5.338381767272949, "learning_rate": 1.969017561375553e-05, "loss": 0.2808, "step": 728 }, { "epoch": 42.88235294117647, "grad_norm": 5.2972564697265625, "learning_rate": 1.968899812335071e-05, "loss": 0.2628, "step": 729 }, { "epoch": 42.94117647058823, "grad_norm": 5.208472728729248, "learning_rate": 1.968781843499839e-05, "loss": 0.208, "step": 730 }, { "epoch": 43.0, "grad_norm": 4.62021541595459, "learning_rate": 1.9686636548966177e-05, "loss": 0.3437, "step": 731 }, { "epoch": 43.05882352941177, "grad_norm": 4.755136489868164, "learning_rate": 1.968545246552219e-05, "loss": 0.2444, "step": 732 }, { "epoch": 43.11764705882353, "grad_norm": 4.990755081176758, "learning_rate": 1.9684266184935032e-05, "loss": 0.2595, "step": 733 }, { "epoch": 43.1764705882353, "grad_norm": 4.518778324127197, "learning_rate": 1.968307770747381e-05, "loss": 0.2525, "step": 734 }, { "epoch": 43.23529411764706, "grad_norm": 5.532416820526123, "learning_rate": 1.9681887033408135e-05, "loss": 0.2889, "step": 735 }, { "epoch": 43.294117647058826, "grad_norm": 5.1442999839782715, "learning_rate": 1.9680694163008102e-05, "loss": 0.203, "step": 736 }, { "epoch": 43.35294117647059, "grad_norm": 4.768996715545654, "learning_rate": 1.9679499096544325e-05, "loss": 0.3602, "step": 737 }, { "epoch": 43.411764705882355, "grad_norm": 4.918734550476074, "learning_rate": 1.9678301834287894e-05, "loss": 0.3917, "step": 738 }, { "epoch": 43.470588235294116, "grad_norm": 4.865251064300537, "learning_rate": 1.9677102376510415e-05, "loss": 0.2906, "step": 739 }, { "epoch": 43.529411764705884, "grad_norm": 3.597259283065796, "learning_rate": 1.9675900723483985e-05, "loss": 0.2518, "step": 740 }, { "epoch": 43.588235294117645, "grad_norm": 7.2501540184021, "learning_rate": 1.967469687548119e-05, "loss": 0.3708, "step": 741 }, { "epoch": 43.64705882352941, "grad_norm": 5.019376754760742, "learning_rate": 1.9673490832775136e-05, "loss": 0.3613, "step": 742 }, { "epoch": 43.705882352941174, "grad_norm": 4.3606696128845215, "learning_rate": 1.96722825956394e-05, "loss": 0.2124, "step": 743 }, { "epoch": 43.76470588235294, "grad_norm": 5.188763618469238, "learning_rate": 1.967107216434808e-05, "loss": 0.3353, "step": 744 }, { "epoch": 43.8235294117647, "grad_norm": 4.412519454956055, "learning_rate": 1.966985953917576e-05, "loss": 0.2366, "step": 745 }, { "epoch": 43.88235294117647, "grad_norm": 4.771398067474365, "learning_rate": 1.966864472039752e-05, "loss": 0.3405, "step": 746 }, { "epoch": 43.94117647058823, "grad_norm": 4.580239295959473, "learning_rate": 1.9667427708288945e-05, "loss": 0.2253, "step": 747 }, { "epoch": 44.0, "grad_norm": 4.731821537017822, "learning_rate": 1.9666208503126115e-05, "loss": 0.2882, "step": 748 }, { "epoch": 44.05882352941177, "grad_norm": 5.388699054718018, "learning_rate": 1.9664987105185602e-05, "loss": 0.358, "step": 749 }, { "epoch": 44.11764705882353, "grad_norm": 4.288206577301025, "learning_rate": 1.9663763514744482e-05, "loss": 0.3152, "step": 750 }, { "epoch": 44.1764705882353, "grad_norm": 6.591547966003418, "learning_rate": 1.966253773208033e-05, "loss": 0.3517, "step": 751 }, { "epoch": 44.23529411764706, "grad_norm": 5.071956157684326, "learning_rate": 1.966130975747121e-05, "loss": 0.3119, "step": 752 }, { "epoch": 44.294117647058826, "grad_norm": 5.038777828216553, "learning_rate": 1.966007959119569e-05, "loss": 0.1833, "step": 753 }, { "epoch": 44.35294117647059, "grad_norm": 4.318075656890869, "learning_rate": 1.965884723353283e-05, "loss": 0.2914, "step": 754 }, { "epoch": 44.411764705882355, "grad_norm": 5.409462928771973, "learning_rate": 1.9657612684762193e-05, "loss": 0.1966, "step": 755 }, { "epoch": 44.470588235294116, "grad_norm": 6.185449600219727, "learning_rate": 1.9656375945163834e-05, "loss": 0.3994, "step": 756 }, { "epoch": 44.529411764705884, "grad_norm": 5.372772693634033, "learning_rate": 1.965513701501831e-05, "loss": 0.4061, "step": 757 }, { "epoch": 44.588235294117645, "grad_norm": 4.747186183929443, "learning_rate": 1.9653895894606673e-05, "loss": 0.2387, "step": 758 }, { "epoch": 44.64705882352941, "grad_norm": 5.100881576538086, "learning_rate": 1.9652652584210466e-05, "loss": 0.2594, "step": 759 }, { "epoch": 44.705882352941174, "grad_norm": 5.548305988311768, "learning_rate": 1.965140708411174e-05, "loss": 0.3054, "step": 760 }, { "epoch": 44.76470588235294, "grad_norm": 6.44931697845459, "learning_rate": 1.9650159394593027e-05, "loss": 0.3091, "step": 761 }, { "epoch": 44.8235294117647, "grad_norm": 4.750927448272705, "learning_rate": 1.964890951593738e-05, "loss": 0.2349, "step": 762 }, { "epoch": 44.88235294117647, "grad_norm": 5.766330718994141, "learning_rate": 1.9647657448428313e-05, "loss": 0.2095, "step": 763 }, { "epoch": 44.94117647058823, "grad_norm": 5.360480785369873, "learning_rate": 1.9646403192349876e-05, "loss": 0.269, "step": 764 }, { "epoch": 45.0, "grad_norm": 5.823049545288086, "learning_rate": 1.964514674798659e-05, "loss": 0.2497, "step": 765 }, { "epoch": 45.05882352941177, "grad_norm": 3.5708792209625244, "learning_rate": 1.9643888115623478e-05, "loss": 0.2284, "step": 766 }, { "epoch": 45.11764705882353, "grad_norm": 4.206412315368652, "learning_rate": 1.9642627295546063e-05, "loss": 0.3136, "step": 767 }, { "epoch": 45.1764705882353, "grad_norm": 4.208456039428711, "learning_rate": 1.964136428804036e-05, "loss": 0.2182, "step": 768 }, { "epoch": 45.23529411764706, "grad_norm": 4.894114017486572, "learning_rate": 1.964009909339288e-05, "loss": 0.2881, "step": 769 }, { "epoch": 45.294117647058826, "grad_norm": 4.914184093475342, "learning_rate": 1.9638831711890633e-05, "loss": 0.3619, "step": 770 }, { "epoch": 45.35294117647059, "grad_norm": 4.575415134429932, "learning_rate": 1.963756214382113e-05, "loss": 0.276, "step": 771 }, { "epoch": 45.411764705882355, "grad_norm": 5.610871315002441, "learning_rate": 1.963629038947237e-05, "loss": 0.3859, "step": 772 }, { "epoch": 45.470588235294116, "grad_norm": 4.476993560791016, "learning_rate": 1.9635016449132842e-05, "loss": 0.197, "step": 773 }, { "epoch": 45.529411764705884, "grad_norm": 4.395232200622559, "learning_rate": 1.9633740323091547e-05, "loss": 0.2405, "step": 774 }, { "epoch": 45.588235294117645, "grad_norm": 3.7335774898529053, "learning_rate": 1.9632462011637977e-05, "loss": 0.1511, "step": 775 }, { "epoch": 45.64705882352941, "grad_norm": 5.168849468231201, "learning_rate": 1.9631181515062108e-05, "loss": 0.3428, "step": 776 }, { "epoch": 45.705882352941174, "grad_norm": 4.064977169036865, "learning_rate": 1.9629898833654425e-05, "loss": 0.2547, "step": 777 }, { "epoch": 45.76470588235294, "grad_norm": 4.383057117462158, "learning_rate": 1.9628613967705905e-05, "loss": 0.2429, "step": 778 }, { "epoch": 45.8235294117647, "grad_norm": 5.444320201873779, "learning_rate": 1.9627326917508015e-05, "loss": 0.2734, "step": 779 }, { "epoch": 45.88235294117647, "grad_norm": 4.8192853927612305, "learning_rate": 1.962603768335273e-05, "loss": 0.3847, "step": 780 }, { "epoch": 45.94117647058823, "grad_norm": 22.47605323791504, "learning_rate": 1.9624746265532508e-05, "loss": 0.2473, "step": 781 }, { "epoch": 46.0, "grad_norm": 3.9888768196105957, "learning_rate": 1.9623452664340305e-05, "loss": 0.28, "step": 782 }, { "epoch": 46.05882352941177, "grad_norm": 4.819196701049805, "learning_rate": 1.9622156880069573e-05, "loss": 0.2274, "step": 783 }, { "epoch": 46.11764705882353, "grad_norm": 5.370273113250732, "learning_rate": 1.9620858913014268e-05, "loss": 0.1565, "step": 784 }, { "epoch": 46.1764705882353, "grad_norm": 4.4638752937316895, "learning_rate": 1.961955876346883e-05, "loss": 0.2353, "step": 785 }, { "epoch": 46.23529411764706, "grad_norm": 4.309454917907715, "learning_rate": 1.961825643172819e-05, "loss": 0.3634, "step": 786 }, { "epoch": 46.294117647058826, "grad_norm": 4.191205978393555, "learning_rate": 1.9616951918087795e-05, "loss": 0.2311, "step": 787 }, { "epoch": 46.35294117647059, "grad_norm": 8.80100154876709, "learning_rate": 1.9615645222843566e-05, "loss": 0.2473, "step": 788 }, { "epoch": 46.411764705882355, "grad_norm": 4.730323314666748, "learning_rate": 1.9614336346291926e-05, "loss": 0.2803, "step": 789 }, { "epoch": 46.470588235294116, "grad_norm": 5.767907619476318, "learning_rate": 1.9613025288729796e-05, "loss": 0.1628, "step": 790 }, { "epoch": 46.529411764705884, "grad_norm": 5.137070178985596, "learning_rate": 1.9611712050454588e-05, "loss": 0.2869, "step": 791 }, { "epoch": 46.588235294117645, "grad_norm": 5.723021984100342, "learning_rate": 1.961039663176421e-05, "loss": 0.3061, "step": 792 }, { "epoch": 46.64705882352941, "grad_norm": 4.674069881439209, "learning_rate": 1.9609079032957067e-05, "loss": 0.1741, "step": 793 }, { "epoch": 46.705882352941174, "grad_norm": 3.8607256412506104, "learning_rate": 1.960775925433205e-05, "loss": 0.2563, "step": 794 }, { "epoch": 46.76470588235294, "grad_norm": 4.929813861846924, "learning_rate": 1.9606437296188557e-05, "loss": 0.2778, "step": 795 }, { "epoch": 46.8235294117647, "grad_norm": 4.774641036987305, "learning_rate": 1.960511315882647e-05, "loss": 0.3671, "step": 796 }, { "epoch": 46.88235294117647, "grad_norm": 5.571505069732666, "learning_rate": 1.960378684254617e-05, "loss": 0.2903, "step": 797 }, { "epoch": 46.94117647058823, "grad_norm": 4.910965442657471, "learning_rate": 1.9602458347648535e-05, "loss": 0.2174, "step": 798 }, { "epoch": 47.0, "grad_norm": 5.4289093017578125, "learning_rate": 1.960112767443493e-05, "loss": 0.2286, "step": 799 }, { "epoch": 47.05882352941177, "grad_norm": 4.494365215301514, "learning_rate": 1.9599794823207217e-05, "loss": 0.2563, "step": 800 }, { "epoch": 47.11764705882353, "grad_norm": 4.149231910705566, "learning_rate": 1.9598459794267758e-05, "loss": 0.1436, "step": 801 }, { "epoch": 47.1764705882353, "grad_norm": 4.3212456703186035, "learning_rate": 1.9597122587919398e-05, "loss": 0.2291, "step": 802 }, { "epoch": 47.23529411764706, "grad_norm": 3.7840874195098877, "learning_rate": 1.9595783204465486e-05, "loss": 0.197, "step": 803 }, { "epoch": 47.294117647058826, "grad_norm": 4.591668605804443, "learning_rate": 1.9594441644209863e-05, "loss": 0.177, "step": 804 }, { "epoch": 47.35294117647059, "grad_norm": 4.767429828643799, "learning_rate": 1.9593097907456855e-05, "loss": 0.4672, "step": 805 }, { "epoch": 47.411764705882355, "grad_norm": 4.898504734039307, "learning_rate": 1.9591751994511294e-05, "loss": 0.3214, "step": 806 }, { "epoch": 47.470588235294116, "grad_norm": 5.302005290985107, "learning_rate": 1.95904039056785e-05, "loss": 0.2529, "step": 807 }, { "epoch": 47.529411764705884, "grad_norm": 5.205804347991943, "learning_rate": 1.958905364126428e-05, "loss": 0.3026, "step": 808 }, { "epoch": 47.588235294117645, "grad_norm": 4.180668354034424, "learning_rate": 1.958770120157495e-05, "loss": 0.1947, "step": 809 }, { "epoch": 47.64705882352941, "grad_norm": 4.294533729553223, "learning_rate": 1.9586346586917306e-05, "loss": 0.1994, "step": 810 }, { "epoch": 47.705882352941174, "grad_norm": 3.7973148822784424, "learning_rate": 1.9584989797598647e-05, "loss": 0.2122, "step": 811 }, { "epoch": 47.76470588235294, "grad_norm": 4.431242942810059, "learning_rate": 1.958363083392675e-05, "loss": 0.1188, "step": 812 }, { "epoch": 47.8235294117647, "grad_norm": 8.332415580749512, "learning_rate": 1.9582269696209907e-05, "loss": 0.3332, "step": 813 }, { "epoch": 47.88235294117647, "grad_norm": 6.139277935028076, "learning_rate": 1.9580906384756887e-05, "loss": 0.2303, "step": 814 }, { "epoch": 47.94117647058823, "grad_norm": 4.657686233520508, "learning_rate": 1.9579540899876955e-05, "loss": 0.1857, "step": 815 }, { "epoch": 48.0, "grad_norm": 5.035534381866455, "learning_rate": 1.957817324187987e-05, "loss": 0.3002, "step": 816 }, { "epoch": 48.05882352941177, "grad_norm": 4.51795768737793, "learning_rate": 1.9576803411075894e-05, "loss": 0.1189, "step": 817 }, { "epoch": 48.11764705882353, "grad_norm": 4.013894557952881, "learning_rate": 1.9575431407775766e-05, "loss": 0.2381, "step": 818 }, { "epoch": 48.1764705882353, "grad_norm": 3.7041118144989014, "learning_rate": 1.957405723229072e-05, "loss": 0.2182, "step": 819 }, { "epoch": 48.23529411764706, "grad_norm": 4.096446514129639, "learning_rate": 1.9572680884932496e-05, "loss": 0.199, "step": 820 }, { "epoch": 48.294117647058826, "grad_norm": 4.394288063049316, "learning_rate": 1.9571302366013317e-05, "loss": 0.2601, "step": 821 }, { "epoch": 48.35294117647059, "grad_norm": 4.337723731994629, "learning_rate": 1.95699216758459e-05, "loss": 0.143, "step": 822 }, { "epoch": 48.411764705882355, "grad_norm": 3.391415596008301, "learning_rate": 1.956853881474345e-05, "loss": 0.2714, "step": 823 }, { "epoch": 48.470588235294116, "grad_norm": 3.2390048503875732, "learning_rate": 1.956715378301967e-05, "loss": 0.2519, "step": 824 }, { "epoch": 48.529411764705884, "grad_norm": 4.721830368041992, "learning_rate": 1.9565766580988754e-05, "loss": 0.146, "step": 825 }, { "epoch": 48.588235294117645, "grad_norm": 4.5465192794799805, "learning_rate": 1.9564377208965392e-05, "loss": 0.1444, "step": 826 }, { "epoch": 48.64705882352941, "grad_norm": 3.6630971431732178, "learning_rate": 1.956298566726476e-05, "loss": 0.3375, "step": 827 }, { "epoch": 48.705882352941174, "grad_norm": 4.778774261474609, "learning_rate": 1.956159195620253e-05, "loss": 0.1715, "step": 828 }, { "epoch": 48.76470588235294, "grad_norm": 4.311033248901367, "learning_rate": 1.9560196076094864e-05, "loss": 0.4523, "step": 829 }, { "epoch": 48.8235294117647, "grad_norm": 5.546796798706055, "learning_rate": 1.9558798027258416e-05, "loss": 0.2585, "step": 830 }, { "epoch": 48.88235294117647, "grad_norm": 4.051539421081543, "learning_rate": 1.9557397810010335e-05, "loss": 0.1768, "step": 831 }, { "epoch": 48.94117647058823, "grad_norm": 7.393955230712891, "learning_rate": 1.9555995424668266e-05, "loss": 0.2769, "step": 832 }, { "epoch": 49.0, "grad_norm": 4.3384785652160645, "learning_rate": 1.955459087155033e-05, "loss": 0.1383, "step": 833 }, { "epoch": 49.05882352941177, "grad_norm": 4.361390113830566, "learning_rate": 1.955318415097515e-05, "loss": 0.2081, "step": 834 }, { "epoch": 49.11764705882353, "grad_norm": 4.742649555206299, "learning_rate": 1.955177526326185e-05, "loss": 0.3001, "step": 835 }, { "epoch": 49.1764705882353, "grad_norm": 5.003532409667969, "learning_rate": 1.9550364208730026e-05, "loss": 0.2558, "step": 836 }, { "epoch": 49.23529411764706, "grad_norm": 4.220962047576904, "learning_rate": 1.954895098769978e-05, "loss": 0.2575, "step": 837 }, { "epoch": 49.294117647058826, "grad_norm": 3.500429630279541, "learning_rate": 1.95475356004917e-05, "loss": 0.2058, "step": 838 }, { "epoch": 49.35294117647059, "grad_norm": 4.800041675567627, "learning_rate": 1.9546118047426864e-05, "loss": 0.2441, "step": 839 }, { "epoch": 49.411764705882355, "grad_norm": 3.7959470748901367, "learning_rate": 1.954469832882685e-05, "loss": 0.2181, "step": 840 }, { "epoch": 49.470588235294116, "grad_norm": 4.276721000671387, "learning_rate": 1.9543276445013717e-05, "loss": 0.1593, "step": 841 }, { "epoch": 49.529411764705884, "grad_norm": 3.565321683883667, "learning_rate": 1.9541852396310018e-05, "loss": 0.1653, "step": 842 }, { "epoch": 49.588235294117645, "grad_norm": 4.025339603424072, "learning_rate": 1.95404261830388e-05, "loss": 0.232, "step": 843 }, { "epoch": 49.64705882352941, "grad_norm": 3.1838533878326416, "learning_rate": 1.9538997805523595e-05, "loss": 0.2762, "step": 844 }, { "epoch": 49.705882352941174, "grad_norm": 4.749746799468994, "learning_rate": 1.9537567264088438e-05, "loss": 0.187, "step": 845 }, { "epoch": 49.76470588235294, "grad_norm": 3.3911590576171875, "learning_rate": 1.9536134559057843e-05, "loss": 0.1542, "step": 846 }, { "epoch": 49.8235294117647, "grad_norm": 3.6200621128082275, "learning_rate": 1.9534699690756814e-05, "loss": 0.2379, "step": 847 }, { "epoch": 49.88235294117647, "grad_norm": 4.436811923980713, "learning_rate": 1.9533262659510858e-05, "loss": 0.2714, "step": 848 }, { "epoch": 49.94117647058823, "grad_norm": 3.2609446048736572, "learning_rate": 1.9531823465645962e-05, "loss": 0.1472, "step": 849 }, { "epoch": 50.0, "grad_norm": 3.6866581439971924, "learning_rate": 1.953038210948861e-05, "loss": 0.2721, "step": 850 }, { "epoch": 50.05882352941177, "grad_norm": 3.2753918170928955, "learning_rate": 1.9528938591365772e-05, "loss": 0.2859, "step": 851 }, { "epoch": 50.11764705882353, "grad_norm": 3.9907655715942383, "learning_rate": 1.9527492911604905e-05, "loss": 0.233, "step": 852 }, { "epoch": 50.1764705882353, "grad_norm": 4.458572864532471, "learning_rate": 1.9526045070533964e-05, "loss": 0.1568, "step": 853 }, { "epoch": 50.23529411764706, "grad_norm": 4.144460201263428, "learning_rate": 1.9524595068481393e-05, "loss": 0.2855, "step": 854 }, { "epoch": 50.294117647058826, "grad_norm": 3.260004997253418, "learning_rate": 1.9523142905776127e-05, "loss": 0.1504, "step": 855 }, { "epoch": 50.35294117647059, "grad_norm": 3.0891268253326416, "learning_rate": 1.9521688582747585e-05, "loss": 0.1638, "step": 856 }, { "epoch": 50.411764705882355, "grad_norm": 3.612243175506592, "learning_rate": 1.952023209972568e-05, "loss": 0.1087, "step": 857 }, { "epoch": 50.470588235294116, "grad_norm": 3.4696872234344482, "learning_rate": 1.9518773457040818e-05, "loss": 0.1825, "step": 858 }, { "epoch": 50.529411764705884, "grad_norm": 5.158644199371338, "learning_rate": 1.9517312655023883e-05, "loss": 0.2896, "step": 859 }, { "epoch": 50.588235294117645, "grad_norm": 5.202609062194824, "learning_rate": 1.9515849694006272e-05, "loss": 0.2282, "step": 860 }, { "epoch": 50.64705882352941, "grad_norm": 4.396500110626221, "learning_rate": 1.9514384574319845e-05, "loss": 0.2179, "step": 861 }, { "epoch": 50.705882352941174, "grad_norm": 3.518514633178711, "learning_rate": 1.951291729629697e-05, "loss": 0.2061, "step": 862 }, { "epoch": 50.76470588235294, "grad_norm": 3.902836561203003, "learning_rate": 1.95114478602705e-05, "loss": 0.1546, "step": 863 }, { "epoch": 50.8235294117647, "grad_norm": 3.984537124633789, "learning_rate": 1.950997626657377e-05, "loss": 0.1534, "step": 864 }, { "epoch": 50.88235294117647, "grad_norm": 4.701574325561523, "learning_rate": 1.9508502515540617e-05, "loss": 0.3242, "step": 865 }, { "epoch": 50.94117647058823, "grad_norm": 3.7004544734954834, "learning_rate": 1.9507026607505358e-05, "loss": 0.1813, "step": 866 }, { "epoch": 51.0, "grad_norm": 3.90329909324646, "learning_rate": 1.9505548542802805e-05, "loss": 0.2488, "step": 867 }, { "epoch": 51.05882352941177, "grad_norm": 3.509725570678711, "learning_rate": 1.950406832176825e-05, "loss": 0.185, "step": 868 }, { "epoch": 51.11764705882353, "grad_norm": 4.141098499298096, "learning_rate": 1.9502585944737494e-05, "loss": 0.1659, "step": 869 }, { "epoch": 51.1764705882353, "grad_norm": 4.0475687980651855, "learning_rate": 1.95011014120468e-05, "loss": 0.2641, "step": 870 }, { "epoch": 51.23529411764706, "grad_norm": 4.303914546966553, "learning_rate": 1.949961472403294e-05, "loss": 0.3508, "step": 871 }, { "epoch": 51.294117647058826, "grad_norm": 3.9996023178100586, "learning_rate": 1.949812588103317e-05, "loss": 0.1352, "step": 872 }, { "epoch": 51.35294117647059, "grad_norm": 3.3009350299835205, "learning_rate": 1.949663488338523e-05, "loss": 0.1729, "step": 873 }, { "epoch": 51.411764705882355, "grad_norm": 4.281827926635742, "learning_rate": 1.949514173142736e-05, "loss": 0.1267, "step": 874 }, { "epoch": 51.470588235294116, "grad_norm": 4.025966167449951, "learning_rate": 1.9493646425498275e-05, "loss": 0.2085, "step": 875 }, { "epoch": 51.529411764705884, "grad_norm": 4.287881374359131, "learning_rate": 1.9492148965937187e-05, "loss": 0.3495, "step": 876 }, { "epoch": 51.588235294117645, "grad_norm": 4.491772174835205, "learning_rate": 1.9490649353083794e-05, "loss": 0.2155, "step": 877 }, { "epoch": 51.64705882352941, "grad_norm": 3.4126946926116943, "learning_rate": 1.9489147587278278e-05, "loss": 0.1413, "step": 878 }, { "epoch": 51.705882352941174, "grad_norm": 5.266879081726074, "learning_rate": 1.948764366886132e-05, "loss": 0.1693, "step": 879 }, { "epoch": 51.76470588235294, "grad_norm": 3.47599196434021, "learning_rate": 1.9486137598174085e-05, "loss": 0.1203, "step": 880 }, { "epoch": 51.8235294117647, "grad_norm": 4.081455707550049, "learning_rate": 1.9484629375558223e-05, "loss": 0.2311, "step": 881 }, { "epoch": 51.88235294117647, "grad_norm": 3.987111806869507, "learning_rate": 1.948311900135587e-05, "loss": 0.1531, "step": 882 }, { "epoch": 51.94117647058823, "grad_norm": 4.289024353027344, "learning_rate": 1.948160647590966e-05, "loss": 0.1929, "step": 883 }, { "epoch": 52.0, "grad_norm": 4.530048370361328, "learning_rate": 1.9480091799562706e-05, "loss": 0.2558, "step": 884 }, { "epoch": 52.05882352941177, "grad_norm": 3.793919324874878, "learning_rate": 1.9478574972658612e-05, "loss": 0.2131, "step": 885 }, { "epoch": 52.11764705882353, "grad_norm": 10.647246360778809, "learning_rate": 1.9477055995541473e-05, "loss": 0.1455, "step": 886 }, { "epoch": 52.1764705882353, "grad_norm": 3.5447137355804443, "learning_rate": 1.9475534868555865e-05, "loss": 0.1731, "step": 887 }, { "epoch": 52.23529411764706, "grad_norm": 4.532516956329346, "learning_rate": 1.9474011592046855e-05, "loss": 0.1906, "step": 888 }, { "epoch": 52.294117647058826, "grad_norm": 3.948963165283203, "learning_rate": 1.9472486166360004e-05, "loss": 0.2447, "step": 889 }, { "epoch": 52.35294117647059, "grad_norm": 4.310669898986816, "learning_rate": 1.9470958591841347e-05, "loss": 0.1654, "step": 890 }, { "epoch": 52.411764705882355, "grad_norm": 3.573646306991577, "learning_rate": 1.9469428868837415e-05, "loss": 0.2506, "step": 891 }, { "epoch": 52.470588235294116, "grad_norm": 8.133570671081543, "learning_rate": 1.9467896997695232e-05, "loss": 0.1774, "step": 892 }, { "epoch": 52.529411764705884, "grad_norm": 4.3726396560668945, "learning_rate": 1.9466362978762298e-05, "loss": 0.1534, "step": 893 }, { "epoch": 52.588235294117645, "grad_norm": 4.288900852203369, "learning_rate": 1.9464826812386604e-05, "loss": 0.1835, "step": 894 }, { "epoch": 52.64705882352941, "grad_norm": 5.748040676116943, "learning_rate": 1.9463288498916627e-05, "loss": 0.2881, "step": 895 }, { "epoch": 52.705882352941174, "grad_norm": 3.9548914432525635, "learning_rate": 1.9461748038701344e-05, "loss": 0.1368, "step": 896 }, { "epoch": 52.76470588235294, "grad_norm": 4.73676061630249, "learning_rate": 1.9460205432090193e-05, "loss": 0.2196, "step": 897 }, { "epoch": 52.8235294117647, "grad_norm": 3.69358491897583, "learning_rate": 1.9458660679433124e-05, "loss": 0.1585, "step": 898 }, { "epoch": 52.88235294117647, "grad_norm": 3.314542055130005, "learning_rate": 1.9457113781080563e-05, "loss": 0.1737, "step": 899 }, { "epoch": 52.94117647058823, "grad_norm": 6.481629371643066, "learning_rate": 1.9455564737383423e-05, "loss": 0.1651, "step": 900 }, { "epoch": 53.0, "grad_norm": 4.538586139678955, "learning_rate": 1.9454013548693103e-05, "loss": 0.1903, "step": 901 }, { "epoch": 53.05882352941177, "grad_norm": 3.7092478275299072, "learning_rate": 1.945246021536149e-05, "loss": 0.1185, "step": 902 }, { "epoch": 53.11764705882353, "grad_norm": 4.1238112449646, "learning_rate": 1.9450904737740955e-05, "loss": 0.2429, "step": 903 }, { "epoch": 53.1764705882353, "grad_norm": 4.488550186157227, "learning_rate": 1.9449347116184364e-05, "loss": 0.2122, "step": 904 }, { "epoch": 53.23529411764706, "grad_norm": 3.9206793308258057, "learning_rate": 1.9447787351045058e-05, "loss": 0.1207, "step": 905 }, { "epoch": 53.294117647058826, "grad_norm": 2.780393600463867, "learning_rate": 1.9446225442676873e-05, "loss": 0.0977, "step": 906 }, { "epoch": 53.35294117647059, "grad_norm": 3.185286283493042, "learning_rate": 1.9444661391434122e-05, "loss": 0.1525, "step": 907 }, { "epoch": 53.411764705882355, "grad_norm": 3.8083279132843018, "learning_rate": 1.9443095197671615e-05, "loss": 0.2629, "step": 908 }, { "epoch": 53.470588235294116, "grad_norm": 4.3490400314331055, "learning_rate": 1.9441526861744644e-05, "loss": 0.1605, "step": 909 }, { "epoch": 53.529411764705884, "grad_norm": 3.6245622634887695, "learning_rate": 1.9439956384008983e-05, "loss": 0.1724, "step": 910 }, { "epoch": 53.588235294117645, "grad_norm": 4.163418292999268, "learning_rate": 1.9438383764820892e-05, "loss": 0.2176, "step": 911 }, { "epoch": 53.64705882352941, "grad_norm": 3.608077049255371, "learning_rate": 1.943680900453712e-05, "loss": 0.1903, "step": 912 }, { "epoch": 53.705882352941174, "grad_norm": 4.20957612991333, "learning_rate": 1.9435232103514905e-05, "loss": 0.3062, "step": 913 }, { "epoch": 53.76470588235294, "grad_norm": 4.823846340179443, "learning_rate": 1.9433653062111965e-05, "loss": 0.1979, "step": 914 }, { "epoch": 53.8235294117647, "grad_norm": 4.353592395782471, "learning_rate": 1.943207188068651e-05, "loss": 0.1847, "step": 915 }, { "epoch": 53.88235294117647, "grad_norm": 3.4310758113861084, "learning_rate": 1.943048855959722e-05, "loss": 0.1363, "step": 916 }, { "epoch": 53.94117647058823, "grad_norm": 4.170695781707764, "learning_rate": 1.9428903099203277e-05, "loss": 0.2001, "step": 917 }, { "epoch": 54.0, "grad_norm": 3.556330442428589, "learning_rate": 1.9427315499864345e-05, "loss": 0.1748, "step": 918 }, { "epoch": 54.05882352941177, "grad_norm": 5.29950475692749, "learning_rate": 1.9425725761940564e-05, "loss": 0.1587, "step": 919 }, { "epoch": 54.11764705882353, "grad_norm": 3.526763677597046, "learning_rate": 1.9424133885792575e-05, "loss": 0.2213, "step": 920 }, { "epoch": 54.1764705882353, "grad_norm": 3.0246334075927734, "learning_rate": 1.9422539871781484e-05, "loss": 0.1846, "step": 921 }, { "epoch": 54.23529411764706, "grad_norm": 3.057827949523926, "learning_rate": 1.94209437202689e-05, "loss": 0.1335, "step": 922 }, { "epoch": 54.294117647058826, "grad_norm": 2.826415538787842, "learning_rate": 1.9419345431616913e-05, "loss": 0.1011, "step": 923 }, { "epoch": 54.35294117647059, "grad_norm": 4.607483863830566, "learning_rate": 1.9417745006188082e-05, "loss": 0.222, "step": 924 }, { "epoch": 54.411764705882355, "grad_norm": 4.292031288146973, "learning_rate": 1.9416142444345475e-05, "loss": 0.1752, "step": 925 }, { "epoch": 54.470588235294116, "grad_norm": 3.4859890937805176, "learning_rate": 1.941453774645263e-05, "loss": 0.1799, "step": 926 }, { "epoch": 54.529411764705884, "grad_norm": 8.979043960571289, "learning_rate": 1.9412930912873568e-05, "loss": 0.1607, "step": 927 }, { "epoch": 54.588235294117645, "grad_norm": 3.7354657649993896, "learning_rate": 1.94113219439728e-05, "loss": 0.1535, "step": 928 }, { "epoch": 54.64705882352941, "grad_norm": 3.5276734828948975, "learning_rate": 1.9409710840115326e-05, "loss": 0.1283, "step": 929 }, { "epoch": 54.705882352941174, "grad_norm": 3.766387462615967, "learning_rate": 1.940809760166662e-05, "loss": 0.3318, "step": 930 }, { "epoch": 54.76470588235294, "grad_norm": 3.589230537414551, "learning_rate": 1.9406482228992645e-05, "loss": 0.1793, "step": 931 }, { "epoch": 54.8235294117647, "grad_norm": 3.774152994155884, "learning_rate": 1.9404864722459852e-05, "loss": 0.084, "step": 932 }, { "epoch": 54.88235294117647, "grad_norm": 4.7347893714904785, "learning_rate": 1.940324508243516e-05, "loss": 0.2139, "step": 933 }, { "epoch": 54.94117647058823, "grad_norm": 4.804427623748779, "learning_rate": 1.9401623309286e-05, "loss": 0.1804, "step": 934 }, { "epoch": 55.0, "grad_norm": 3.095799446105957, "learning_rate": 1.9399999403380266e-05, "loss": 0.1101, "step": 935 }, { "epoch": 55.05882352941177, "grad_norm": 3.528852701187134, "learning_rate": 1.9398373365086335e-05, "loss": 0.1858, "step": 936 }, { "epoch": 55.11764705882353, "grad_norm": 2.785170316696167, "learning_rate": 1.9396745194773078e-05, "loss": 0.1618, "step": 937 }, { "epoch": 55.1764705882353, "grad_norm": 2.8291966915130615, "learning_rate": 1.939511489280984e-05, "loss": 0.1191, "step": 938 }, { "epoch": 55.23529411764706, "grad_norm": 3.2450883388519287, "learning_rate": 1.9393482459566464e-05, "loss": 0.1167, "step": 939 }, { "epoch": 55.294117647058826, "grad_norm": 4.054742336273193, "learning_rate": 1.939184789541326e-05, "loss": 0.184, "step": 940 }, { "epoch": 55.35294117647059, "grad_norm": 4.916069984436035, "learning_rate": 1.939021120072103e-05, "loss": 0.2499, "step": 941 }, { "epoch": 55.411764705882355, "grad_norm": 4.10253381729126, "learning_rate": 1.938857237586106e-05, "loss": 0.1339, "step": 942 }, { "epoch": 55.470588235294116, "grad_norm": 3.4371986389160156, "learning_rate": 1.9386931421205118e-05, "loss": 0.2126, "step": 943 }, { "epoch": 55.529411764705884, "grad_norm": 3.6122660636901855, "learning_rate": 1.9385288337125448e-05, "loss": 0.2453, "step": 944 }, { "epoch": 55.588235294117645, "grad_norm": 3.525784969329834, "learning_rate": 1.9383643123994788e-05, "loss": 0.1723, "step": 945 }, { "epoch": 55.64705882352941, "grad_norm": 2.4326012134552, "learning_rate": 1.9381995782186353e-05, "loss": 0.1452, "step": 946 }, { "epoch": 55.705882352941174, "grad_norm": 3.646690845489502, "learning_rate": 1.9380346312073847e-05, "loss": 0.1872, "step": 947 }, { "epoch": 55.76470588235294, "grad_norm": 3.3960518836975098, "learning_rate": 1.9378694714031442e-05, "loss": 0.2071, "step": 948 }, { "epoch": 55.8235294117647, "grad_norm": 3.822484254837036, "learning_rate": 1.9377040988433812e-05, "loss": 0.1873, "step": 949 }, { "epoch": 55.88235294117647, "grad_norm": 4.057368755340576, "learning_rate": 1.93753851356561e-05, "loss": 0.1658, "step": 950 }, { "epoch": 55.94117647058823, "grad_norm": 3.230653762817383, "learning_rate": 1.937372715607394e-05, "loss": 0.1245, "step": 951 }, { "epoch": 56.0, "grad_norm": 4.079827785491943, "learning_rate": 1.937206705006344e-05, "loss": 0.1184, "step": 952 }, { "epoch": 56.05882352941177, "grad_norm": 4.3897294998168945, "learning_rate": 1.9370404818001192e-05, "loss": 0.1483, "step": 953 }, { "epoch": 56.11764705882353, "grad_norm": 3.7159464359283447, "learning_rate": 1.936874046026428e-05, "loss": 0.1988, "step": 954 }, { "epoch": 56.1764705882353, "grad_norm": 2.7475225925445557, "learning_rate": 1.936707397723026e-05, "loss": 0.0933, "step": 955 }, { "epoch": 56.23529411764706, "grad_norm": 3.2600605487823486, "learning_rate": 1.9365405369277172e-05, "loss": 0.1765, "step": 956 }, { "epoch": 56.294117647058826, "grad_norm": 3.133700370788574, "learning_rate": 1.9363734636783542e-05, "loss": 0.1258, "step": 957 }, { "epoch": 56.35294117647059, "grad_norm": 5.953769683837891, "learning_rate": 1.9362061780128375e-05, "loss": 0.1499, "step": 958 }, { "epoch": 56.411764705882355, "grad_norm": 2.6389737129211426, "learning_rate": 1.9360386799691158e-05, "loss": 0.1251, "step": 959 }, { "epoch": 56.470588235294116, "grad_norm": 2.9787437915802, "learning_rate": 1.935870969585186e-05, "loss": 0.1508, "step": 960 }, { "epoch": 56.529411764705884, "grad_norm": 5.1410651206970215, "learning_rate": 1.9357030468990928e-05, "loss": 0.2294, "step": 961 }, { "epoch": 56.588235294117645, "grad_norm": 2.671355962753296, "learning_rate": 1.9355349119489303e-05, "loss": 0.2148, "step": 962 }, { "epoch": 56.64705882352941, "grad_norm": 2.720798969268799, "learning_rate": 1.935366564772839e-05, "loss": 0.1683, "step": 963 }, { "epoch": 56.705882352941174, "grad_norm": 4.553972244262695, "learning_rate": 1.9351980054090086e-05, "loss": 0.2961, "step": 964 }, { "epoch": 56.76470588235294, "grad_norm": 3.5752487182617188, "learning_rate": 1.9350292338956773e-05, "loss": 0.1249, "step": 965 }, { "epoch": 56.8235294117647, "grad_norm": 2.852506637573242, "learning_rate": 1.9348602502711305e-05, "loss": 0.1091, "step": 966 }, { "epoch": 56.88235294117647, "grad_norm": 3.3729918003082275, "learning_rate": 1.9346910545737023e-05, "loss": 0.1206, "step": 967 }, { "epoch": 56.94117647058823, "grad_norm": 3.65692138671875, "learning_rate": 1.9345216468417737e-05, "loss": 0.1745, "step": 968 }, { "epoch": 57.0, "grad_norm": 2.938875675201416, "learning_rate": 1.9343520271137764e-05, "loss": 0.1532, "step": 969 }, { "epoch": 57.05882352941177, "grad_norm": 3.1591556072235107, "learning_rate": 1.934182195428188e-05, "loss": 0.1984, "step": 970 }, { "epoch": 57.11764705882353, "grad_norm": 4.098965167999268, "learning_rate": 1.9340121518235342e-05, "loss": 0.136, "step": 971 }, { "epoch": 57.1764705882353, "grad_norm": 3.1074180603027344, "learning_rate": 1.93384189633839e-05, "loss": 0.1946, "step": 972 }, { "epoch": 57.23529411764706, "grad_norm": 3.1172850131988525, "learning_rate": 1.9336714290113778e-05, "loss": 0.1381, "step": 973 }, { "epoch": 57.294117647058826, "grad_norm": 3.4877026081085205, "learning_rate": 1.9335007498811675e-05, "loss": 0.2184, "step": 974 }, { "epoch": 57.35294117647059, "grad_norm": 3.0276153087615967, "learning_rate": 1.933329858986478e-05, "loss": 0.0961, "step": 975 }, { "epoch": 57.411764705882355, "grad_norm": 2.665484666824341, "learning_rate": 1.9331587563660766e-05, "loss": 0.1876, "step": 976 }, { "epoch": 57.470588235294116, "grad_norm": 2.8920981884002686, "learning_rate": 1.9329874420587767e-05, "loss": 0.1117, "step": 977 }, { "epoch": 57.529411764705884, "grad_norm": 3.438541889190674, "learning_rate": 1.9328159161034418e-05, "loss": 0.145, "step": 978 }, { "epoch": 57.588235294117645, "grad_norm": 3.432920455932617, "learning_rate": 1.9326441785389817e-05, "loss": 0.1356, "step": 979 }, { "epoch": 57.64705882352941, "grad_norm": 3.398447275161743, "learning_rate": 1.932472229404356e-05, "loss": 0.1045, "step": 980 }, { "epoch": 57.705882352941174, "grad_norm": 2.8219332695007324, "learning_rate": 1.9323000687385707e-05, "loss": 0.0934, "step": 981 }, { "epoch": 57.76470588235294, "grad_norm": 3.455918312072754, "learning_rate": 1.9321276965806802e-05, "loss": 0.1792, "step": 982 }, { "epoch": 57.8235294117647, "grad_norm": 3.0127670764923096, "learning_rate": 1.9319551129697878e-05, "loss": 0.1209, "step": 983 }, { "epoch": 57.88235294117647, "grad_norm": 4.266013145446777, "learning_rate": 1.9317823179450438e-05, "loss": 0.2981, "step": 984 }, { "epoch": 57.94117647058823, "grad_norm": 3.6102566719055176, "learning_rate": 1.9316093115456466e-05, "loss": 0.1847, "step": 985 }, { "epoch": 58.0, "grad_norm": 3.092414379119873, "learning_rate": 1.9314360938108427e-05, "loss": 0.1657, "step": 986 }, { "epoch": 58.05882352941177, "grad_norm": 3.3423032760620117, "learning_rate": 1.931262664779926e-05, "loss": 0.0877, "step": 987 }, { "epoch": 58.11764705882353, "grad_norm": 2.6530532836914062, "learning_rate": 1.93108902449224e-05, "loss": 0.1319, "step": 988 }, { "epoch": 58.1764705882353, "grad_norm": 2.6790380477905273, "learning_rate": 1.930915172987174e-05, "loss": 0.2613, "step": 989 }, { "epoch": 58.23529411764706, "grad_norm": 2.9084548950195312, "learning_rate": 1.9307411103041667e-05, "loss": 0.1485, "step": 990 }, { "epoch": 58.294117647058826, "grad_norm": 2.765460968017578, "learning_rate": 1.930566836482704e-05, "loss": 0.1177, "step": 991 }, { "epoch": 58.35294117647059, "grad_norm": 3.581092357635498, "learning_rate": 1.93039235156232e-05, "loss": 0.0833, "step": 992 }, { "epoch": 58.411764705882355, "grad_norm": 2.7948007583618164, "learning_rate": 1.9302176555825958e-05, "loss": 0.2293, "step": 993 }, { "epoch": 58.470588235294116, "grad_norm": 4.287092208862305, "learning_rate": 1.9300427485831627e-05, "loss": 0.1363, "step": 994 }, { "epoch": 58.529411764705884, "grad_norm": 3.2332780361175537, "learning_rate": 1.9298676306036968e-05, "loss": 0.1296, "step": 995 }, { "epoch": 58.588235294117645, "grad_norm": 3.5470454692840576, "learning_rate": 1.9296923016839243e-05, "loss": 0.0998, "step": 996 }, { "epoch": 58.64705882352941, "grad_norm": 3.3271985054016113, "learning_rate": 1.9295167618636186e-05, "loss": 0.1376, "step": 997 }, { "epoch": 58.705882352941174, "grad_norm": 3.3940253257751465, "learning_rate": 1.9293410111826006e-05, "loss": 0.0916, "step": 998 }, { "epoch": 58.76470588235294, "grad_norm": 3.164700984954834, "learning_rate": 1.9291650496807395e-05, "loss": 0.0886, "step": 999 }, { "epoch": 58.8235294117647, "grad_norm": 3.8645479679107666, "learning_rate": 1.9289888773979524e-05, "loss": 0.1767, "step": 1000 }, { "epoch": 58.88235294117647, "grad_norm": 5.71295166015625, "learning_rate": 1.928812494374203e-05, "loss": 0.2847, "step": 1001 }, { "epoch": 58.94117647058823, "grad_norm": 4.081841945648193, "learning_rate": 1.9286359006495052e-05, "loss": 0.0902, "step": 1002 }, { "epoch": 59.0, "grad_norm": 2.870450258255005, "learning_rate": 1.928459096263918e-05, "loss": 0.137, "step": 1003 }, { "epoch": 59.05882352941177, "grad_norm": 3.0956170558929443, "learning_rate": 1.9282820812575497e-05, "loss": 0.149, "step": 1004 }, { "epoch": 59.11764705882353, "grad_norm": 3.2928617000579834, "learning_rate": 1.9281048556705566e-05, "loss": 0.085, "step": 1005 }, { "epoch": 59.1764705882353, "grad_norm": 3.7280592918395996, "learning_rate": 1.927927419543142e-05, "loss": 0.2831, "step": 1006 }, { "epoch": 59.23529411764706, "grad_norm": 4.652352333068848, "learning_rate": 1.927749772915557e-05, "loss": 0.1248, "step": 1007 }, { "epoch": 59.294117647058826, "grad_norm": 2.864574909210205, "learning_rate": 1.927571915828101e-05, "loss": 0.1407, "step": 1008 }, { "epoch": 59.35294117647059, "grad_norm": 3.2774875164031982, "learning_rate": 1.9273938483211208e-05, "loss": 0.1401, "step": 1009 }, { "epoch": 59.411764705882355, "grad_norm": 3.2697067260742188, "learning_rate": 1.927215570435011e-05, "loss": 0.1027, "step": 1010 }, { "epoch": 59.470588235294116, "grad_norm": 3.7713425159454346, "learning_rate": 1.927037082210214e-05, "loss": 0.0962, "step": 1011 }, { "epoch": 59.529411764705884, "grad_norm": 2.674793243408203, "learning_rate": 1.9268583836872198e-05, "loss": 0.088, "step": 1012 }, { "epoch": 59.588235294117645, "grad_norm": 4.464625358581543, "learning_rate": 1.926679474906566e-05, "loss": 0.0971, "step": 1013 }, { "epoch": 59.64705882352941, "grad_norm": 2.771347999572754, "learning_rate": 1.9265003559088377e-05, "loss": 0.1163, "step": 1014 }, { "epoch": 59.705882352941174, "grad_norm": 2.6032357215881348, "learning_rate": 1.9263210267346688e-05, "loss": 0.1002, "step": 1015 }, { "epoch": 59.76470588235294, "grad_norm": 3.182267427444458, "learning_rate": 1.9261414874247394e-05, "loss": 0.1989, "step": 1016 }, { "epoch": 59.8235294117647, "grad_norm": 3.2551281452178955, "learning_rate": 1.9259617380197783e-05, "loss": 0.1426, "step": 1017 }, { "epoch": 59.88235294117647, "grad_norm": 2.9357833862304688, "learning_rate": 1.9257817785605617e-05, "loss": 0.1433, "step": 1018 }, { "epoch": 59.94117647058823, "grad_norm": 2.8074309825897217, "learning_rate": 1.9256016090879134e-05, "loss": 0.1513, "step": 1019 }, { "epoch": 60.0, "grad_norm": 3.9129509925842285, "learning_rate": 1.9254212296427043e-05, "loss": 0.3145, "step": 1020 }, { "epoch": 60.05882352941177, "grad_norm": 3.586446762084961, "learning_rate": 1.925240640265854e-05, "loss": 0.2163, "step": 1021 }, { "epoch": 60.11764705882353, "grad_norm": 2.628047466278076, "learning_rate": 1.925059840998329e-05, "loss": 0.097, "step": 1022 }, { "epoch": 60.1764705882353, "grad_norm": 3.6100316047668457, "learning_rate": 1.9248788318811433e-05, "loss": 0.0738, "step": 1023 }, { "epoch": 60.23529411764706, "grad_norm": 2.863971471786499, "learning_rate": 1.924697612955359e-05, "loss": 0.1667, "step": 1024 }, { "epoch": 60.294117647058826, "grad_norm": 3.61775541305542, "learning_rate": 1.9245161842620858e-05, "loss": 0.1059, "step": 1025 }, { "epoch": 60.35294117647059, "grad_norm": 3.6990580558776855, "learning_rate": 1.92433454584248e-05, "loss": 0.1392, "step": 1026 }, { "epoch": 60.411764705882355, "grad_norm": 3.6569924354553223, "learning_rate": 1.9241526977377477e-05, "loss": 0.1255, "step": 1027 }, { "epoch": 60.470588235294116, "grad_norm": 2.9314587116241455, "learning_rate": 1.9239706399891398e-05, "loss": 0.2101, "step": 1028 }, { "epoch": 60.529411764705884, "grad_norm": 5.219291687011719, "learning_rate": 1.9237883726379566e-05, "loss": 0.1105, "step": 1029 }, { "epoch": 60.588235294117645, "grad_norm": 3.0648984909057617, "learning_rate": 1.923605895725545e-05, "loss": 0.1726, "step": 1030 }, { "epoch": 60.64705882352941, "grad_norm": 2.9166762828826904, "learning_rate": 1.9234232092933003e-05, "loss": 0.0876, "step": 1031 }, { "epoch": 60.705882352941174, "grad_norm": 3.7657182216644287, "learning_rate": 1.923240313382665e-05, "loss": 0.225, "step": 1032 }, { "epoch": 60.76470588235294, "grad_norm": 2.888376474380493, "learning_rate": 1.9230572080351285e-05, "loss": 0.0929, "step": 1033 }, { "epoch": 60.8235294117647, "grad_norm": 3.058769464492798, "learning_rate": 1.9228738932922283e-05, "loss": 0.1497, "step": 1034 }, { "epoch": 60.88235294117647, "grad_norm": 3.1764538288116455, "learning_rate": 1.92269036919555e-05, "loss": 0.1262, "step": 1035 }, { "epoch": 60.94117647058823, "grad_norm": 3.537860631942749, "learning_rate": 1.922506635786725e-05, "loss": 0.1354, "step": 1036 }, { "epoch": 61.0, "grad_norm": 2.8915798664093018, "learning_rate": 1.922322693107434e-05, "loss": 0.1462, "step": 1037 }, { "epoch": 61.05882352941177, "grad_norm": 4.444347858428955, "learning_rate": 1.922138541199404e-05, "loss": 0.1511, "step": 1038 }, { "epoch": 61.11764705882353, "grad_norm": 2.7654967308044434, "learning_rate": 1.9219541801044096e-05, "loss": 0.1802, "step": 1039 }, { "epoch": 61.1764705882353, "grad_norm": 2.6177732944488525, "learning_rate": 1.9217696098642736e-05, "loss": 0.0901, "step": 1040 }, { "epoch": 61.23529411764706, "grad_norm": 3.484177350997925, "learning_rate": 1.9215848305208656e-05, "loss": 0.155, "step": 1041 }, { "epoch": 61.294117647058826, "grad_norm": 3.535118818283081, "learning_rate": 1.9213998421161025e-05, "loss": 0.2541, "step": 1042 }, { "epoch": 61.35294117647059, "grad_norm": 3.042645215988159, "learning_rate": 1.9212146446919487e-05, "loss": 0.1122, "step": 1043 }, { "epoch": 61.411764705882355, "grad_norm": 2.328216314315796, "learning_rate": 1.921029238290417e-05, "loss": 0.1176, "step": 1044 }, { "epoch": 61.470588235294116, "grad_norm": 3.4728081226348877, "learning_rate": 1.9208436229535662e-05, "loss": 0.2001, "step": 1045 }, { "epoch": 61.529411764705884, "grad_norm": 8.991009712219238, "learning_rate": 1.9206577987235034e-05, "loss": 0.1257, "step": 1046 }, { "epoch": 61.588235294117645, "grad_norm": 2.7267954349517822, "learning_rate": 1.9204717656423825e-05, "loss": 0.106, "step": 1047 }, { "epoch": 61.64705882352941, "grad_norm": 2.895099401473999, "learning_rate": 1.9202855237524053e-05, "loss": 0.1341, "step": 1048 }, { "epoch": 61.705882352941174, "grad_norm": 3.8511757850646973, "learning_rate": 1.9200990730958202e-05, "loss": 0.0863, "step": 1049 }, { "epoch": 61.76470588235294, "grad_norm": 3.256082773208618, "learning_rate": 1.9199124137149243e-05, "loss": 0.1394, "step": 1050 }, { "epoch": 61.8235294117647, "grad_norm": 2.9004065990448, "learning_rate": 1.919725545652061e-05, "loss": 0.0672, "step": 1051 }, { "epoch": 61.88235294117647, "grad_norm": 3.046902656555176, "learning_rate": 1.9195384689496212e-05, "loss": 0.1394, "step": 1052 }, { "epoch": 61.94117647058823, "grad_norm": 3.2619407176971436, "learning_rate": 1.919351183650043e-05, "loss": 0.1496, "step": 1053 }, { "epoch": 62.0, "grad_norm": 2.1947226524353027, "learning_rate": 1.9191636897958123e-05, "loss": 0.0736, "step": 1054 }, { "epoch": 62.05882352941177, "grad_norm": 2.978959321975708, "learning_rate": 1.918975987429462e-05, "loss": 0.1151, "step": 1055 }, { "epoch": 62.11764705882353, "grad_norm": 2.5879018306732178, "learning_rate": 1.9187880765935724e-05, "loss": 0.1009, "step": 1056 }, { "epoch": 62.1764705882353, "grad_norm": 2.6692965030670166, "learning_rate": 1.918599957330771e-05, "loss": 0.1208, "step": 1057 }, { "epoch": 62.23529411764706, "grad_norm": 3.405585289001465, "learning_rate": 1.9184116296837325e-05, "loss": 0.1632, "step": 1058 }, { "epoch": 62.294117647058826, "grad_norm": 3.0092878341674805, "learning_rate": 1.9182230936951793e-05, "loss": 0.1444, "step": 1059 }, { "epoch": 62.35294117647059, "grad_norm": 4.027172565460205, "learning_rate": 1.91803434940788e-05, "loss": 0.1359, "step": 1060 }, { "epoch": 62.411764705882355, "grad_norm": 3.1467108726501465, "learning_rate": 1.9178453968646523e-05, "loss": 0.1851, "step": 1061 }, { "epoch": 62.470588235294116, "grad_norm": 2.7846484184265137, "learning_rate": 1.9176562361083594e-05, "loss": 0.1388, "step": 1062 }, { "epoch": 62.529411764705884, "grad_norm": 3.614720106124878, "learning_rate": 1.9174668671819124e-05, "loss": 0.0762, "step": 1063 }, { "epoch": 62.588235294117645, "grad_norm": 4.231512546539307, "learning_rate": 1.9172772901282694e-05, "loss": 0.1194, "step": 1064 }, { "epoch": 62.64705882352941, "grad_norm": 2.44175386428833, "learning_rate": 1.9170875049904367e-05, "loss": 0.0859, "step": 1065 }, { "epoch": 62.705882352941174, "grad_norm": 3.26810622215271, "learning_rate": 1.9168975118114662e-05, "loss": 0.2184, "step": 1066 }, { "epoch": 62.76470588235294, "grad_norm": 2.4420812129974365, "learning_rate": 1.9167073106344584e-05, "loss": 0.0813, "step": 1067 }, { "epoch": 62.8235294117647, "grad_norm": 2.693211078643799, "learning_rate": 1.91651690150256e-05, "loss": 0.0957, "step": 1068 }, { "epoch": 62.88235294117647, "grad_norm": 3.269458293914795, "learning_rate": 1.9163262844589656e-05, "loss": 0.1167, "step": 1069 }, { "epoch": 62.94117647058823, "grad_norm": 2.628230333328247, "learning_rate": 1.9161354595469163e-05, "loss": 0.1908, "step": 1070 }, { "epoch": 63.0, "grad_norm": 3.0875511169433594, "learning_rate": 1.9159444268097012e-05, "loss": 0.0924, "step": 1071 }, { "epoch": 63.05882352941177, "grad_norm": 2.7398664951324463, "learning_rate": 1.9157531862906557e-05, "loss": 0.1239, "step": 1072 }, { "epoch": 63.11764705882353, "grad_norm": 2.4190683364868164, "learning_rate": 1.915561738033162e-05, "loss": 0.0857, "step": 1073 }, { "epoch": 63.1764705882353, "grad_norm": 3.12538743019104, "learning_rate": 1.9153700820806516e-05, "loss": 0.1321, "step": 1074 }, { "epoch": 63.23529411764706, "grad_norm": 3.0878052711486816, "learning_rate": 1.9151782184766007e-05, "loss": 0.1565, "step": 1075 }, { "epoch": 63.294117647058826, "grad_norm": 2.3415141105651855, "learning_rate": 1.914986147264533e-05, "loss": 0.0967, "step": 1076 }, { "epoch": 63.35294117647059, "grad_norm": 3.0721423625946045, "learning_rate": 1.9147938684880213e-05, "loss": 0.1701, "step": 1077 }, { "epoch": 63.411764705882355, "grad_norm": 3.1824989318847656, "learning_rate": 1.9146013821906824e-05, "loss": 0.1255, "step": 1078 }, { "epoch": 63.470588235294116, "grad_norm": 3.2265055179595947, "learning_rate": 1.9144086884161834e-05, "loss": 0.1543, "step": 1079 }, { "epoch": 63.529411764705884, "grad_norm": 3.789656639099121, "learning_rate": 1.9142157872082354e-05, "loss": 0.1017, "step": 1080 }, { "epoch": 63.588235294117645, "grad_norm": 3.0065839290618896, "learning_rate": 1.9140226786105987e-05, "loss": 0.1469, "step": 1081 }, { "epoch": 63.64705882352941, "grad_norm": 2.7523739337921143, "learning_rate": 1.9138293626670797e-05, "loss": 0.0753, "step": 1082 }, { "epoch": 63.705882352941174, "grad_norm": 2.8877956867218018, "learning_rate": 1.9136358394215328e-05, "loss": 0.1476, "step": 1083 }, { "epoch": 63.76470588235294, "grad_norm": 2.6006622314453125, "learning_rate": 1.9134421089178578e-05, "loss": 0.0707, "step": 1084 }, { "epoch": 63.8235294117647, "grad_norm": 2.619020700454712, "learning_rate": 1.913248171200003e-05, "loss": 0.1499, "step": 1085 }, { "epoch": 63.88235294117647, "grad_norm": 2.5467023849487305, "learning_rate": 1.9130540263119627e-05, "loss": 0.1472, "step": 1086 }, { "epoch": 63.94117647058823, "grad_norm": 2.5657193660736084, "learning_rate": 1.9128596742977786e-05, "loss": 0.0605, "step": 1087 }, { "epoch": 64.0, "grad_norm": 5.078986167907715, "learning_rate": 1.9126651152015404e-05, "loss": 0.1661, "step": 1088 }, { "epoch": 64.05882352941177, "grad_norm": 3.119093179702759, "learning_rate": 1.9124703490673825e-05, "loss": 0.0852, "step": 1089 }, { "epoch": 64.11764705882354, "grad_norm": 2.945331335067749, "learning_rate": 1.9122753759394884e-05, "loss": 0.0989, "step": 1090 }, { "epoch": 64.17647058823529, "grad_norm": 1.8183804750442505, "learning_rate": 1.9120801958620876e-05, "loss": 0.0557, "step": 1091 }, { "epoch": 64.23529411764706, "grad_norm": 2.8344345092773438, "learning_rate": 1.911884808879456e-05, "loss": 0.1277, "step": 1092 }, { "epoch": 64.29411764705883, "grad_norm": 3.5465662479400635, "learning_rate": 1.911689215035918e-05, "loss": 0.1465, "step": 1093 }, { "epoch": 64.3529411764706, "grad_norm": 5.026786804199219, "learning_rate": 1.9114934143758437e-05, "loss": 0.2704, "step": 1094 }, { "epoch": 64.41176470588235, "grad_norm": 2.490187644958496, "learning_rate": 1.9112974069436502e-05, "loss": 0.0763, "step": 1095 }, { "epoch": 64.47058823529412, "grad_norm": 2.3925838470458984, "learning_rate": 1.9111011927838024e-05, "loss": 0.099, "step": 1096 }, { "epoch": 64.52941176470588, "grad_norm": 3.3312532901763916, "learning_rate": 1.9109047719408106e-05, "loss": 0.1322, "step": 1097 }, { "epoch": 64.58823529411765, "grad_norm": 3.086801290512085, "learning_rate": 1.9107081444592334e-05, "loss": 0.1719, "step": 1098 }, { "epoch": 64.6470588235294, "grad_norm": 2.425783157348633, "learning_rate": 1.9105113103836753e-05, "loss": 0.1098, "step": 1099 }, { "epoch": 64.70588235294117, "grad_norm": 2.6356887817382812, "learning_rate": 1.9103142697587884e-05, "loss": 0.1011, "step": 1100 }, { "epoch": 64.76470588235294, "grad_norm": 2.803956985473633, "learning_rate": 1.910117022629271e-05, "loss": 0.1333, "step": 1101 }, { "epoch": 64.82352941176471, "grad_norm": 2.50880765914917, "learning_rate": 1.9099195690398694e-05, "loss": 0.1068, "step": 1102 }, { "epoch": 64.88235294117646, "grad_norm": 2.1581685543060303, "learning_rate": 1.909721909035375e-05, "loss": 0.0665, "step": 1103 }, { "epoch": 64.94117647058823, "grad_norm": 2.506258487701416, "learning_rate": 1.9095240426606273e-05, "loss": 0.0906, "step": 1104 }, { "epoch": 65.0, "grad_norm": 3.2203142642974854, "learning_rate": 1.9093259699605125e-05, "loss": 0.1732, "step": 1105 }, { "epoch": 65.05882352941177, "grad_norm": 2.2390756607055664, "learning_rate": 1.9091276909799626e-05, "loss": 0.0695, "step": 1106 }, { "epoch": 65.11764705882354, "grad_norm": 3.4881227016448975, "learning_rate": 1.9089292057639577e-05, "loss": 0.111, "step": 1107 }, { "epoch": 65.17647058823529, "grad_norm": 2.976530075073242, "learning_rate": 1.9087305143575245e-05, "loss": 0.1468, "step": 1108 }, { "epoch": 65.23529411764706, "grad_norm": 2.459482431411743, "learning_rate": 1.9085316168057356e-05, "loss": 0.1535, "step": 1109 }, { "epoch": 65.29411764705883, "grad_norm": 2.607757806777954, "learning_rate": 1.9083325131537104e-05, "loss": 0.1791, "step": 1110 }, { "epoch": 65.3529411764706, "grad_norm": 3.6620357036590576, "learning_rate": 1.9081332034466168e-05, "loss": 0.1104, "step": 1111 }, { "epoch": 65.41176470588235, "grad_norm": 2.5928165912628174, "learning_rate": 1.907933687729667e-05, "loss": 0.0993, "step": 1112 }, { "epoch": 65.47058823529412, "grad_norm": 2.297097682952881, "learning_rate": 1.9077339660481216e-05, "loss": 0.0603, "step": 1113 }, { "epoch": 65.52941176470588, "grad_norm": 2.090744733810425, "learning_rate": 1.9075340384472876e-05, "loss": 0.0614, "step": 1114 }, { "epoch": 65.58823529411765, "grad_norm": 3.5636823177337646, "learning_rate": 1.907333904972518e-05, "loss": 0.1148, "step": 1115 }, { "epoch": 65.6470588235294, "grad_norm": 2.507556438446045, "learning_rate": 1.907133565669214e-05, "loss": 0.139, "step": 1116 }, { "epoch": 65.70588235294117, "grad_norm": 3.1707956790924072, "learning_rate": 1.9069330205828213e-05, "loss": 0.0838, "step": 1117 }, { "epoch": 65.76470588235294, "grad_norm": 3.229764223098755, "learning_rate": 1.9067322697588342e-05, "loss": 0.1448, "step": 1118 }, { "epoch": 65.82352941176471, "grad_norm": 2.263679027557373, "learning_rate": 1.906531313242793e-05, "loss": 0.1105, "step": 1119 }, { "epoch": 65.88235294117646, "grad_norm": 2.648106336593628, "learning_rate": 1.9063301510802846e-05, "loss": 0.1562, "step": 1120 }, { "epoch": 65.94117647058823, "grad_norm": 2.4351158142089844, "learning_rate": 1.9061287833169425e-05, "loss": 0.1137, "step": 1121 }, { "epoch": 66.0, "grad_norm": 2.837212562561035, "learning_rate": 1.905927209998447e-05, "loss": 0.0845, "step": 1122 }, { "epoch": 66.05882352941177, "grad_norm": 1.7672443389892578, "learning_rate": 1.905725431170525e-05, "loss": 0.062, "step": 1123 }, { "epoch": 66.11764705882354, "grad_norm": 4.2426276206970215, "learning_rate": 1.90552344687895e-05, "loss": 0.069, "step": 1124 }, { "epoch": 66.17647058823529, "grad_norm": 3.0040578842163086, "learning_rate": 1.905321257169542e-05, "loss": 0.1351, "step": 1125 }, { "epoch": 66.23529411764706, "grad_norm": 3.3721961975097656, "learning_rate": 1.905118862088168e-05, "loss": 0.106, "step": 1126 }, { "epoch": 66.29411764705883, "grad_norm": 2.747145414352417, "learning_rate": 1.904916261680741e-05, "loss": 0.1264, "step": 1127 }, { "epoch": 66.3529411764706, "grad_norm": 2.9852042198181152, "learning_rate": 1.9047134559932203e-05, "loss": 0.1423, "step": 1128 }, { "epoch": 66.41176470588235, "grad_norm": 2.8037233352661133, "learning_rate": 1.9045104450716137e-05, "loss": 0.1343, "step": 1129 }, { "epoch": 66.47058823529412, "grad_norm": 2.9854493141174316, "learning_rate": 1.9043072289619732e-05, "loss": 0.1924, "step": 1130 }, { "epoch": 66.52941176470588, "grad_norm": 3.042239189147949, "learning_rate": 1.9041038077103983e-05, "loss": 0.1445, "step": 1131 }, { "epoch": 66.58823529411765, "grad_norm": 3.1286141872406006, "learning_rate": 1.9039001813630357e-05, "loss": 0.1485, "step": 1132 }, { "epoch": 66.6470588235294, "grad_norm": 3.179755687713623, "learning_rate": 1.9036963499660778e-05, "loss": 0.1778, "step": 1133 }, { "epoch": 66.70588235294117, "grad_norm": 2.333437204360962, "learning_rate": 1.903492313565763e-05, "loss": 0.0748, "step": 1134 }, { "epoch": 66.76470588235294, "grad_norm": 2.139292001724243, "learning_rate": 1.903288072208378e-05, "loss": 0.0614, "step": 1135 }, { "epoch": 66.82352941176471, "grad_norm": 2.579580068588257, "learning_rate": 1.9030836259402545e-05, "loss": 0.1006, "step": 1136 }, { "epoch": 66.88235294117646, "grad_norm": 2.3168106079101562, "learning_rate": 1.9028789748077706e-05, "loss": 0.0856, "step": 1137 }, { "epoch": 66.94117647058823, "grad_norm": 2.1573266983032227, "learning_rate": 1.902674118857352e-05, "loss": 0.0508, "step": 1138 }, { "epoch": 67.0, "grad_norm": 2.2280385494232178, "learning_rate": 1.90246905813547e-05, "loss": 0.091, "step": 1139 }, { "epoch": 67.05882352941177, "grad_norm": 2.250701904296875, "learning_rate": 1.9022637926886427e-05, "loss": 0.0569, "step": 1140 }, { "epoch": 67.11764705882354, "grad_norm": 2.4631810188293457, "learning_rate": 1.9020583225634343e-05, "loss": 0.1828, "step": 1141 }, { "epoch": 67.17647058823529, "grad_norm": 3.08685302734375, "learning_rate": 1.901852647806456e-05, "loss": 0.0835, "step": 1142 }, { "epoch": 67.23529411764706, "grad_norm": 2.2575173377990723, "learning_rate": 1.9016467684643646e-05, "loss": 0.0855, "step": 1143 }, { "epoch": 67.29411764705883, "grad_norm": 2.6734745502471924, "learning_rate": 1.9014406845838643e-05, "loss": 0.1226, "step": 1144 }, { "epoch": 67.3529411764706, "grad_norm": 2.8674063682556152, "learning_rate": 1.901234396211705e-05, "loss": 0.1439, "step": 1145 }, { "epoch": 67.41176470588235, "grad_norm": 2.1477556228637695, "learning_rate": 1.901027903394683e-05, "loss": 0.0716, "step": 1146 }, { "epoch": 67.47058823529412, "grad_norm": 2.476001024246216, "learning_rate": 1.9008212061796412e-05, "loss": 0.1175, "step": 1147 }, { "epoch": 67.52941176470588, "grad_norm": 5.112062454223633, "learning_rate": 1.900614304613469e-05, "loss": 0.0893, "step": 1148 }, { "epoch": 67.58823529411765, "grad_norm": 2.371962785720825, "learning_rate": 1.900407198743102e-05, "loss": 0.0726, "step": 1149 }, { "epoch": 67.6470588235294, "grad_norm": 2.526057481765747, "learning_rate": 1.900199888615522e-05, "loss": 0.082, "step": 1150 }, { "epoch": 67.70588235294117, "grad_norm": 3.1541061401367188, "learning_rate": 1.8999923742777574e-05, "loss": 0.0615, "step": 1151 }, { "epoch": 67.76470588235294, "grad_norm": 2.107006549835205, "learning_rate": 1.899784655776883e-05, "loss": 0.06, "step": 1152 }, { "epoch": 67.82352941176471, "grad_norm": 3.805428981781006, "learning_rate": 1.8995767331600187e-05, "loss": 0.1472, "step": 1153 }, { "epoch": 67.88235294117646, "grad_norm": 3.1271004676818848, "learning_rate": 1.8993686064743332e-05, "loss": 0.221, "step": 1154 }, { "epoch": 67.94117647058823, "grad_norm": 2.120483875274658, "learning_rate": 1.8991602757670387e-05, "loss": 0.1148, "step": 1155 }, { "epoch": 68.0, "grad_norm": 2.1922144889831543, "learning_rate": 1.8989517410853956e-05, "loss": 0.0797, "step": 1156 }, { "epoch": 68.05882352941177, "grad_norm": 2.476539373397827, "learning_rate": 1.8987430024767097e-05, "loss": 0.0864, "step": 1157 }, { "epoch": 68.11764705882354, "grad_norm": 2.700256824493408, "learning_rate": 1.8985340599883338e-05, "loss": 0.2188, "step": 1158 }, { "epoch": 68.17647058823529, "grad_norm": 2.925081968307495, "learning_rate": 1.898324913667666e-05, "loss": 0.1043, "step": 1159 }, { "epoch": 68.23529411764706, "grad_norm": 2.7741777896881104, "learning_rate": 1.898115563562151e-05, "loss": 0.0517, "step": 1160 }, { "epoch": 68.29411764705883, "grad_norm": 3.36214280128479, "learning_rate": 1.8979060097192806e-05, "loss": 0.1308, "step": 1161 }, { "epoch": 68.3529411764706, "grad_norm": 2.5130226612091064, "learning_rate": 1.897696252186591e-05, "loss": 0.1474, "step": 1162 }, { "epoch": 68.41176470588235, "grad_norm": 2.7618801593780518, "learning_rate": 1.8974862910116664e-05, "loss": 0.0574, "step": 1163 }, { "epoch": 68.47058823529412, "grad_norm": 3.9398837089538574, "learning_rate": 1.8972761262421363e-05, "loss": 0.1234, "step": 1164 }, { "epoch": 68.52941176470588, "grad_norm": 2.2951104640960693, "learning_rate": 1.8970657579256766e-05, "loss": 0.0863, "step": 1165 }, { "epoch": 68.58823529411765, "grad_norm": 3.1059322357177734, "learning_rate": 1.8968551861100087e-05, "loss": 0.0524, "step": 1166 }, { "epoch": 68.6470588235294, "grad_norm": 2.7522854804992676, "learning_rate": 1.896644410842902e-05, "loss": 0.1444, "step": 1167 }, { "epoch": 68.70588235294117, "grad_norm": 2.6287848949432373, "learning_rate": 1.8964334321721698e-05, "loss": 0.1298, "step": 1168 }, { "epoch": 68.76470588235294, "grad_norm": 2.5375070571899414, "learning_rate": 1.8962222501456725e-05, "loss": 0.1178, "step": 1169 }, { "epoch": 68.82352941176471, "grad_norm": 2.6812820434570312, "learning_rate": 1.896010864811318e-05, "loss": 0.1031, "step": 1170 }, { "epoch": 68.88235294117646, "grad_norm": 2.3605918884277344, "learning_rate": 1.895799276217057e-05, "loss": 0.1042, "step": 1171 }, { "epoch": 68.94117647058823, "grad_norm": 3.5294713973999023, "learning_rate": 1.8955874844108903e-05, "loss": 0.0671, "step": 1172 }, { "epoch": 69.0, "grad_norm": 1.9464576244354248, "learning_rate": 1.8953754894408617e-05, "loss": 0.0467, "step": 1173 }, { "epoch": 69.05882352941177, "grad_norm": 2.496877670288086, "learning_rate": 1.8951632913550625e-05, "loss": 0.0687, "step": 1174 }, { "epoch": 69.11764705882354, "grad_norm": 2.6279172897338867, "learning_rate": 1.89495089020163e-05, "loss": 0.1511, "step": 1175 }, { "epoch": 69.17647058823529, "grad_norm": 2.516484260559082, "learning_rate": 1.894738286028747e-05, "loss": 0.0994, "step": 1176 }, { "epoch": 69.23529411764706, "grad_norm": 3.2600972652435303, "learning_rate": 1.8945254788846426e-05, "loss": 0.1192, "step": 1177 }, { "epoch": 69.29411764705883, "grad_norm": 2.4166338443756104, "learning_rate": 1.8943124688175927e-05, "loss": 0.0601, "step": 1178 }, { "epoch": 69.3529411764706, "grad_norm": 1.9776358604431152, "learning_rate": 1.894099255875918e-05, "loss": 0.1026, "step": 1179 }, { "epoch": 69.41176470588235, "grad_norm": 3.0244150161743164, "learning_rate": 1.8938858401079866e-05, "loss": 0.1469, "step": 1180 }, { "epoch": 69.47058823529412, "grad_norm": 2.77238130569458, "learning_rate": 1.8936722215622108e-05, "loss": 0.0944, "step": 1181 }, { "epoch": 69.52941176470588, "grad_norm": 2.239079475402832, "learning_rate": 1.8934584002870506e-05, "loss": 0.0487, "step": 1182 }, { "epoch": 69.58823529411765, "grad_norm": 2.324981689453125, "learning_rate": 1.893244376331011e-05, "loss": 0.0821, "step": 1183 }, { "epoch": 69.6470588235294, "grad_norm": 3.25018310546875, "learning_rate": 1.8930301497426434e-05, "loss": 0.2202, "step": 1184 }, { "epoch": 69.70588235294117, "grad_norm": 1.745521903038025, "learning_rate": 1.8928157205705453e-05, "loss": 0.0508, "step": 1185 }, { "epoch": 69.76470588235294, "grad_norm": 3.1457560062408447, "learning_rate": 1.8926010888633597e-05, "loss": 0.0812, "step": 1186 }, { "epoch": 69.82352941176471, "grad_norm": 2.787945508956909, "learning_rate": 1.8923862546697757e-05, "loss": 0.082, "step": 1187 }, { "epoch": 69.88235294117646, "grad_norm": 2.3270905017852783, "learning_rate": 1.8921712180385287e-05, "loss": 0.1219, "step": 1188 }, { "epoch": 69.94117647058823, "grad_norm": 2.2133352756500244, "learning_rate": 1.891955979018399e-05, "loss": 0.1195, "step": 1189 }, { "epoch": 70.0, "grad_norm": 2.5660150051116943, "learning_rate": 1.8917405376582144e-05, "loss": 0.0826, "step": 1190 }, { "epoch": 70.05882352941177, "grad_norm": 2.375840902328491, "learning_rate": 1.8915248940068477e-05, "loss": 0.1106, "step": 1191 }, { "epoch": 70.11764705882354, "grad_norm": 2.5654959678649902, "learning_rate": 1.891309048113217e-05, "loss": 0.1202, "step": 1192 }, { "epoch": 70.17647058823529, "grad_norm": 1.872757077217102, "learning_rate": 1.891093000026287e-05, "loss": 0.0696, "step": 1193 }, { "epoch": 70.23529411764706, "grad_norm": 2.1665847301483154, "learning_rate": 1.8908767497950684e-05, "loss": 0.0673, "step": 1194 }, { "epoch": 70.29411764705883, "grad_norm": 2.892188549041748, "learning_rate": 1.8906602974686178e-05, "loss": 0.144, "step": 1195 }, { "epoch": 70.3529411764706, "grad_norm": 2.9906375408172607, "learning_rate": 1.890443643096037e-05, "loss": 0.2371, "step": 1196 }, { "epoch": 70.41176470588235, "grad_norm": 2.8414220809936523, "learning_rate": 1.8902267867264736e-05, "loss": 0.0687, "step": 1197 }, { "epoch": 70.47058823529412, "grad_norm": 1.7512704133987427, "learning_rate": 1.8900097284091222e-05, "loss": 0.0346, "step": 1198 }, { "epoch": 70.52941176470588, "grad_norm": 2.4632656574249268, "learning_rate": 1.8897924681932226e-05, "loss": 0.1152, "step": 1199 }, { "epoch": 70.58823529411765, "grad_norm": 2.1218318939208984, "learning_rate": 1.889575006128059e-05, "loss": 0.0512, "step": 1200 }, { "epoch": 70.6470588235294, "grad_norm": 2.231593370437622, "learning_rate": 1.8893573422629638e-05, "loss": 0.0554, "step": 1201 }, { "epoch": 70.70588235294117, "grad_norm": 2.976027011871338, "learning_rate": 1.8891394766473138e-05, "loss": 0.1089, "step": 1202 }, { "epoch": 70.76470588235294, "grad_norm": 2.2057886123657227, "learning_rate": 1.888921409330531e-05, "loss": 0.0685, "step": 1203 }, { "epoch": 70.82352941176471, "grad_norm": 3.1849427223205566, "learning_rate": 1.888703140362085e-05, "loss": 0.1486, "step": 1204 }, { "epoch": 70.88235294117646, "grad_norm": 2.173415422439575, "learning_rate": 1.88848466979149e-05, "loss": 0.0857, "step": 1205 }, { "epoch": 70.94117647058823, "grad_norm": 2.6944198608398438, "learning_rate": 1.8882659976683052e-05, "loss": 0.1256, "step": 1206 }, { "epoch": 71.0, "grad_norm": 2.914268732070923, "learning_rate": 1.8880471240421365e-05, "loss": 0.0972, "step": 1207 }, { "epoch": 71.05882352941177, "grad_norm": 3.3814213275909424, "learning_rate": 1.887828048962636e-05, "loss": 0.0524, "step": 1208 }, { "epoch": 71.11764705882354, "grad_norm": 2.3270108699798584, "learning_rate": 1.8876087724795002e-05, "loss": 0.0561, "step": 1209 }, { "epoch": 71.17647058823529, "grad_norm": 3.1374354362487793, "learning_rate": 1.8873892946424722e-05, "loss": 0.2144, "step": 1210 }, { "epoch": 71.23529411764706, "grad_norm": 1.900593876838684, "learning_rate": 1.8871696155013406e-05, "loss": 0.0591, "step": 1211 }, { "epoch": 71.29411764705883, "grad_norm": 2.5184407234191895, "learning_rate": 1.8869497351059393e-05, "loss": 0.0934, "step": 1212 }, { "epoch": 71.3529411764706, "grad_norm": 2.5890800952911377, "learning_rate": 1.8867296535061484e-05, "loss": 0.1164, "step": 1213 }, { "epoch": 71.41176470588235, "grad_norm": 2.8938238620758057, "learning_rate": 1.8865093707518934e-05, "loss": 0.1109, "step": 1214 }, { "epoch": 71.47058823529412, "grad_norm": 2.5008718967437744, "learning_rate": 1.886288886893145e-05, "loss": 0.0554, "step": 1215 }, { "epoch": 71.52941176470588, "grad_norm": 2.043760061264038, "learning_rate": 1.8860682019799203e-05, "loss": 0.0967, "step": 1216 }, { "epoch": 71.58823529411765, "grad_norm": 2.5460448265075684, "learning_rate": 1.8858473160622816e-05, "loss": 0.0886, "step": 1217 }, { "epoch": 71.6470588235294, "grad_norm": 2.7194771766662598, "learning_rate": 1.8856262291903363e-05, "loss": 0.1229, "step": 1218 }, { "epoch": 71.70588235294117, "grad_norm": 2.1861612796783447, "learning_rate": 1.8854049414142387e-05, "loss": 0.0513, "step": 1219 }, { "epoch": 71.76470588235294, "grad_norm": 3.201399803161621, "learning_rate": 1.8851834527841877e-05, "loss": 0.0593, "step": 1220 }, { "epoch": 71.82352941176471, "grad_norm": 2.5041825771331787, "learning_rate": 1.8849617633504276e-05, "loss": 0.1247, "step": 1221 }, { "epoch": 71.88235294117646, "grad_norm": 2.09238338470459, "learning_rate": 1.8847398731632487e-05, "loss": 0.0476, "step": 1222 }, { "epoch": 71.94117647058823, "grad_norm": 2.985854387283325, "learning_rate": 1.8845177822729872e-05, "loss": 0.1368, "step": 1223 }, { "epoch": 72.0, "grad_norm": 2.973884105682373, "learning_rate": 1.8842954907300236e-05, "loss": 0.1331, "step": 1224 }, { "epoch": 72.05882352941177, "grad_norm": 2.872263193130493, "learning_rate": 1.8840729985847854e-05, "loss": 0.0554, "step": 1225 }, { "epoch": 72.11764705882354, "grad_norm": 2.6654605865478516, "learning_rate": 1.8838503058877447e-05, "loss": 0.057, "step": 1226 }, { "epoch": 72.17647058823529, "grad_norm": 2.719630241394043, "learning_rate": 1.8836274126894196e-05, "loss": 0.1351, "step": 1227 }, { "epoch": 72.23529411764706, "grad_norm": 2.6504573822021484, "learning_rate": 1.8834043190403727e-05, "loss": 0.1539, "step": 1228 }, { "epoch": 72.29411764705883, "grad_norm": 2.325439453125, "learning_rate": 1.8831810249912127e-05, "loss": 0.1269, "step": 1229 }, { "epoch": 72.3529411764706, "grad_norm": 3.535092830657959, "learning_rate": 1.8829575305925947e-05, "loss": 0.1306, "step": 1230 }, { "epoch": 72.41176470588235, "grad_norm": 2.9817001819610596, "learning_rate": 1.8827338358952176e-05, "loss": 0.1129, "step": 1231 }, { "epoch": 72.47058823529412, "grad_norm": 2.199814796447754, "learning_rate": 1.882509940949827e-05, "loss": 0.0427, "step": 1232 }, { "epoch": 72.52941176470588, "grad_norm": 2.111070394515991, "learning_rate": 1.882285845807213e-05, "loss": 0.0585, "step": 1233 }, { "epoch": 72.58823529411765, "grad_norm": 3.057264804840088, "learning_rate": 1.8820615505182117e-05, "loss": 0.1065, "step": 1234 }, { "epoch": 72.6470588235294, "grad_norm": 2.3345377445220947, "learning_rate": 1.8818370551337046e-05, "loss": 0.1018, "step": 1235 }, { "epoch": 72.70588235294117, "grad_norm": 1.7865279912948608, "learning_rate": 1.8816123597046183e-05, "loss": 0.0524, "step": 1236 }, { "epoch": 72.76470588235294, "grad_norm": 2.425959587097168, "learning_rate": 1.8813874642819248e-05, "loss": 0.0632, "step": 1237 }, { "epoch": 72.82352941176471, "grad_norm": 2.920097827911377, "learning_rate": 1.881162368916642e-05, "loss": 0.1329, "step": 1238 }, { "epoch": 72.88235294117646, "grad_norm": 1.8901885747909546, "learning_rate": 1.8809370736598324e-05, "loss": 0.0968, "step": 1239 }, { "epoch": 72.94117647058823, "grad_norm": 2.315859794616699, "learning_rate": 1.8807115785626043e-05, "loss": 0.1148, "step": 1240 }, { "epoch": 73.0, "grad_norm": 1.9736469984054565, "learning_rate": 1.880485883676111e-05, "loss": 0.0548, "step": 1241 }, { "epoch": 73.05882352941177, "grad_norm": 3.0065066814422607, "learning_rate": 1.8802599890515515e-05, "loss": 0.0959, "step": 1242 }, { "epoch": 73.11764705882354, "grad_norm": 2.405287504196167, "learning_rate": 1.88003389474017e-05, "loss": 0.0942, "step": 1243 }, { "epoch": 73.17647058823529, "grad_norm": 2.7479071617126465, "learning_rate": 1.879807600793256e-05, "loss": 0.1043, "step": 1244 }, { "epoch": 73.23529411764706, "grad_norm": 1.908052921295166, "learning_rate": 1.8795811072621444e-05, "loss": 0.0605, "step": 1245 }, { "epoch": 73.29411764705883, "grad_norm": 4.1395697593688965, "learning_rate": 1.879354414198215e-05, "loss": 0.1263, "step": 1246 }, { "epoch": 73.3529411764706, "grad_norm": 1.961534857749939, "learning_rate": 1.879127521652893e-05, "loss": 0.0582, "step": 1247 }, { "epoch": 73.41176470588235, "grad_norm": 2.5118143558502197, "learning_rate": 1.878900429677649e-05, "loss": 0.1589, "step": 1248 }, { "epoch": 73.47058823529412, "grad_norm": 2.2753524780273438, "learning_rate": 1.8786731383239985e-05, "loss": 0.0746, "step": 1249 }, { "epoch": 73.52941176470588, "grad_norm": 1.8724608421325684, "learning_rate": 1.878445647643503e-05, "loss": 0.1015, "step": 1250 }, { "epoch": 73.58823529411765, "grad_norm": 2.165526866912842, "learning_rate": 1.8782179576877684e-05, "loss": 0.0636, "step": 1251 }, { "epoch": 73.6470588235294, "grad_norm": 2.040289878845215, "learning_rate": 1.8779900685084462e-05, "loss": 0.1067, "step": 1252 }, { "epoch": 73.70588235294117, "grad_norm": 1.9233617782592773, "learning_rate": 1.8777619801572333e-05, "loss": 0.0515, "step": 1253 }, { "epoch": 73.76470588235294, "grad_norm": 2.2815985679626465, "learning_rate": 1.8775336926858713e-05, "loss": 0.0686, "step": 1254 }, { "epoch": 73.82352941176471, "grad_norm": 2.7601430416107178, "learning_rate": 1.8773052061461466e-05, "loss": 0.1397, "step": 1255 }, { "epoch": 73.88235294117646, "grad_norm": 1.877687931060791, "learning_rate": 1.8770765205898925e-05, "loss": 0.0675, "step": 1256 }, { "epoch": 73.94117647058823, "grad_norm": 1.8621126413345337, "learning_rate": 1.8768476360689854e-05, "loss": 0.0544, "step": 1257 }, { "epoch": 74.0, "grad_norm": 1.7026634216308594, "learning_rate": 1.876618552635348e-05, "loss": 0.0829, "step": 1258 }, { "epoch": 74.05882352941177, "grad_norm": 3.5468127727508545, "learning_rate": 1.8763892703409478e-05, "loss": 0.0648, "step": 1259 }, { "epoch": 74.11764705882354, "grad_norm": 1.6780835390090942, "learning_rate": 1.876159789237797e-05, "loss": 0.0467, "step": 1260 }, { "epoch": 74.17647058823529, "grad_norm": 2.1991162300109863, "learning_rate": 1.8759301093779542e-05, "loss": 0.06, "step": 1261 }, { "epoch": 74.23529411764706, "grad_norm": 2.2178497314453125, "learning_rate": 1.875700230813522e-05, "loss": 0.09, "step": 1262 }, { "epoch": 74.29411764705883, "grad_norm": 1.933241367340088, "learning_rate": 1.8754701535966475e-05, "loss": 0.0449, "step": 1263 }, { "epoch": 74.3529411764706, "grad_norm": 2.583937883377075, "learning_rate": 1.875239877779525e-05, "loss": 0.0727, "step": 1264 }, { "epoch": 74.41176470588235, "grad_norm": 2.7915289402008057, "learning_rate": 1.8750094034143914e-05, "loss": 0.0931, "step": 1265 }, { "epoch": 74.47058823529412, "grad_norm": 2.6280815601348877, "learning_rate": 1.8747787305535302e-05, "loss": 0.1558, "step": 1266 }, { "epoch": 74.52941176470588, "grad_norm": 2.439059019088745, "learning_rate": 1.8745478592492698e-05, "loss": 0.0811, "step": 1267 }, { "epoch": 74.58823529411765, "grad_norm": 2.152700901031494, "learning_rate": 1.8743167895539827e-05, "loss": 0.0606, "step": 1268 }, { "epoch": 74.6470588235294, "grad_norm": 1.8157734870910645, "learning_rate": 1.8740855215200876e-05, "loss": 0.0479, "step": 1269 }, { "epoch": 74.70588235294117, "grad_norm": 2.5607516765594482, "learning_rate": 1.8738540552000472e-05, "loss": 0.121, "step": 1270 }, { "epoch": 74.76470588235294, "grad_norm": 2.3475232124328613, "learning_rate": 1.8736223906463698e-05, "loss": 0.1112, "step": 1271 }, { "epoch": 74.82352941176471, "grad_norm": 2.5522828102111816, "learning_rate": 1.8733905279116083e-05, "loss": 0.235, "step": 1272 }, { "epoch": 74.88235294117646, "grad_norm": 1.8398431539535522, "learning_rate": 1.873158467048361e-05, "loss": 0.0707, "step": 1273 }, { "epoch": 74.94117647058823, "grad_norm": 1.4202181100845337, "learning_rate": 1.8729262081092705e-05, "loss": 0.0363, "step": 1274 }, { "epoch": 75.0, "grad_norm": 1.9617218971252441, "learning_rate": 1.8726937511470247e-05, "loss": 0.0637, "step": 1275 }, { "epoch": 75.05882352941177, "grad_norm": 2.324826240539551, "learning_rate": 1.872461096214357e-05, "loss": 0.0828, "step": 1276 }, { "epoch": 75.11764705882354, "grad_norm": 2.5338072776794434, "learning_rate": 1.8722282433640445e-05, "loss": 0.1781, "step": 1277 }, { "epoch": 75.17647058823529, "grad_norm": 1.595013976097107, "learning_rate": 1.8719951926489103e-05, "loss": 0.0741, "step": 1278 }, { "epoch": 75.23529411764706, "grad_norm": 2.119633913040161, "learning_rate": 1.8717619441218216e-05, "loss": 0.064, "step": 1279 }, { "epoch": 75.29411764705883, "grad_norm": 2.846858024597168, "learning_rate": 1.8715284978356908e-05, "loss": 0.082, "step": 1280 }, { "epoch": 75.3529411764706, "grad_norm": 1.8661333322525024, "learning_rate": 1.871294853843475e-05, "loss": 0.058, "step": 1281 }, { "epoch": 75.41176470588235, "grad_norm": 2.3111865520477295, "learning_rate": 1.8710610121981768e-05, "loss": 0.1445, "step": 1282 }, { "epoch": 75.47058823529412, "grad_norm": 1.7884002923965454, "learning_rate": 1.870826972952843e-05, "loss": 0.0533, "step": 1283 }, { "epoch": 75.52941176470588, "grad_norm": 2.0154049396514893, "learning_rate": 1.870592736160565e-05, "loss": 0.0406, "step": 1284 }, { "epoch": 75.58823529411765, "grad_norm": 2.048150062561035, "learning_rate": 1.8703583018744794e-05, "loss": 0.0538, "step": 1285 }, { "epoch": 75.6470588235294, "grad_norm": 1.6988866329193115, "learning_rate": 1.870123670147768e-05, "loss": 0.0996, "step": 1286 }, { "epoch": 75.70588235294117, "grad_norm": 2.390122413635254, "learning_rate": 1.8698888410336565e-05, "loss": 0.0725, "step": 1287 }, { "epoch": 75.76470588235294, "grad_norm": 1.5217033624649048, "learning_rate": 1.869653814585416e-05, "loss": 0.0784, "step": 1288 }, { "epoch": 75.82352941176471, "grad_norm": 1.9148656129837036, "learning_rate": 1.8694185908563627e-05, "loss": 0.042, "step": 1289 }, { "epoch": 75.88235294117646, "grad_norm": 2.597513437271118, "learning_rate": 1.8691831698998565e-05, "loss": 0.072, "step": 1290 }, { "epoch": 75.94117647058823, "grad_norm": 2.232128143310547, "learning_rate": 1.868947551769303e-05, "loss": 0.0778, "step": 1291 }, { "epoch": 76.0, "grad_norm": 2.098985195159912, "learning_rate": 1.8687117365181514e-05, "loss": 0.0848, "step": 1292 }, { "epoch": 76.05882352941177, "grad_norm": 2.5954666137695312, "learning_rate": 1.8684757241998967e-05, "loss": 0.0942, "step": 1293 }, { "epoch": 76.11764705882354, "grad_norm": 2.5923993587493896, "learning_rate": 1.8682395148680786e-05, "loss": 0.1205, "step": 1294 }, { "epoch": 76.17647058823529, "grad_norm": 3.8656423091888428, "learning_rate": 1.868003108576281e-05, "loss": 0.0527, "step": 1295 }, { "epoch": 76.23529411764706, "grad_norm": 1.8851734399795532, "learning_rate": 1.8677665053781324e-05, "loss": 0.0719, "step": 1296 }, { "epoch": 76.29411764705883, "grad_norm": 2.569098711013794, "learning_rate": 1.8675297053273066e-05, "loss": 0.1446, "step": 1297 }, { "epoch": 76.3529411764706, "grad_norm": 2.1960647106170654, "learning_rate": 1.867292708477521e-05, "loss": 0.089, "step": 1298 }, { "epoch": 76.41176470588235, "grad_norm": 2.3084001541137695, "learning_rate": 1.867055514882539e-05, "loss": 0.0992, "step": 1299 }, { "epoch": 76.47058823529412, "grad_norm": 2.6402971744537354, "learning_rate": 1.8668181245961678e-05, "loss": 0.0877, "step": 1300 }, { "epoch": 76.52941176470588, "grad_norm": 2.0500383377075195, "learning_rate": 1.8665805376722586e-05, "loss": 0.074, "step": 1301 }, { "epoch": 76.58823529411765, "grad_norm": 4.44792366027832, "learning_rate": 1.8663427541647085e-05, "loss": 0.0555, "step": 1302 }, { "epoch": 76.6470588235294, "grad_norm": 1.6254891157150269, "learning_rate": 1.866104774127459e-05, "loss": 0.052, "step": 1303 }, { "epoch": 76.70588235294117, "grad_norm": 1.6194461584091187, "learning_rate": 1.8658665976144955e-05, "loss": 0.072, "step": 1304 }, { "epoch": 76.76470588235294, "grad_norm": 2.1727423667907715, "learning_rate": 1.865628224679848e-05, "loss": 0.0739, "step": 1305 }, { "epoch": 76.82352941176471, "grad_norm": 1.9356844425201416, "learning_rate": 1.8653896553775918e-05, "loss": 0.0507, "step": 1306 }, { "epoch": 76.88235294117646, "grad_norm": 1.574021577835083, "learning_rate": 1.865150889761846e-05, "loss": 0.038, "step": 1307 }, { "epoch": 76.94117647058823, "grad_norm": 2.3383522033691406, "learning_rate": 1.864911927886775e-05, "loss": 0.1033, "step": 1308 }, { "epoch": 77.0, "grad_norm": 2.688098430633545, "learning_rate": 1.8646727698065865e-05, "loss": 0.1313, "step": 1309 }, { "epoch": 77.05882352941177, "grad_norm": 4.291703701019287, "learning_rate": 1.8644334155755342e-05, "loss": 0.0762, "step": 1310 }, { "epoch": 77.11764705882354, "grad_norm": 2.1041669845581055, "learning_rate": 1.864193865247915e-05, "loss": 0.0971, "step": 1311 }, { "epoch": 77.17647058823529, "grad_norm": 2.598104476928711, "learning_rate": 1.8639541188780712e-05, "loss": 0.1246, "step": 1312 }, { "epoch": 77.23529411764706, "grad_norm": 1.8122011423110962, "learning_rate": 1.8637141765203893e-05, "loss": 0.0497, "step": 1313 }, { "epoch": 77.29411764705883, "grad_norm": 1.5489773750305176, "learning_rate": 1.8634740382293e-05, "loss": 0.0747, "step": 1314 }, { "epoch": 77.3529411764706, "grad_norm": 1.7298179864883423, "learning_rate": 1.8632337040592782e-05, "loss": 0.0416, "step": 1315 }, { "epoch": 77.41176470588235, "grad_norm": 2.410013198852539, "learning_rate": 1.8629931740648446e-05, "loss": 0.05, "step": 1316 }, { "epoch": 77.47058823529412, "grad_norm": 2.66249418258667, "learning_rate": 1.8627524483005627e-05, "loss": 0.0951, "step": 1317 }, { "epoch": 77.52941176470588, "grad_norm": 1.7345154285430908, "learning_rate": 1.8625115268210415e-05, "loss": 0.0882, "step": 1318 }, { "epoch": 77.58823529411765, "grad_norm": 2.282170057296753, "learning_rate": 1.8622704096809335e-05, "loss": 0.0995, "step": 1319 }, { "epoch": 77.6470588235294, "grad_norm": 2.6160027980804443, "learning_rate": 1.8620290969349365e-05, "loss": 0.0514, "step": 1320 }, { "epoch": 77.70588235294117, "grad_norm": 2.596527338027954, "learning_rate": 1.861787588637792e-05, "loss": 0.0982, "step": 1321 }, { "epoch": 77.76470588235294, "grad_norm": 2.9203901290893555, "learning_rate": 1.8615458848442864e-05, "loss": 0.1109, "step": 1322 }, { "epoch": 77.82352941176471, "grad_norm": 2.132915496826172, "learning_rate": 1.86130398560925e-05, "loss": 0.0641, "step": 1323 }, { "epoch": 77.88235294117646, "grad_norm": 2.524930715560913, "learning_rate": 1.8610618909875575e-05, "loss": 0.0978, "step": 1324 }, { "epoch": 77.94117647058823, "grad_norm": 1.7449380159378052, "learning_rate": 1.860819601034128e-05, "loss": 0.0561, "step": 1325 }, { "epoch": 78.0, "grad_norm": 3.1940698623657227, "learning_rate": 1.8605771158039253e-05, "loss": 0.0913, "step": 1326 }, { "epoch": 78.05882352941177, "grad_norm": 1.8370976448059082, "learning_rate": 1.8603344353519567e-05, "loss": 0.0883, "step": 1327 }, { "epoch": 78.11764705882354, "grad_norm": 2.3358848094940186, "learning_rate": 1.8600915597332748e-05, "loss": 0.0523, "step": 1328 }, { "epoch": 78.17647058823529, "grad_norm": 2.345669984817505, "learning_rate": 1.859848489002975e-05, "loss": 0.096, "step": 1329 }, { "epoch": 78.23529411764706, "grad_norm": 2.235283613204956, "learning_rate": 1.8596052232161985e-05, "loss": 0.0494, "step": 1330 }, { "epoch": 78.29411764705883, "grad_norm": 2.4934146404266357, "learning_rate": 1.8593617624281298e-05, "loss": 0.0577, "step": 1331 }, { "epoch": 78.3529411764706, "grad_norm": 2.265598773956299, "learning_rate": 1.8591181066939984e-05, "loss": 0.1046, "step": 1332 }, { "epoch": 78.41176470588235, "grad_norm": 2.6064133644104004, "learning_rate": 1.8588742560690768e-05, "loss": 0.0806, "step": 1333 }, { "epoch": 78.47058823529412, "grad_norm": 1.8293113708496094, "learning_rate": 1.8586302106086832e-05, "loss": 0.0868, "step": 1334 }, { "epoch": 78.52941176470588, "grad_norm": 2.3590567111968994, "learning_rate": 1.858385970368179e-05, "loss": 0.1934, "step": 1335 }, { "epoch": 78.58823529411765, "grad_norm": 1.7181494235992432, "learning_rate": 1.85814153540297e-05, "loss": 0.0632, "step": 1336 }, { "epoch": 78.6470588235294, "grad_norm": 1.8208975791931152, "learning_rate": 1.8578969057685063e-05, "loss": 0.0579, "step": 1337 }, { "epoch": 78.70588235294117, "grad_norm": 1.7769207954406738, "learning_rate": 1.857652081520282e-05, "loss": 0.0604, "step": 1338 }, { "epoch": 78.76470588235294, "grad_norm": 1.6641309261322021, "learning_rate": 1.8574070627138353e-05, "loss": 0.0477, "step": 1339 }, { "epoch": 78.82352941176471, "grad_norm": 2.7438883781433105, "learning_rate": 1.8571618494047494e-05, "loss": 0.0696, "step": 1340 }, { "epoch": 78.88235294117646, "grad_norm": 1.949159026145935, "learning_rate": 1.85691644164865e-05, "loss": 0.0699, "step": 1341 }, { "epoch": 78.94117647058823, "grad_norm": 2.1296331882476807, "learning_rate": 1.8566708395012082e-05, "loss": 0.0557, "step": 1342 }, { "epoch": 79.0, "grad_norm": 1.7414978742599487, "learning_rate": 1.8564250430181387e-05, "loss": 0.0552, "step": 1343 }, { "epoch": 79.05882352941177, "grad_norm": 2.0853874683380127, "learning_rate": 1.8561790522552005e-05, "loss": 0.1304, "step": 1344 }, { "epoch": 79.11764705882354, "grad_norm": 1.88166081905365, "learning_rate": 1.8559328672681966e-05, "loss": 0.0617, "step": 1345 }, { "epoch": 79.17647058823529, "grad_norm": 2.1048200130462646, "learning_rate": 1.8556864881129737e-05, "loss": 0.0804, "step": 1346 }, { "epoch": 79.23529411764706, "grad_norm": 2.1066036224365234, "learning_rate": 1.855439914845423e-05, "loss": 0.1025, "step": 1347 }, { "epoch": 79.29411764705883, "grad_norm": 2.0704445838928223, "learning_rate": 1.85519314752148e-05, "loss": 0.1122, "step": 1348 }, { "epoch": 79.3529411764706, "grad_norm": 1.5528854131698608, "learning_rate": 1.8549461861971236e-05, "loss": 0.0705, "step": 1349 }, { "epoch": 79.41176470588235, "grad_norm": 1.7515337467193604, "learning_rate": 1.8546990309283767e-05, "loss": 0.034, "step": 1350 }, { "epoch": 79.47058823529412, "grad_norm": 2.675987958908081, "learning_rate": 1.8544516817713067e-05, "loss": 0.1141, "step": 1351 }, { "epoch": 79.52941176470588, "grad_norm": 2.6636719703674316, "learning_rate": 1.8542041387820244e-05, "loss": 0.0629, "step": 1352 }, { "epoch": 79.58823529411765, "grad_norm": 1.821618676185608, "learning_rate": 1.853956402016685e-05, "loss": 0.0378, "step": 1353 }, { "epoch": 79.6470588235294, "grad_norm": 1.8645890951156616, "learning_rate": 1.853708471531488e-05, "loss": 0.0411, "step": 1354 }, { "epoch": 79.70588235294117, "grad_norm": 2.829866409301758, "learning_rate": 1.8534603473826756e-05, "loss": 0.0979, "step": 1355 }, { "epoch": 79.76470588235294, "grad_norm": 2.0046744346618652, "learning_rate": 1.853212029626536e-05, "loss": 0.0348, "step": 1356 }, { "epoch": 79.82352941176471, "grad_norm": 2.395674228668213, "learning_rate": 1.852963518319398e-05, "loss": 0.0881, "step": 1357 }, { "epoch": 79.88235294117646, "grad_norm": 2.064730644226074, "learning_rate": 1.8527148135176384e-05, "loss": 0.0744, "step": 1358 }, { "epoch": 79.94117647058823, "grad_norm": 1.9510775804519653, "learning_rate": 1.8524659152776748e-05, "loss": 0.0514, "step": 1359 }, { "epoch": 80.0, "grad_norm": 2.844282388687134, "learning_rate": 1.8522168236559693e-05, "loss": 0.0948, "step": 1360 }, { "epoch": 80.05882352941177, "grad_norm": 1.8370670080184937, "learning_rate": 1.8519675387090296e-05, "loss": 0.0679, "step": 1361 }, { "epoch": 80.11764705882354, "grad_norm": 1.5811898708343506, "learning_rate": 1.8517180604934046e-05, "loss": 0.0368, "step": 1362 }, { "epoch": 80.17647058823529, "grad_norm": 1.8014968633651733, "learning_rate": 1.8514683890656893e-05, "loss": 0.0705, "step": 1363 }, { "epoch": 80.23529411764706, "grad_norm": 1.8991668224334717, "learning_rate": 1.8512185244825215e-05, "loss": 0.0542, "step": 1364 }, { "epoch": 80.29411764705883, "grad_norm": 2.657694101333618, "learning_rate": 1.850968466800582e-05, "loss": 0.0586, "step": 1365 }, { "epoch": 80.3529411764706, "grad_norm": 2.4490809440612793, "learning_rate": 1.850718216076598e-05, "loss": 0.0842, "step": 1366 }, { "epoch": 80.41176470588235, "grad_norm": 1.9681060314178467, "learning_rate": 1.8504677723673376e-05, "loss": 0.0835, "step": 1367 }, { "epoch": 80.47058823529412, "grad_norm": 2.8201794624328613, "learning_rate": 1.8502171357296144e-05, "loss": 0.0454, "step": 1368 }, { "epoch": 80.52941176470588, "grad_norm": 1.8317617177963257, "learning_rate": 1.8499663062202848e-05, "loss": 0.0758, "step": 1369 }, { "epoch": 80.58823529411765, "grad_norm": 1.6954500675201416, "learning_rate": 1.84971528389625e-05, "loss": 0.0386, "step": 1370 }, { "epoch": 80.6470588235294, "grad_norm": 1.7593839168548584, "learning_rate": 1.8494640688144542e-05, "loss": 0.0697, "step": 1371 }, { "epoch": 80.70588235294117, "grad_norm": 2.0556764602661133, "learning_rate": 1.849212661031885e-05, "loss": 0.0712, "step": 1372 }, { "epoch": 80.76470588235294, "grad_norm": 2.3007261753082275, "learning_rate": 1.848961060605575e-05, "loss": 0.1024, "step": 1373 }, { "epoch": 80.82352941176471, "grad_norm": 1.620357871055603, "learning_rate": 1.848709267592599e-05, "loss": 0.1, "step": 1374 }, { "epoch": 80.88235294117646, "grad_norm": 2.0030484199523926, "learning_rate": 1.8484572820500768e-05, "loss": 0.0847, "step": 1375 }, { "epoch": 80.94117647058823, "grad_norm": 1.8600631952285767, "learning_rate": 1.848205104035171e-05, "loss": 0.06, "step": 1376 }, { "epoch": 81.0, "grad_norm": 2.4476921558380127, "learning_rate": 1.847952733605088e-05, "loss": 0.0847, "step": 1377 }, { "epoch": 81.05882352941177, "grad_norm": 2.1897709369659424, "learning_rate": 1.847700170817078e-05, "loss": 0.0755, "step": 1378 }, { "epoch": 81.11764705882354, "grad_norm": 2.0918960571289062, "learning_rate": 1.8474474157284355e-05, "loss": 0.0714, "step": 1379 }, { "epoch": 81.17647058823529, "grad_norm": 2.448802947998047, "learning_rate": 1.8471944683964972e-05, "loss": 0.0448, "step": 1380 }, { "epoch": 81.23529411764706, "grad_norm": 2.263730049133301, "learning_rate": 1.8469413288786443e-05, "loss": 0.0674, "step": 1381 }, { "epoch": 81.29411764705883, "grad_norm": 2.124058485031128, "learning_rate": 1.8466879972323015e-05, "loss": 0.0431, "step": 1382 }, { "epoch": 81.3529411764706, "grad_norm": 2.4925713539123535, "learning_rate": 1.8464344735149367e-05, "loss": 0.1063, "step": 1383 }, { "epoch": 81.41176470588235, "grad_norm": 1.664058804512024, "learning_rate": 1.8461807577840628e-05, "loss": 0.0584, "step": 1384 }, { "epoch": 81.47058823529412, "grad_norm": 1.7661957740783691, "learning_rate": 1.845926850097234e-05, "loss": 0.0423, "step": 1385 }, { "epoch": 81.52941176470588, "grad_norm": 2.4492568969726562, "learning_rate": 1.8456727505120495e-05, "loss": 0.0489, "step": 1386 }, { "epoch": 81.58823529411765, "grad_norm": 2.3687751293182373, "learning_rate": 1.8454184590861524e-05, "loss": 0.1583, "step": 1387 }, { "epoch": 81.6470588235294, "grad_norm": 2.3510751724243164, "learning_rate": 1.8451639758772275e-05, "loss": 0.0506, "step": 1388 }, { "epoch": 81.70588235294117, "grad_norm": 1.669118881225586, "learning_rate": 1.8449093009430055e-05, "loss": 0.0467, "step": 1389 }, { "epoch": 81.76470588235294, "grad_norm": 1.6625759601593018, "learning_rate": 1.8446544343412585e-05, "loss": 0.0526, "step": 1390 }, { "epoch": 81.82352941176471, "grad_norm": 1.9186087846755981, "learning_rate": 1.8443993761298033e-05, "loss": 0.1073, "step": 1391 }, { "epoch": 81.88235294117646, "grad_norm": 2.2475595474243164, "learning_rate": 1.8441441263665e-05, "loss": 0.0664, "step": 1392 }, { "epoch": 81.94117647058823, "grad_norm": 3.079580783843994, "learning_rate": 1.8438886851092514e-05, "loss": 0.0785, "step": 1393 }, { "epoch": 82.0, "grad_norm": 3.1631407737731934, "learning_rate": 1.8436330524160048e-05, "loss": 0.0928, "step": 1394 }, { "epoch": 82.05882352941177, "grad_norm": 2.7215559482574463, "learning_rate": 1.8433772283447504e-05, "loss": 0.0559, "step": 1395 }, { "epoch": 82.11764705882354, "grad_norm": 2.639380693435669, "learning_rate": 1.843121212953521e-05, "loss": 0.0935, "step": 1396 }, { "epoch": 82.17647058823529, "grad_norm": 1.9521805047988892, "learning_rate": 1.842865006300395e-05, "loss": 0.0288, "step": 1397 }, { "epoch": 82.23529411764706, "grad_norm": 2.293628215789795, "learning_rate": 1.842608608443492e-05, "loss": 0.0384, "step": 1398 }, { "epoch": 82.29411764705883, "grad_norm": 1.937187671661377, "learning_rate": 1.8423520194409757e-05, "loss": 0.0302, "step": 1399 }, { "epoch": 82.3529411764706, "grad_norm": 2.2090001106262207, "learning_rate": 1.842095239351054e-05, "loss": 0.1235, "step": 1400 }, { "epoch": 82.41176470588235, "grad_norm": 2.018460750579834, "learning_rate": 1.8418382682319768e-05, "loss": 0.0466, "step": 1401 }, { "epoch": 82.47058823529412, "grad_norm": 2.324237108230591, "learning_rate": 1.8415811061420383e-05, "loss": 0.08, "step": 1402 }, { "epoch": 82.52941176470588, "grad_norm": 2.8905186653137207, "learning_rate": 1.8413237531395748e-05, "loss": 0.0565, "step": 1403 }, { "epoch": 82.58823529411765, "grad_norm": 2.4539058208465576, "learning_rate": 1.841066209282968e-05, "loss": 0.0462, "step": 1404 }, { "epoch": 82.6470588235294, "grad_norm": 3.437904119491577, "learning_rate": 1.8408084746306412e-05, "loss": 0.103, "step": 1405 }, { "epoch": 82.70588235294117, "grad_norm": 1.819402813911438, "learning_rate": 1.8405505492410616e-05, "loss": 0.0856, "step": 1406 }, { "epoch": 82.76470588235294, "grad_norm": 1.3310905694961548, "learning_rate": 1.840292433172739e-05, "loss": 0.042, "step": 1407 }, { "epoch": 82.82352941176471, "grad_norm": 2.6431028842926025, "learning_rate": 1.8400341264842276e-05, "loss": 0.1484, "step": 1408 }, { "epoch": 82.88235294117646, "grad_norm": 1.878577709197998, "learning_rate": 1.8397756292341236e-05, "loss": 0.0428, "step": 1409 }, { "epoch": 82.94117647058823, "grad_norm": 1.7390649318695068, "learning_rate": 1.8395169414810677e-05, "loss": 0.0674, "step": 1410 }, { "epoch": 83.0, "grad_norm": 1.9927383661270142, "learning_rate": 1.8392580632837423e-05, "loss": 0.085, "step": 1411 }, { "epoch": 83.05882352941177, "grad_norm": 2.2263224124908447, "learning_rate": 1.8389989947008753e-05, "loss": 0.0947, "step": 1412 }, { "epoch": 83.11764705882354, "grad_norm": 1.8158313035964966, "learning_rate": 1.838739735791235e-05, "loss": 0.0741, "step": 1413 }, { "epoch": 83.17647058823529, "grad_norm": 1.9380757808685303, "learning_rate": 1.838480286613635e-05, "loss": 0.096, "step": 1414 }, { "epoch": 83.23529411764706, "grad_norm": 2.1911323070526123, "learning_rate": 1.8382206472269306e-05, "loss": 0.0528, "step": 1415 }, { "epoch": 83.29411764705883, "grad_norm": 2.7794787883758545, "learning_rate": 1.837960817690022e-05, "loss": 0.0829, "step": 1416 }, { "epoch": 83.3529411764706, "grad_norm": 2.310318946838379, "learning_rate": 1.8377007980618503e-05, "loss": 0.1014, "step": 1417 }, { "epoch": 83.41176470588235, "grad_norm": 1.4093114137649536, "learning_rate": 1.8374405884014018e-05, "loss": 0.0682, "step": 1418 }, { "epoch": 83.47058823529412, "grad_norm": 3.120938539505005, "learning_rate": 1.837180188767705e-05, "loss": 0.0421, "step": 1419 }, { "epoch": 83.52941176470588, "grad_norm": 2.162107467651367, "learning_rate": 1.836919599219831e-05, "loss": 0.0672, "step": 1420 }, { "epoch": 83.58823529411765, "grad_norm": 2.34367299079895, "learning_rate": 1.8366588198168943e-05, "loss": 0.0831, "step": 1421 }, { "epoch": 83.6470588235294, "grad_norm": 2.6268856525421143, "learning_rate": 1.836397850618054e-05, "loss": 0.0587, "step": 1422 }, { "epoch": 83.70588235294117, "grad_norm": 1.7602202892303467, "learning_rate": 1.836136691682509e-05, "loss": 0.038, "step": 1423 }, { "epoch": 83.76470588235294, "grad_norm": 2.5072360038757324, "learning_rate": 1.8358753430695047e-05, "loss": 0.0613, "step": 1424 }, { "epoch": 83.82352941176471, "grad_norm": 1.9519636631011963, "learning_rate": 1.8356138048383273e-05, "loss": 0.0471, "step": 1425 }, { "epoch": 83.88235294117646, "grad_norm": 1.8506600856781006, "learning_rate": 1.8353520770483067e-05, "loss": 0.0522, "step": 1426 }, { "epoch": 83.94117647058823, "grad_norm": 2.559447765350342, "learning_rate": 1.835090159758816e-05, "loss": 0.0849, "step": 1427 }, { "epoch": 84.0, "grad_norm": 4.218313694000244, "learning_rate": 1.8348280530292712e-05, "loss": 0.0901, "step": 1428 }, { "epoch": 84.05882352941177, "grad_norm": 1.6398316621780396, "learning_rate": 1.834565756919131e-05, "loss": 0.061, "step": 1429 }, { "epoch": 84.11764705882354, "grad_norm": 1.8575254678726196, "learning_rate": 1.834303271487897e-05, "loss": 0.0632, "step": 1430 }, { "epoch": 84.17647058823529, "grad_norm": 2.442760705947876, "learning_rate": 1.8340405967951144e-05, "loss": 0.0442, "step": 1431 }, { "epoch": 84.23529411764706, "grad_norm": 2.4268198013305664, "learning_rate": 1.8337777329003707e-05, "loss": 0.0702, "step": 1432 }, { "epoch": 84.29411764705883, "grad_norm": 3.69362211227417, "learning_rate": 1.8335146798632965e-05, "loss": 0.1297, "step": 1433 }, { "epoch": 84.3529411764706, "grad_norm": 1.6984264850616455, "learning_rate": 1.833251437743565e-05, "loss": 0.0527, "step": 1434 }, { "epoch": 84.41176470588235, "grad_norm": 2.575195789337158, "learning_rate": 1.8329880066008936e-05, "loss": 0.049, "step": 1435 }, { "epoch": 84.47058823529412, "grad_norm": 2.071697473526001, "learning_rate": 1.8327243864950405e-05, "loss": 0.057, "step": 1436 }, { "epoch": 84.52941176470588, "grad_norm": 2.05735445022583, "learning_rate": 1.8324605774858087e-05, "loss": 0.0901, "step": 1437 }, { "epoch": 84.58823529411765, "grad_norm": 2.52120304107666, "learning_rate": 1.832196579633043e-05, "loss": 0.0778, "step": 1438 }, { "epoch": 84.6470588235294, "grad_norm": 1.5532045364379883, "learning_rate": 1.8319323929966304e-05, "loss": 0.03, "step": 1439 }, { "epoch": 84.70588235294117, "grad_norm": 1.5201053619384766, "learning_rate": 1.8316680176365032e-05, "loss": 0.0716, "step": 1440 }, { "epoch": 84.76470588235294, "grad_norm": 2.86199688911438, "learning_rate": 1.8314034536126337e-05, "loss": 0.0404, "step": 1441 }, { "epoch": 84.82352941176471, "grad_norm": 2.7172863483428955, "learning_rate": 1.8311387009850387e-05, "loss": 0.0553, "step": 1442 }, { "epoch": 84.88235294117646, "grad_norm": 2.3381762504577637, "learning_rate": 1.8308737598137768e-05, "loss": 0.054, "step": 1443 }, { "epoch": 84.94117647058823, "grad_norm": 2.738109588623047, "learning_rate": 1.8306086301589505e-05, "loss": 0.1034, "step": 1444 }, { "epoch": 85.0, "grad_norm": 2.5925180912017822, "learning_rate": 1.8303433120807043e-05, "loss": 0.1063, "step": 1445 }, { "epoch": 85.05882352941177, "grad_norm": 2.085922956466675, "learning_rate": 1.830077805639225e-05, "loss": 0.0962, "step": 1446 }, { "epoch": 85.11764705882354, "grad_norm": 1.4055918455123901, "learning_rate": 1.8298121108947432e-05, "loss": 0.0607, "step": 1447 }, { "epoch": 85.17647058823529, "grad_norm": 2.170665740966797, "learning_rate": 1.8295462279075317e-05, "loss": 0.0703, "step": 1448 }, { "epoch": 85.23529411764706, "grad_norm": 2.069058418273926, "learning_rate": 1.8292801567379058e-05, "loss": 0.037, "step": 1449 }, { "epoch": 85.29411764705883, "grad_norm": 1.8105394840240479, "learning_rate": 1.8290138974462237e-05, "loss": 0.0184, "step": 1450 }, { "epoch": 85.3529411764706, "grad_norm": 2.1406497955322266, "learning_rate": 1.8287474500928864e-05, "loss": 0.1002, "step": 1451 }, { "epoch": 85.41176470588235, "grad_norm": 2.3991377353668213, "learning_rate": 1.8284808147383374e-05, "loss": 0.0311, "step": 1452 }, { "epoch": 85.47058823529412, "grad_norm": 1.6441478729248047, "learning_rate": 1.828213991443063e-05, "loss": 0.0351, "step": 1453 }, { "epoch": 85.52941176470588, "grad_norm": 2.7212419509887695, "learning_rate": 1.8279469802675914e-05, "loss": 0.0742, "step": 1454 }, { "epoch": 85.58823529411765, "grad_norm": 2.883471965789795, "learning_rate": 1.827679781272495e-05, "loss": 0.0535, "step": 1455 }, { "epoch": 85.6470588235294, "grad_norm": 1.7521142959594727, "learning_rate": 1.8274123945183874e-05, "loss": 0.0455, "step": 1456 }, { "epoch": 85.70588235294117, "grad_norm": 1.8489210605621338, "learning_rate": 1.8271448200659253e-05, "loss": 0.0393, "step": 1457 }, { "epoch": 85.76470588235294, "grad_norm": 2.2152788639068604, "learning_rate": 1.8268770579758075e-05, "loss": 0.0848, "step": 1458 }, { "epoch": 85.82352941176471, "grad_norm": 2.505643367767334, "learning_rate": 1.8266091083087762e-05, "loss": 0.0312, "step": 1459 }, { "epoch": 85.88235294117646, "grad_norm": 1.6644675731658936, "learning_rate": 1.8263409711256158e-05, "loss": 0.0721, "step": 1460 }, { "epoch": 85.94117647058823, "grad_norm": 2.432786703109741, "learning_rate": 1.8260726464871526e-05, "loss": 0.1485, "step": 1461 }, { "epoch": 86.0, "grad_norm": 2.551567554473877, "learning_rate": 1.8258041344542567e-05, "loss": 0.0981, "step": 1462 }, { "epoch": 86.05882352941177, "grad_norm": 1.3867769241333008, "learning_rate": 1.8255354350878396e-05, "loss": 0.034, "step": 1463 }, { "epoch": 86.11764705882354, "grad_norm": 2.205317258834839, "learning_rate": 1.8252665484488558e-05, "loss": 0.0914, "step": 1464 }, { "epoch": 86.17647058823529, "grad_norm": 2.017078161239624, "learning_rate": 1.8249974745983023e-05, "loss": 0.0455, "step": 1465 }, { "epoch": 86.23529411764706, "grad_norm": 2.6334617137908936, "learning_rate": 1.8247282135972188e-05, "loss": 0.0784, "step": 1466 }, { "epoch": 86.29411764705883, "grad_norm": 1.9802526235580444, "learning_rate": 1.824458765506686e-05, "loss": 0.0471, "step": 1467 }, { "epoch": 86.3529411764706, "grad_norm": 1.8319212198257446, "learning_rate": 1.8241891303878292e-05, "loss": 0.0388, "step": 1468 }, { "epoch": 86.41176470588235, "grad_norm": 2.0008397102355957, "learning_rate": 1.8239193083018144e-05, "loss": 0.0596, "step": 1469 }, { "epoch": 86.47058823529412, "grad_norm": 2.4474596977233887, "learning_rate": 1.8236492993098513e-05, "loss": 0.0779, "step": 1470 }, { "epoch": 86.52941176470588, "grad_norm": 1.7087352275848389, "learning_rate": 1.823379103473191e-05, "loss": 0.0514, "step": 1471 }, { "epoch": 86.58823529411765, "grad_norm": 2.4991564750671387, "learning_rate": 1.8231087208531272e-05, "loss": 0.0926, "step": 1472 }, { "epoch": 86.6470588235294, "grad_norm": 1.714446783065796, "learning_rate": 1.8228381515109966e-05, "loss": 0.0358, "step": 1473 }, { "epoch": 86.70588235294117, "grad_norm": 1.618778109550476, "learning_rate": 1.8225673955081778e-05, "loss": 0.0754, "step": 1474 }, { "epoch": 86.76470588235294, "grad_norm": 2.2522244453430176, "learning_rate": 1.8222964529060915e-05, "loss": 0.0829, "step": 1475 }, { "epoch": 86.82352941176471, "grad_norm": 1.7609907388687134, "learning_rate": 1.822025323766201e-05, "loss": 0.0789, "step": 1476 }, { "epoch": 86.88235294117646, "grad_norm": 1.7951048612594604, "learning_rate": 1.8217540081500117e-05, "loss": 0.0616, "step": 1477 }, { "epoch": 86.94117647058823, "grad_norm": 2.147639036178589, "learning_rate": 1.8214825061190723e-05, "loss": 0.0661, "step": 1478 }, { "epoch": 87.0, "grad_norm": 3.657674789428711, "learning_rate": 1.8212108177349722e-05, "loss": 0.0753, "step": 1479 }, { "epoch": 87.05882352941177, "grad_norm": 2.36893367767334, "learning_rate": 1.820938943059344e-05, "loss": 0.039, "step": 1480 }, { "epoch": 87.11764705882354, "grad_norm": 1.6748565435409546, "learning_rate": 1.8206668821538623e-05, "loss": 0.0336, "step": 1481 }, { "epoch": 87.17647058823529, "grad_norm": 2.201324224472046, "learning_rate": 1.8203946350802445e-05, "loss": 0.088, "step": 1482 }, { "epoch": 87.23529411764706, "grad_norm": 1.5417512655258179, "learning_rate": 1.8201222019002494e-05, "loss": 0.0378, "step": 1483 }, { "epoch": 87.29411764705883, "grad_norm": 2.0137410163879395, "learning_rate": 1.819849582675679e-05, "loss": 0.08, "step": 1484 }, { "epoch": 87.3529411764706, "grad_norm": 1.9922975301742554, "learning_rate": 1.8195767774683766e-05, "loss": 0.0854, "step": 1485 }, { "epoch": 87.41176470588235, "grad_norm": 2.279508113861084, "learning_rate": 1.8193037863402277e-05, "loss": 0.0918, "step": 1486 }, { "epoch": 87.47058823529412, "grad_norm": 2.4385321140289307, "learning_rate": 1.8190306093531605e-05, "loss": 0.0806, "step": 1487 }, { "epoch": 87.52941176470588, "grad_norm": 1.8017222881317139, "learning_rate": 1.818757246569145e-05, "loss": 0.0862, "step": 1488 }, { "epoch": 87.58823529411765, "grad_norm": 2.1747753620147705, "learning_rate": 1.8184836980501942e-05, "loss": 0.1139, "step": 1489 }, { "epoch": 87.6470588235294, "grad_norm": 1.2721800804138184, "learning_rate": 1.8182099638583618e-05, "loss": 0.0316, "step": 1490 }, { "epoch": 87.70588235294117, "grad_norm": 1.6490246057510376, "learning_rate": 1.817936044055744e-05, "loss": 0.0339, "step": 1491 }, { "epoch": 87.76470588235294, "grad_norm": 2.296433925628662, "learning_rate": 1.8176619387044807e-05, "loss": 0.0391, "step": 1492 }, { "epoch": 87.82352941176471, "grad_norm": 3.159403085708618, "learning_rate": 1.8173876478667517e-05, "loss": 0.0656, "step": 1493 }, { "epoch": 87.88235294117646, "grad_norm": 2.721250295639038, "learning_rate": 1.8171131716047804e-05, "loss": 0.0535, "step": 1494 }, { "epoch": 87.94117647058823, "grad_norm": 3.085463285446167, "learning_rate": 1.816838509980831e-05, "loss": 0.0638, "step": 1495 }, { "epoch": 88.0, "grad_norm": 1.062067985534668, "learning_rate": 1.816563663057211e-05, "loss": 0.0229, "step": 1496 }, { "epoch": 88.05882352941177, "grad_norm": 1.750744342803955, "learning_rate": 1.816288630896269e-05, "loss": 0.0416, "step": 1497 }, { "epoch": 88.11764705882354, "grad_norm": 1.5097171068191528, "learning_rate": 1.8160134135603967e-05, "loss": 0.0362, "step": 1498 }, { "epoch": 88.17647058823529, "grad_norm": 2.843423366546631, "learning_rate": 1.8157380111120266e-05, "loss": 0.06, "step": 1499 }, { "epoch": 88.23529411764706, "grad_norm": 1.5212370157241821, "learning_rate": 1.8154624236136333e-05, "loss": 0.0365, "step": 1500 }, { "epoch": 88.29411764705883, "grad_norm": 1.584837794303894, "learning_rate": 1.8151866511277342e-05, "loss": 0.045, "step": 1501 }, { "epoch": 88.3529411764706, "grad_norm": 1.613487720489502, "learning_rate": 1.8149106937168883e-05, "loss": 0.0595, "step": 1502 }, { "epoch": 88.41176470588235, "grad_norm": 2.0874180793762207, "learning_rate": 1.8146345514436966e-05, "loss": 0.0593, "step": 1503 }, { "epoch": 88.47058823529412, "grad_norm": 3.3478899002075195, "learning_rate": 1.8143582243708016e-05, "loss": 0.0902, "step": 1504 }, { "epoch": 88.52941176470588, "grad_norm": 2.3174331188201904, "learning_rate": 1.814081712560888e-05, "loss": 0.0682, "step": 1505 }, { "epoch": 88.58823529411765, "grad_norm": 1.8179455995559692, "learning_rate": 1.813805016076683e-05, "loss": 0.0986, "step": 1506 }, { "epoch": 88.6470588235294, "grad_norm": 3.5588951110839844, "learning_rate": 1.8135281349809547e-05, "loss": 0.0782, "step": 1507 }, { "epoch": 88.70588235294117, "grad_norm": 1.8120471239089966, "learning_rate": 1.8132510693365135e-05, "loss": 0.0331, "step": 1508 }, { "epoch": 88.76470588235294, "grad_norm": 2.2063138484954834, "learning_rate": 1.812973819206212e-05, "loss": 0.0518, "step": 1509 }, { "epoch": 88.82352941176471, "grad_norm": 2.937530040740967, "learning_rate": 1.8126963846529437e-05, "loss": 0.0725, "step": 1510 }, { "epoch": 88.88235294117646, "grad_norm": 2.658135414123535, "learning_rate": 1.8124187657396456e-05, "loss": 0.1089, "step": 1511 }, { "epoch": 88.94117647058823, "grad_norm": 2.0603559017181396, "learning_rate": 1.8121409625292946e-05, "loss": 0.053, "step": 1512 }, { "epoch": 89.0, "grad_norm": 1.8373000621795654, "learning_rate": 1.8118629750849106e-05, "loss": 0.0696, "step": 1513 }, { "epoch": 89.05882352941177, "grad_norm": 1.739209771156311, "learning_rate": 1.8115848034695552e-05, "loss": 0.0826, "step": 1514 }, { "epoch": 89.11764705882354, "grad_norm": 2.675480365753174, "learning_rate": 1.8113064477463314e-05, "loss": 0.0597, "step": 1515 }, { "epoch": 89.17647058823529, "grad_norm": 1.7226619720458984, "learning_rate": 1.811027907978384e-05, "loss": 0.04, "step": 1516 }, { "epoch": 89.23529411764706, "grad_norm": 2.024468421936035, "learning_rate": 1.8107491842289e-05, "loss": 0.1034, "step": 1517 }, { "epoch": 89.29411764705883, "grad_norm": 1.2199058532714844, "learning_rate": 1.8104702765611077e-05, "loss": 0.0524, "step": 1518 }, { "epoch": 89.3529411764706, "grad_norm": 1.6353362798690796, "learning_rate": 1.8101911850382773e-05, "loss": 0.0684, "step": 1519 }, { "epoch": 89.41176470588235, "grad_norm": 1.55868661403656, "learning_rate": 1.8099119097237207e-05, "loss": 0.0462, "step": 1520 }, { "epoch": 89.47058823529412, "grad_norm": 2.2351367473602295, "learning_rate": 1.809632450680791e-05, "loss": 0.0456, "step": 1521 }, { "epoch": 89.52941176470588, "grad_norm": 1.9134317636489868, "learning_rate": 1.8093528079728842e-05, "loss": 0.0614, "step": 1522 }, { "epoch": 89.58823529411765, "grad_norm": 2.7117087841033936, "learning_rate": 1.8090729816634363e-05, "loss": 0.051, "step": 1523 }, { "epoch": 89.6470588235294, "grad_norm": 1.7790085077285767, "learning_rate": 1.808792971815927e-05, "loss": 0.0478, "step": 1524 }, { "epoch": 89.70588235294117, "grad_norm": 2.0677196979522705, "learning_rate": 1.8085127784938755e-05, "loss": 0.105, "step": 1525 }, { "epoch": 89.76470588235294, "grad_norm": 1.9203234910964966, "learning_rate": 1.808232401760844e-05, "loss": 0.071, "step": 1526 }, { "epoch": 89.82352941176471, "grad_norm": 2.533353090286255, "learning_rate": 1.807951841680436e-05, "loss": 0.0405, "step": 1527 }, { "epoch": 89.88235294117646, "grad_norm": 2.9593331813812256, "learning_rate": 1.8076710983162963e-05, "loss": 0.0786, "step": 1528 }, { "epoch": 89.94117647058823, "grad_norm": 1.5580748319625854, "learning_rate": 1.8073901717321113e-05, "loss": 0.0306, "step": 1529 }, { "epoch": 90.0, "grad_norm": 1.8200117349624634, "learning_rate": 1.8071090619916095e-05, "loss": 0.0531, "step": 1530 }, { "epoch": 90.05882352941177, "grad_norm": 1.8105355501174927, "learning_rate": 1.80682776915856e-05, "loss": 0.0261, "step": 1531 }, { "epoch": 90.11764705882354, "grad_norm": 2.7021448612213135, "learning_rate": 1.8065462932967754e-05, "loss": 0.0386, "step": 1532 }, { "epoch": 90.17647058823529, "grad_norm": 1.574390172958374, "learning_rate": 1.8062646344701072e-05, "loss": 0.0301, "step": 1533 }, { "epoch": 90.23529411764706, "grad_norm": 2.25439453125, "learning_rate": 1.80598279274245e-05, "loss": 0.0668, "step": 1534 }, { "epoch": 90.29411764705883, "grad_norm": 1.7907463312149048, "learning_rate": 1.8057007681777398e-05, "loss": 0.0376, "step": 1535 }, { "epoch": 90.3529411764706, "grad_norm": 1.9817458391189575, "learning_rate": 1.8054185608399534e-05, "loss": 0.1113, "step": 1536 }, { "epoch": 90.41176470588235, "grad_norm": 2.2965941429138184, "learning_rate": 1.805136170793109e-05, "loss": 0.098, "step": 1537 }, { "epoch": 90.47058823529412, "grad_norm": 1.7663452625274658, "learning_rate": 1.8048535981012683e-05, "loss": 0.0661, "step": 1538 }, { "epoch": 90.52941176470588, "grad_norm": 2.092289686203003, "learning_rate": 1.8045708428285317e-05, "loss": 0.0411, "step": 1539 }, { "epoch": 90.58823529411765, "grad_norm": 1.6264805793762207, "learning_rate": 1.8042879050390426e-05, "loss": 0.0564, "step": 1540 }, { "epoch": 90.6470588235294, "grad_norm": 1.9531893730163574, "learning_rate": 1.804004784796985e-05, "loss": 0.049, "step": 1541 }, { "epoch": 90.70588235294117, "grad_norm": 1.979540467262268, "learning_rate": 1.8037214821665848e-05, "loss": 0.0521, "step": 1542 }, { "epoch": 90.76470588235294, "grad_norm": 2.112757682800293, "learning_rate": 1.8034379972121096e-05, "loss": 0.0474, "step": 1543 }, { "epoch": 90.82352941176471, "grad_norm": 1.992295742034912, "learning_rate": 1.8031543299978673e-05, "loss": 0.0474, "step": 1544 }, { "epoch": 90.88235294117646, "grad_norm": 1.8046190738677979, "learning_rate": 1.802870480588208e-05, "loss": 0.051, "step": 1545 }, { "epoch": 90.94117647058823, "grad_norm": 1.6612266302108765, "learning_rate": 1.8025864490475232e-05, "loss": 0.0417, "step": 1546 }, { "epoch": 91.0, "grad_norm": 2.3650221824645996, "learning_rate": 1.802302235440245e-05, "loss": 0.1073, "step": 1547 }, { "epoch": 91.05882352941177, "grad_norm": 1.6479785442352295, "learning_rate": 1.8020178398308475e-05, "loss": 0.073, "step": 1548 }, { "epoch": 91.11764705882354, "grad_norm": 1.9653253555297852, "learning_rate": 1.8017332622838453e-05, "loss": 0.046, "step": 1549 }, { "epoch": 91.17647058823529, "grad_norm": 2.6059508323669434, "learning_rate": 1.8014485028637954e-05, "loss": 0.0525, "step": 1550 }, { "epoch": 91.23529411764706, "grad_norm": 2.225743055343628, "learning_rate": 1.8011635616352948e-05, "loss": 0.0994, "step": 1551 }, { "epoch": 91.29411764705883, "grad_norm": 1.9568254947662354, "learning_rate": 1.8008784386629828e-05, "loss": 0.0484, "step": 1552 }, { "epoch": 91.3529411764706, "grad_norm": 1.6628891229629517, "learning_rate": 1.8005931340115395e-05, "loss": 0.03, "step": 1553 }, { "epoch": 91.41176470588235, "grad_norm": 1.6750397682189941, "learning_rate": 1.8003076477456858e-05, "loss": 0.0578, "step": 1554 }, { "epoch": 91.47058823529412, "grad_norm": 1.331879734992981, "learning_rate": 1.8000219799301847e-05, "loss": 0.0676, "step": 1555 }, { "epoch": 91.52941176470588, "grad_norm": 1.7180500030517578, "learning_rate": 1.7997361306298398e-05, "loss": 0.0315, "step": 1556 }, { "epoch": 91.58823529411765, "grad_norm": 2.3183846473693848, "learning_rate": 1.7994500999094957e-05, "loss": 0.0488, "step": 1557 }, { "epoch": 91.6470588235294, "grad_norm": 1.6364188194274902, "learning_rate": 1.7991638878340386e-05, "loss": 0.0281, "step": 1558 }, { "epoch": 91.70588235294117, "grad_norm": 2.323977470397949, "learning_rate": 1.7988774944683956e-05, "loss": 0.1001, "step": 1559 }, { "epoch": 91.76470588235294, "grad_norm": 2.2934153079986572, "learning_rate": 1.798590919877535e-05, "loss": 0.0501, "step": 1560 }, { "epoch": 91.82352941176471, "grad_norm": 1.3957247734069824, "learning_rate": 1.7983041641264662e-05, "loss": 0.0615, "step": 1561 }, { "epoch": 91.88235294117646, "grad_norm": 2.371381998062134, "learning_rate": 1.7980172272802398e-05, "loss": 0.0599, "step": 1562 }, { "epoch": 91.94117647058823, "grad_norm": 1.284427523612976, "learning_rate": 1.797730109403947e-05, "loss": 0.0299, "step": 1563 }, { "epoch": 92.0, "grad_norm": 1.7634559869766235, "learning_rate": 1.797442810562721e-05, "loss": 0.0734, "step": 1564 }, { "epoch": 92.05882352941177, "grad_norm": 1.7584888935089111, "learning_rate": 1.797155330821735e-05, "loss": 0.0454, "step": 1565 }, { "epoch": 92.11764705882354, "grad_norm": 1.702060341835022, "learning_rate": 1.7968676702462042e-05, "loss": 0.0549, "step": 1566 }, { "epoch": 92.17647058823529, "grad_norm": 1.7490814924240112, "learning_rate": 1.796579828901384e-05, "loss": 0.0419, "step": 1567 }, { "epoch": 92.23529411764706, "grad_norm": 1.4453809261322021, "learning_rate": 1.796291806852571e-05, "loss": 0.0615, "step": 1568 }, { "epoch": 92.29411764705883, "grad_norm": 1.8087310791015625, "learning_rate": 1.7960036041651032e-05, "loss": 0.053, "step": 1569 }, { "epoch": 92.3529411764706, "grad_norm": 1.9587125778198242, "learning_rate": 1.7957152209043595e-05, "loss": 0.0895, "step": 1570 }, { "epoch": 92.41176470588235, "grad_norm": 1.443472146987915, "learning_rate": 1.7954266571357593e-05, "loss": 0.0388, "step": 1571 }, { "epoch": 92.47058823529412, "grad_norm": 2.1531736850738525, "learning_rate": 1.7951379129247634e-05, "loss": 0.0612, "step": 1572 }, { "epoch": 92.52941176470588, "grad_norm": 1.1477892398834229, "learning_rate": 1.7948489883368732e-05, "loss": 0.0282, "step": 1573 }, { "epoch": 92.58823529411765, "grad_norm": 3.652163028717041, "learning_rate": 1.7945598834376318e-05, "loss": 0.0452, "step": 1574 }, { "epoch": 92.6470588235294, "grad_norm": 1.2059022188186646, "learning_rate": 1.7942705982926218e-05, "loss": 0.0245, "step": 1575 }, { "epoch": 92.70588235294117, "grad_norm": 1.9060192108154297, "learning_rate": 1.7939811329674673e-05, "loss": 0.0367, "step": 1576 }, { "epoch": 92.76470588235294, "grad_norm": 2.484677791595459, "learning_rate": 1.7936914875278348e-05, "loss": 0.0897, "step": 1577 }, { "epoch": 92.82352941176471, "grad_norm": 1.5883426666259766, "learning_rate": 1.7934016620394288e-05, "loss": 0.0235, "step": 1578 }, { "epoch": 92.88235294117646, "grad_norm": 2.049988031387329, "learning_rate": 1.7931116565679972e-05, "loss": 0.0898, "step": 1579 }, { "epoch": 92.94117647058823, "grad_norm": 1.2422784566879272, "learning_rate": 1.7928214711793272e-05, "loss": 0.0275, "step": 1580 }, { "epoch": 93.0, "grad_norm": 1.8944602012634277, "learning_rate": 1.7925311059392472e-05, "loss": 0.085, "step": 1581 }, { "epoch": 93.05882352941177, "grad_norm": 2.3935370445251465, "learning_rate": 1.792240560913627e-05, "loss": 0.0435, "step": 1582 }, { "epoch": 93.11764705882354, "grad_norm": 1.3563398122787476, "learning_rate": 1.791949836168376e-05, "loss": 0.0288, "step": 1583 }, { "epoch": 93.17647058823529, "grad_norm": 2.160761833190918, "learning_rate": 1.7916589317694463e-05, "loss": 0.0825, "step": 1584 }, { "epoch": 93.23529411764706, "grad_norm": 1.7155150175094604, "learning_rate": 1.791367847782828e-05, "loss": 0.0708, "step": 1585 }, { "epoch": 93.29411764705883, "grad_norm": 3.841660737991333, "learning_rate": 1.7910765842745543e-05, "loss": 0.0429, "step": 1586 }, { "epoch": 93.3529411764706, "grad_norm": 2.2175374031066895, "learning_rate": 1.7907851413106982e-05, "loss": 0.0898, "step": 1587 }, { "epoch": 93.41176470588235, "grad_norm": 4.713409900665283, "learning_rate": 1.7904935189573733e-05, "loss": 0.1121, "step": 1588 }, { "epoch": 93.47058823529412, "grad_norm": 1.4861716032028198, "learning_rate": 1.790201717280734e-05, "loss": 0.0291, "step": 1589 }, { "epoch": 93.52941176470588, "grad_norm": 2.4293389320373535, "learning_rate": 1.7899097363469757e-05, "loss": 0.0512, "step": 1590 }, { "epoch": 93.58823529411765, "grad_norm": 2.0849711894989014, "learning_rate": 1.789617576222334e-05, "loss": 0.0663, "step": 1591 }, { "epoch": 93.6470588235294, "grad_norm": 1.2364544868469238, "learning_rate": 1.789325236973086e-05, "loss": 0.0238, "step": 1592 }, { "epoch": 93.70588235294117, "grad_norm": 2.235275983810425, "learning_rate": 1.789032718665548e-05, "loss": 0.0486, "step": 1593 }, { "epoch": 93.76470588235294, "grad_norm": 1.8000755310058594, "learning_rate": 1.7887400213660782e-05, "loss": 0.034, "step": 1594 }, { "epoch": 93.82352941176471, "grad_norm": 1.5551108121871948, "learning_rate": 1.7884471451410748e-05, "loss": 0.0264, "step": 1595 }, { "epoch": 93.88235294117646, "grad_norm": 2.35131573677063, "learning_rate": 1.7881540900569767e-05, "loss": 0.088, "step": 1596 }, { "epoch": 93.94117647058823, "grad_norm": 1.9291877746582031, "learning_rate": 1.787860856180263e-05, "loss": 0.0712, "step": 1597 }, { "epoch": 94.0, "grad_norm": 2.928333044052124, "learning_rate": 1.7875674435774546e-05, "loss": 0.0353, "step": 1598 }, { "epoch": 94.05882352941177, "grad_norm": 2.3241117000579834, "learning_rate": 1.7872738523151112e-05, "loss": 0.0622, "step": 1599 }, { "epoch": 94.11764705882354, "grad_norm": 1.5717693567276, "learning_rate": 1.7869800824598347e-05, "loss": 0.0293, "step": 1600 }, { "epoch": 94.17647058823529, "grad_norm": 1.4208874702453613, "learning_rate": 1.7866861340782663e-05, "loss": 0.0327, "step": 1601 }, { "epoch": 94.23529411764706, "grad_norm": 2.931729793548584, "learning_rate": 1.7863920072370886e-05, "loss": 0.0462, "step": 1602 }, { "epoch": 94.29411764705883, "grad_norm": 1.898577094078064, "learning_rate": 1.7860977020030236e-05, "loss": 0.0525, "step": 1603 }, { "epoch": 94.3529411764706, "grad_norm": 1.766728401184082, "learning_rate": 1.7858032184428348e-05, "loss": 0.0492, "step": 1604 }, { "epoch": 94.41176470588235, "grad_norm": 3.090740203857422, "learning_rate": 1.785508556623325e-05, "loss": 0.0674, "step": 1605 }, { "epoch": 94.47058823529412, "grad_norm": 1.9945586919784546, "learning_rate": 1.7852137166113396e-05, "loss": 0.1121, "step": 1606 }, { "epoch": 94.52941176470588, "grad_norm": 1.8471086025238037, "learning_rate": 1.7849186984737618e-05, "loss": 0.0284, "step": 1607 }, { "epoch": 94.58823529411765, "grad_norm": 1.5899170637130737, "learning_rate": 1.7846235022775165e-05, "loss": 0.0431, "step": 1608 }, { "epoch": 94.6470588235294, "grad_norm": 1.9571020603179932, "learning_rate": 1.7843281280895696e-05, "loss": 0.0738, "step": 1609 }, { "epoch": 94.70588235294117, "grad_norm": 2.211077928543091, "learning_rate": 1.7840325759769263e-05, "loss": 0.0354, "step": 1610 }, { "epoch": 94.76470588235294, "grad_norm": 1.8573541641235352, "learning_rate": 1.7837368460066327e-05, "loss": 0.0386, "step": 1611 }, { "epoch": 94.82352941176471, "grad_norm": 1.514466643333435, "learning_rate": 1.7834409382457745e-05, "loss": 0.0647, "step": 1612 }, { "epoch": 94.88235294117646, "grad_norm": 1.7675526142120361, "learning_rate": 1.783144852761479e-05, "loss": 0.0648, "step": 1613 }, { "epoch": 94.94117647058823, "grad_norm": 1.9615827798843384, "learning_rate": 1.782848589620913e-05, "loss": 0.0378, "step": 1614 }, { "epoch": 95.0, "grad_norm": 1.5752280950546265, "learning_rate": 1.7825521488912833e-05, "loss": 0.0298, "step": 1615 }, { "epoch": 95.05882352941177, "grad_norm": 2.4691619873046875, "learning_rate": 1.7822555306398378e-05, "loss": 0.0802, "step": 1616 }, { "epoch": 95.11764705882354, "grad_norm": 2.3730127811431885, "learning_rate": 1.7819587349338643e-05, "loss": 0.0674, "step": 1617 }, { "epoch": 95.17647058823529, "grad_norm": 2.1717045307159424, "learning_rate": 1.7816617618406908e-05, "loss": 0.0819, "step": 1618 }, { "epoch": 95.23529411764706, "grad_norm": 3.4564926624298096, "learning_rate": 1.7813646114276855e-05, "loss": 0.0276, "step": 1619 }, { "epoch": 95.29411764705883, "grad_norm": 1.623011589050293, "learning_rate": 1.7810672837622573e-05, "loss": 0.0442, "step": 1620 }, { "epoch": 95.3529411764706, "grad_norm": 1.1184393167495728, "learning_rate": 1.7807697789118544e-05, "loss": 0.0234, "step": 1621 }, { "epoch": 95.41176470588235, "grad_norm": 1.5963099002838135, "learning_rate": 1.780472096943966e-05, "loss": 0.0385, "step": 1622 }, { "epoch": 95.47058823529412, "grad_norm": 1.3105285167694092, "learning_rate": 1.7801742379261212e-05, "loss": 0.083, "step": 1623 }, { "epoch": 95.52941176470588, "grad_norm": 1.8208165168762207, "learning_rate": 1.7798762019258892e-05, "loss": 0.0392, "step": 1624 }, { "epoch": 95.58823529411765, "grad_norm": 1.7544909715652466, "learning_rate": 1.7795779890108797e-05, "loss": 0.0267, "step": 1625 }, { "epoch": 95.6470588235294, "grad_norm": 1.7716467380523682, "learning_rate": 1.7792795992487417e-05, "loss": 0.0775, "step": 1626 }, { "epoch": 95.70588235294117, "grad_norm": 1.737410545349121, "learning_rate": 1.7789810327071653e-05, "loss": 0.0305, "step": 1627 }, { "epoch": 95.76470588235294, "grad_norm": 1.7336982488632202, "learning_rate": 1.77868228945388e-05, "loss": 0.0604, "step": 1628 }, { "epoch": 95.82352941176471, "grad_norm": 2.052408456802368, "learning_rate": 1.778383369556656e-05, "loss": 0.0555, "step": 1629 }, { "epoch": 95.88235294117646, "grad_norm": 1.340322732925415, "learning_rate": 1.7780842730833026e-05, "loss": 0.05, "step": 1630 }, { "epoch": 95.94117647058823, "grad_norm": 1.4964410066604614, "learning_rate": 1.7777850001016705e-05, "loss": 0.0337, "step": 1631 }, { "epoch": 96.0, "grad_norm": 1.5737507343292236, "learning_rate": 1.7774855506796497e-05, "loss": 0.046, "step": 1632 }, { "epoch": 96.05882352941177, "grad_norm": 1.609706997871399, "learning_rate": 1.7771859248851693e-05, "loss": 0.0289, "step": 1633 }, { "epoch": 96.11764705882354, "grad_norm": 2.5697813034057617, "learning_rate": 1.7768861227862005e-05, "loss": 0.0527, "step": 1634 }, { "epoch": 96.17647058823529, "grad_norm": 1.6581978797912598, "learning_rate": 1.7765861444507528e-05, "loss": 0.0629, "step": 1635 }, { "epoch": 96.23529411764706, "grad_norm": 1.3137010335922241, "learning_rate": 1.776285989946876e-05, "loss": 0.0672, "step": 1636 }, { "epoch": 96.29411764705883, "grad_norm": 2.2677714824676514, "learning_rate": 1.7759856593426606e-05, "loss": 0.0375, "step": 1637 }, { "epoch": 96.3529411764706, "grad_norm": 2.0530083179473877, "learning_rate": 1.7756851527062364e-05, "loss": 0.0525, "step": 1638 }, { "epoch": 96.41176470588235, "grad_norm": 1.201953411102295, "learning_rate": 1.7753844701057728e-05, "loss": 0.0262, "step": 1639 }, { "epoch": 96.47058823529412, "grad_norm": 2.037827491760254, "learning_rate": 1.7750836116094805e-05, "loss": 0.0288, "step": 1640 }, { "epoch": 96.52941176470588, "grad_norm": 1.6412959098815918, "learning_rate": 1.7747825772856088e-05, "loss": 0.0586, "step": 1641 }, { "epoch": 96.58823529411765, "grad_norm": 1.361325740814209, "learning_rate": 1.7744813672024467e-05, "loss": 0.0282, "step": 1642 }, { "epoch": 96.6470588235294, "grad_norm": 1.6787927150726318, "learning_rate": 1.7741799814283246e-05, "loss": 0.053, "step": 1643 }, { "epoch": 96.70588235294117, "grad_norm": 1.9429420232772827, "learning_rate": 1.773878420031611e-05, "loss": 0.0672, "step": 1644 }, { "epoch": 96.76470588235294, "grad_norm": 1.4442633390426636, "learning_rate": 1.7735766830807154e-05, "loss": 0.0274, "step": 1645 }, { "epoch": 96.82352941176471, "grad_norm": 2.624765396118164, "learning_rate": 1.7732747706440873e-05, "loss": 0.0545, "step": 1646 }, { "epoch": 96.88235294117646, "grad_norm": 1.4606629610061646, "learning_rate": 1.7729726827902144e-05, "loss": 0.0297, "step": 1647 }, { "epoch": 96.94117647058823, "grad_norm": 2.2985241413116455, "learning_rate": 1.772670419587626e-05, "loss": 0.1014, "step": 1648 }, { "epoch": 97.0, "grad_norm": 1.880741000175476, "learning_rate": 1.7723679811048904e-05, "loss": 0.0723, "step": 1649 }, { "epoch": 97.05882352941177, "grad_norm": 1.0589730739593506, "learning_rate": 1.7720653674106153e-05, "loss": 0.0241, "step": 1650 }, { "epoch": 97.11764705882354, "grad_norm": 1.5087612867355347, "learning_rate": 1.771762578573449e-05, "loss": 0.0411, "step": 1651 }, { "epoch": 97.17647058823529, "grad_norm": 1.479968786239624, "learning_rate": 1.7714596146620794e-05, "loss": 0.0436, "step": 1652 }, { "epoch": 97.23529411764706, "grad_norm": 1.47006356716156, "learning_rate": 1.7711564757452328e-05, "loss": 0.0346, "step": 1653 }, { "epoch": 97.29411764705883, "grad_norm": 1.6996369361877441, "learning_rate": 1.770853161891677e-05, "loss": 0.0565, "step": 1654 }, { "epoch": 97.3529411764706, "grad_norm": 1.3959237337112427, "learning_rate": 1.7705496731702185e-05, "loss": 0.056, "step": 1655 }, { "epoch": 97.41176470588235, "grad_norm": 1.2996845245361328, "learning_rate": 1.7702460096497035e-05, "loss": 0.0247, "step": 1656 }, { "epoch": 97.47058823529412, "grad_norm": 1.7472931146621704, "learning_rate": 1.7699421713990183e-05, "loss": 0.0359, "step": 1657 }, { "epoch": 97.52941176470588, "grad_norm": 1.6591665744781494, "learning_rate": 1.7696381584870887e-05, "loss": 0.0507, "step": 1658 }, { "epoch": 97.58823529411765, "grad_norm": 1.7665784358978271, "learning_rate": 1.769333970982879e-05, "loss": 0.0297, "step": 1659 }, { "epoch": 97.6470588235294, "grad_norm": 1.271668553352356, "learning_rate": 1.7690296089553952e-05, "loss": 0.0385, "step": 1660 }, { "epoch": 97.70588235294117, "grad_norm": 2.42152738571167, "learning_rate": 1.768725072473681e-05, "loss": 0.0629, "step": 1661 }, { "epoch": 97.76470588235294, "grad_norm": 3.1797780990600586, "learning_rate": 1.7684203616068215e-05, "loss": 0.0578, "step": 1662 }, { "epoch": 97.82352941176471, "grad_norm": 1.5189545154571533, "learning_rate": 1.768115476423939e-05, "loss": 0.0621, "step": 1663 }, { "epoch": 97.88235294117646, "grad_norm": 1.8784703016281128, "learning_rate": 1.7678104169941972e-05, "loss": 0.0816, "step": 1664 }, { "epoch": 97.94117647058823, "grad_norm": 1.9927204847335815, "learning_rate": 1.7675051833867987e-05, "loss": 0.0452, "step": 1665 }, { "epoch": 98.0, "grad_norm": 1.9190739393234253, "learning_rate": 1.767199775670986e-05, "loss": 0.058, "step": 1666 }, { "epoch": 98.05882352941177, "grad_norm": 2.642402410507202, "learning_rate": 1.766894193916041e-05, "loss": 0.0385, "step": 1667 }, { "epoch": 98.11764705882354, "grad_norm": 1.664349913597107, "learning_rate": 1.766588438191284e-05, "loss": 0.0419, "step": 1668 }, { "epoch": 98.17647058823529, "grad_norm": 2.1495115756988525, "learning_rate": 1.7662825085660762e-05, "loss": 0.0923, "step": 1669 }, { "epoch": 98.23529411764706, "grad_norm": 1.9126770496368408, "learning_rate": 1.7659764051098175e-05, "loss": 0.0465, "step": 1670 }, { "epoch": 98.29411764705883, "grad_norm": 1.3478167057037354, "learning_rate": 1.7656701278919475e-05, "loss": 0.0571, "step": 1671 }, { "epoch": 98.3529411764706, "grad_norm": 1.955019474029541, "learning_rate": 1.765363676981945e-05, "loss": 0.0317, "step": 1672 }, { "epoch": 98.41176470588235, "grad_norm": 2.4480698108673096, "learning_rate": 1.7650570524493285e-05, "loss": 0.0902, "step": 1673 }, { "epoch": 98.47058823529412, "grad_norm": 1.5863829851150513, "learning_rate": 1.764750254363656e-05, "loss": 0.0281, "step": 1674 }, { "epoch": 98.52941176470588, "grad_norm": 1.198224425315857, "learning_rate": 1.764443282794524e-05, "loss": 0.0272, "step": 1675 }, { "epoch": 98.58823529411765, "grad_norm": 1.6166054010391235, "learning_rate": 1.764136137811569e-05, "loss": 0.0348, "step": 1676 }, { "epoch": 98.6470588235294, "grad_norm": 1.2561644315719604, "learning_rate": 1.7638288194844673e-05, "loss": 0.0249, "step": 1677 }, { "epoch": 98.70588235294117, "grad_norm": 2.67470383644104, "learning_rate": 1.7635213278829337e-05, "loss": 0.0813, "step": 1678 }, { "epoch": 98.76470588235294, "grad_norm": 1.2840080261230469, "learning_rate": 1.7632136630767227e-05, "loss": 0.0287, "step": 1679 }, { "epoch": 98.82352941176471, "grad_norm": 1.3939580917358398, "learning_rate": 1.7629058251356277e-05, "loss": 0.0273, "step": 1680 }, { "epoch": 98.88235294117646, "grad_norm": 1.4860727787017822, "learning_rate": 1.762597814129482e-05, "loss": 0.0526, "step": 1681 }, { "epoch": 98.94117647058823, "grad_norm": 1.969660997390747, "learning_rate": 1.762289630128158e-05, "loss": 0.0582, "step": 1682 }, { "epoch": 99.0, "grad_norm": 1.2440426349639893, "learning_rate": 1.7619812732015664e-05, "loss": 0.0383, "step": 1683 }, { "epoch": 99.05882352941177, "grad_norm": 1.6305581331253052, "learning_rate": 1.7616727434196587e-05, "loss": 0.0456, "step": 1684 }, { "epoch": 99.11764705882354, "grad_norm": 1.4542900323867798, "learning_rate": 1.7613640408524248e-05, "loss": 0.0378, "step": 1685 }, { "epoch": 99.17647058823529, "grad_norm": 2.241928815841675, "learning_rate": 1.761055165569893e-05, "loss": 0.0324, "step": 1686 }, { "epoch": 99.23529411764706, "grad_norm": 1.3947665691375732, "learning_rate": 1.7607461176421327e-05, "loss": 0.031, "step": 1687 }, { "epoch": 99.29411764705883, "grad_norm": 1.6482771635055542, "learning_rate": 1.7604368971392508e-05, "loss": 0.0322, "step": 1688 }, { "epoch": 99.3529411764706, "grad_norm": 1.2584768533706665, "learning_rate": 1.760127504131394e-05, "loss": 0.0441, "step": 1689 }, { "epoch": 99.41176470588235, "grad_norm": 1.700767159461975, "learning_rate": 1.759817938688748e-05, "loss": 0.0276, "step": 1690 }, { "epoch": 99.47058823529412, "grad_norm": 1.9819145202636719, "learning_rate": 1.759508200881537e-05, "loss": 0.0424, "step": 1691 }, { "epoch": 99.52941176470588, "grad_norm": 2.0128066539764404, "learning_rate": 1.7591982907800264e-05, "loss": 0.0869, "step": 1692 }, { "epoch": 99.58823529411765, "grad_norm": 2.0404534339904785, "learning_rate": 1.7588882084545185e-05, "loss": 0.0433, "step": 1693 }, { "epoch": 99.6470588235294, "grad_norm": 2.944464921951294, "learning_rate": 1.7585779539753554e-05, "loss": 0.0519, "step": 1694 }, { "epoch": 99.70588235294117, "grad_norm": 1.818827748298645, "learning_rate": 1.7582675274129184e-05, "loss": 0.0332, "step": 1695 }, { "epoch": 99.76470588235294, "grad_norm": 1.812803030014038, "learning_rate": 1.7579569288376276e-05, "loss": 0.0838, "step": 1696 }, { "epoch": 99.82352941176471, "grad_norm": 3.169074296951294, "learning_rate": 1.757646158319942e-05, "loss": 0.0549, "step": 1697 }, { "epoch": 99.88235294117646, "grad_norm": 1.3782893419265747, "learning_rate": 1.75733521593036e-05, "loss": 0.029, "step": 1698 }, { "epoch": 99.94117647058823, "grad_norm": 1.3789900541305542, "learning_rate": 1.7570241017394193e-05, "loss": 0.0807, "step": 1699 }, { "epoch": 100.0, "grad_norm": 1.2581475973129272, "learning_rate": 1.7567128158176955e-05, "loss": 0.0304, "step": 1700 }, { "epoch": 100.05882352941177, "grad_norm": 2.1706457138061523, "learning_rate": 1.756401358235804e-05, "loss": 0.0314, "step": 1701 }, { "epoch": 100.11764705882354, "grad_norm": 1.2990175485610962, "learning_rate": 1.7560897290643986e-05, "loss": 0.027, "step": 1702 }, { "epoch": 100.17647058823529, "grad_norm": 1.1966122388839722, "learning_rate": 1.7557779283741728e-05, "loss": 0.0492, "step": 1703 }, { "epoch": 100.23529411764706, "grad_norm": 1.3850892782211304, "learning_rate": 1.7554659562358585e-05, "loss": 0.0373, "step": 1704 }, { "epoch": 100.29411764705883, "grad_norm": 1.6300454139709473, "learning_rate": 1.7551538127202265e-05, "loss": 0.043, "step": 1705 }, { "epoch": 100.3529411764706, "grad_norm": 1.803338885307312, "learning_rate": 1.7548414978980858e-05, "loss": 0.0365, "step": 1706 }, { "epoch": 100.41176470588235, "grad_norm": 1.5439374446868896, "learning_rate": 1.7545290118402855e-05, "loss": 0.0666, "step": 1707 }, { "epoch": 100.47058823529412, "grad_norm": 1.2292075157165527, "learning_rate": 1.7542163546177136e-05, "loss": 0.0513, "step": 1708 }, { "epoch": 100.52941176470588, "grad_norm": 2.154632329940796, "learning_rate": 1.7539035263012954e-05, "loss": 0.0611, "step": 1709 }, { "epoch": 100.58823529411765, "grad_norm": 1.5575110912322998, "learning_rate": 1.753590526961996e-05, "loss": 0.0461, "step": 1710 }, { "epoch": 100.6470588235294, "grad_norm": 1.2199803590774536, "learning_rate": 1.75327735667082e-05, "loss": 0.0491, "step": 1711 }, { "epoch": 100.70588235294117, "grad_norm": 1.3668755292892456, "learning_rate": 1.7529640154988095e-05, "loss": 0.0312, "step": 1712 }, { "epoch": 100.76470588235294, "grad_norm": 1.2796716690063477, "learning_rate": 1.7526505035170457e-05, "loss": 0.0314, "step": 1713 }, { "epoch": 100.82352941176471, "grad_norm": 1.240775465965271, "learning_rate": 1.7523368207966492e-05, "loss": 0.0613, "step": 1714 }, { "epoch": 100.88235294117646, "grad_norm": 1.6735870838165283, "learning_rate": 1.7520229674087784e-05, "loss": 0.0487, "step": 1715 }, { "epoch": 100.94117647058823, "grad_norm": 2.2820262908935547, "learning_rate": 1.7517089434246316e-05, "loss": 0.02, "step": 1716 }, { "epoch": 101.0, "grad_norm": 1.2829147577285767, "learning_rate": 1.7513947489154443e-05, "loss": 0.0203, "step": 1717 }, { "epoch": 101.05882352941177, "grad_norm": 1.81283438205719, "learning_rate": 1.751080383952492e-05, "loss": 0.0673, "step": 1718 }, { "epoch": 101.11764705882354, "grad_norm": 2.0193283557891846, "learning_rate": 1.7507658486070877e-05, "loss": 0.0311, "step": 1719 }, { "epoch": 101.17647058823529, "grad_norm": 1.1294535398483276, "learning_rate": 1.7504511429505845e-05, "loss": 0.034, "step": 1720 }, { "epoch": 101.23529411764706, "grad_norm": 2.46940016746521, "learning_rate": 1.750136267054373e-05, "loss": 0.061, "step": 1721 }, { "epoch": 101.29411764705883, "grad_norm": 2.4558448791503906, "learning_rate": 1.7498212209898824e-05, "loss": 0.0377, "step": 1722 }, { "epoch": 101.3529411764706, "grad_norm": 1.4986668825149536, "learning_rate": 1.7495060048285813e-05, "loss": 0.0381, "step": 1723 }, { "epoch": 101.41176470588235, "grad_norm": 1.499153971672058, "learning_rate": 1.7491906186419762e-05, "loss": 0.0485, "step": 1724 }, { "epoch": 101.47058823529412, "grad_norm": 1.306340217590332, "learning_rate": 1.7488750625016126e-05, "loss": 0.0274, "step": 1725 }, { "epoch": 101.52941176470588, "grad_norm": 1.611656665802002, "learning_rate": 1.748559336479074e-05, "loss": 0.0447, "step": 1726 }, { "epoch": 101.58823529411765, "grad_norm": 1.238024115562439, "learning_rate": 1.748243440645983e-05, "loss": 0.0243, "step": 1727 }, { "epoch": 101.6470588235294, "grad_norm": 2.1621174812316895, "learning_rate": 1.7479273750740006e-05, "loss": 0.0387, "step": 1728 }, { "epoch": 101.70588235294117, "grad_norm": 1.5682286024093628, "learning_rate": 1.7476111398348262e-05, "loss": 0.0621, "step": 1729 }, { "epoch": 101.76470588235294, "grad_norm": 1.1740323305130005, "learning_rate": 1.7472947350001978e-05, "loss": 0.0203, "step": 1730 }, { "epoch": 101.82352941176471, "grad_norm": 2.8905675411224365, "learning_rate": 1.7469781606418915e-05, "loss": 0.0305, "step": 1731 }, { "epoch": 101.88235294117646, "grad_norm": 1.607574224472046, "learning_rate": 1.746661416831722e-05, "loss": 0.0576, "step": 1732 }, { "epoch": 101.94117647058823, "grad_norm": 1.7446776628494263, "learning_rate": 1.746344503641543e-05, "loss": 0.0548, "step": 1733 }, { "epoch": 102.0, "grad_norm": 1.5043896436691284, "learning_rate": 1.7460274211432463e-05, "loss": 0.0789, "step": 1734 }, { "epoch": 102.05882352941177, "grad_norm": 1.8612769842147827, "learning_rate": 1.7457101694087612e-05, "loss": 0.0224, "step": 1735 }, { "epoch": 102.11764705882354, "grad_norm": 1.2049081325531006, "learning_rate": 1.7453927485100573e-05, "loss": 0.0165, "step": 1736 }, { "epoch": 102.17647058823529, "grad_norm": 2.678624391555786, "learning_rate": 1.7450751585191406e-05, "loss": 0.0635, "step": 1737 }, { "epoch": 102.23529411764706, "grad_norm": 3.271991014480591, "learning_rate": 1.744757399508057e-05, "loss": 0.0329, "step": 1738 }, { "epoch": 102.29411764705883, "grad_norm": 1.7377547025680542, "learning_rate": 1.7444394715488892e-05, "loss": 0.0277, "step": 1739 }, { "epoch": 102.3529411764706, "grad_norm": 1.5392431020736694, "learning_rate": 1.74412137471376e-05, "loss": 0.0508, "step": 1740 }, { "epoch": 102.41176470588235, "grad_norm": 1.93121337890625, "learning_rate": 1.743803109074829e-05, "loss": 0.0645, "step": 1741 }, { "epoch": 102.47058823529412, "grad_norm": 3.234334945678711, "learning_rate": 1.743484674704295e-05, "loss": 0.0789, "step": 1742 }, { "epoch": 102.52941176470588, "grad_norm": 2.0652480125427246, "learning_rate": 1.743166071674395e-05, "loss": 0.0295, "step": 1743 }, { "epoch": 102.58823529411765, "grad_norm": 1.7859371900558472, "learning_rate": 1.7428473000574038e-05, "loss": 0.0246, "step": 1744 }, { "epoch": 102.6470588235294, "grad_norm": 1.874146580696106, "learning_rate": 1.7425283599256343e-05, "loss": 0.0639, "step": 1745 }, { "epoch": 102.70588235294117, "grad_norm": 1.2747596502304077, "learning_rate": 1.7422092513514387e-05, "loss": 0.0335, "step": 1746 }, { "epoch": 102.76470588235294, "grad_norm": 1.2983523607254028, "learning_rate": 1.741889974407206e-05, "loss": 0.0533, "step": 1747 }, { "epoch": 102.82352941176471, "grad_norm": 1.6107659339904785, "learning_rate": 1.7415705291653647e-05, "loss": 0.0593, "step": 1748 }, { "epoch": 102.88235294117646, "grad_norm": 1.968042016029358, "learning_rate": 1.741250915698381e-05, "loss": 0.0241, "step": 1749 }, { "epoch": 102.94117647058823, "grad_norm": 1.9397730827331543, "learning_rate": 1.7409311340787582e-05, "loss": 0.0603, "step": 1750 }, { "epoch": 103.0, "grad_norm": 1.0085450410842896, "learning_rate": 1.74061118437904e-05, "loss": 0.0202, "step": 1751 }, { "epoch": 103.05882352941177, "grad_norm": 1.705207109451294, "learning_rate": 1.740291066671806e-05, "loss": 0.0757, "step": 1752 }, { "epoch": 103.11764705882354, "grad_norm": 2.2956032752990723, "learning_rate": 1.739970781029675e-05, "loss": 0.059, "step": 1753 }, { "epoch": 103.17647058823529, "grad_norm": 1.0497366189956665, "learning_rate": 1.739650327525304e-05, "loss": 0.0254, "step": 1754 }, { "epoch": 103.23529411764706, "grad_norm": 1.160421371459961, "learning_rate": 1.739329706231388e-05, "loss": 0.0309, "step": 1755 }, { "epoch": 103.29411764705883, "grad_norm": 1.6769356727600098, "learning_rate": 1.7390089172206594e-05, "loss": 0.0222, "step": 1756 }, { "epoch": 103.3529411764706, "grad_norm": 1.493593454360962, "learning_rate": 1.738687960565889e-05, "loss": 0.0271, "step": 1757 }, { "epoch": 103.41176470588235, "grad_norm": 1.350970983505249, "learning_rate": 1.7383668363398866e-05, "loss": 0.041, "step": 1758 }, { "epoch": 103.47058823529412, "grad_norm": 1.1870250701904297, "learning_rate": 1.7380455446154982e-05, "loss": 0.0274, "step": 1759 }, { "epoch": 103.52941176470588, "grad_norm": 1.52970290184021, "learning_rate": 1.7377240854656094e-05, "loss": 0.032, "step": 1760 }, { "epoch": 103.58823529411765, "grad_norm": 4.5478434562683105, "learning_rate": 1.737402458963143e-05, "loss": 0.043, "step": 1761 }, { "epoch": 103.6470588235294, "grad_norm": 1.791055679321289, "learning_rate": 1.73708066518106e-05, "loss": 0.0573, "step": 1762 }, { "epoch": 103.70588235294117, "grad_norm": 1.2735308408737183, "learning_rate": 1.7367587041923588e-05, "loss": 0.0406, "step": 1763 }, { "epoch": 103.76470588235294, "grad_norm": 1.2000950574874878, "learning_rate": 1.736436576070077e-05, "loss": 0.026, "step": 1764 }, { "epoch": 103.82352941176471, "grad_norm": 2.1088650226593018, "learning_rate": 1.7361142808872884e-05, "loss": 0.0656, "step": 1765 }, { "epoch": 103.88235294117646, "grad_norm": 1.8969300985336304, "learning_rate": 1.7357918187171064e-05, "loss": 0.0432, "step": 1766 }, { "epoch": 103.94117647058823, "grad_norm": 1.4624872207641602, "learning_rate": 1.735469189632681e-05, "loss": 0.0273, "step": 1767 }, { "epoch": 104.0, "grad_norm": 1.6410229206085205, "learning_rate": 1.7351463937072008e-05, "loss": 0.0589, "step": 1768 }, { "epoch": 104.05882352941177, "grad_norm": 2.1526691913604736, "learning_rate": 1.7348234310138915e-05, "loss": 0.0821, "step": 1769 }, { "epoch": 104.11764705882354, "grad_norm": 2.1674768924713135, "learning_rate": 1.734500301626018e-05, "loss": 0.05, "step": 1770 }, { "epoch": 104.17647058823529, "grad_norm": 1.346055030822754, "learning_rate": 1.7341770056168813e-05, "loss": 0.0296, "step": 1771 }, { "epoch": 104.23529411764706, "grad_norm": 2.080461263656616, "learning_rate": 1.7338535430598217e-05, "loss": 0.0618, "step": 1772 }, { "epoch": 104.29411764705883, "grad_norm": 1.4984848499298096, "learning_rate": 1.733529914028216e-05, "loss": 0.0254, "step": 1773 }, { "epoch": 104.3529411764706, "grad_norm": 4.827793121337891, "learning_rate": 1.7332061185954797e-05, "loss": 0.0482, "step": 1774 }, { "epoch": 104.41176470588235, "grad_norm": 3.6892759799957275, "learning_rate": 1.732882156835066e-05, "loss": 0.0637, "step": 1775 }, { "epoch": 104.47058823529412, "grad_norm": 1.16379714012146, "learning_rate": 1.7325580288204652e-05, "loss": 0.0416, "step": 1776 }, { "epoch": 104.52941176470588, "grad_norm": 1.9767804145812988, "learning_rate": 1.732233734625206e-05, "loss": 0.0361, "step": 1777 }, { "epoch": 104.58823529411765, "grad_norm": 3.038198232650757, "learning_rate": 1.7319092743228544e-05, "loss": 0.0377, "step": 1778 }, { "epoch": 104.6470588235294, "grad_norm": 2.5692763328552246, "learning_rate": 1.7315846479870135e-05, "loss": 0.0311, "step": 1779 }, { "epoch": 104.70588235294117, "grad_norm": 3.1480493545532227, "learning_rate": 1.7312598556913255e-05, "loss": 0.0295, "step": 1780 }, { "epoch": 104.76470588235294, "grad_norm": 2.0401761531829834, "learning_rate": 1.7309348975094695e-05, "loss": 0.0489, "step": 1781 }, { "epoch": 104.82352941176471, "grad_norm": 3.1394360065460205, "learning_rate": 1.7306097735151616e-05, "loss": 0.0588, "step": 1782 }, { "epoch": 104.88235294117646, "grad_norm": 1.9422533512115479, "learning_rate": 1.730284483782157e-05, "loss": 0.0543, "step": 1783 }, { "epoch": 104.94117647058823, "grad_norm": 4.171651363372803, "learning_rate": 1.7299590283842467e-05, "loss": 0.0284, "step": 1784 }, { "epoch": 105.0, "grad_norm": 2.412464141845703, "learning_rate": 1.7296334073952606e-05, "loss": 0.0247, "step": 1785 }, { "epoch": 105.05882352941177, "grad_norm": 2.3488337993621826, "learning_rate": 1.7293076208890654e-05, "loss": 0.0441, "step": 1786 }, { "epoch": 105.11764705882354, "grad_norm": 1.734313726425171, "learning_rate": 1.7289816689395666e-05, "loss": 0.0197, "step": 1787 }, { "epoch": 105.17647058823529, "grad_norm": 3.529585123062134, "learning_rate": 1.7286555516207054e-05, "loss": 0.067, "step": 1788 }, { "epoch": 105.23529411764706, "grad_norm": 1.9328737258911133, "learning_rate": 1.7283292690064622e-05, "loss": 0.0279, "step": 1789 }, { "epoch": 105.29411764705883, "grad_norm": 1.4702056646347046, "learning_rate": 1.728002821170854e-05, "loss": 0.0251, "step": 1790 }, { "epoch": 105.3529411764706, "grad_norm": 1.6364496946334839, "learning_rate": 1.7276762081879348e-05, "loss": 0.0229, "step": 1791 }, { "epoch": 105.41176470588235, "grad_norm": 1.9690303802490234, "learning_rate": 1.727349430131797e-05, "loss": 0.05, "step": 1792 }, { "epoch": 105.47058823529412, "grad_norm": 1.5278873443603516, "learning_rate": 1.727022487076571e-05, "loss": 0.0292, "step": 1793 }, { "epoch": 105.52941176470588, "grad_norm": 3.6213974952697754, "learning_rate": 1.726695379096423e-05, "loss": 0.0281, "step": 1794 }, { "epoch": 105.58823529411765, "grad_norm": 1.2833529710769653, "learning_rate": 1.726368106265557e-05, "loss": 0.0233, "step": 1795 }, { "epoch": 105.6470588235294, "grad_norm": 2.814256429672241, "learning_rate": 1.726040668658215e-05, "loss": 0.0605, "step": 1796 }, { "epoch": 105.70588235294117, "grad_norm": 3.3879058361053467, "learning_rate": 1.725713066348677e-05, "loss": 0.0758, "step": 1797 }, { "epoch": 105.76470588235294, "grad_norm": 1.6641029119491577, "learning_rate": 1.7253852994112585e-05, "loss": 0.0677, "step": 1798 }, { "epoch": 105.82352941176471, "grad_norm": 1.6385622024536133, "learning_rate": 1.725057367920314e-05, "loss": 0.059, "step": 1799 }, { "epoch": 105.88235294117646, "grad_norm": 2.8171536922454834, "learning_rate": 1.7247292719502343e-05, "loss": 0.0874, "step": 1800 }, { "epoch": 105.94117647058823, "grad_norm": 1.2776570320129395, "learning_rate": 1.7244010115754483e-05, "loss": 0.0273, "step": 1801 }, { "epoch": 106.0, "grad_norm": 3.8084843158721924, "learning_rate": 1.7240725868704218e-05, "loss": 0.0504, "step": 1802 }, { "epoch": 106.05882352941177, "grad_norm": 1.3483179807662964, "learning_rate": 1.7237439979096577e-05, "loss": 0.057, "step": 1803 }, { "epoch": 106.11764705882354, "grad_norm": 1.6311838626861572, "learning_rate": 1.7234152447676957e-05, "loss": 0.0542, "step": 1804 }, { "epoch": 106.17647058823529, "grad_norm": 1.933472990989685, "learning_rate": 1.7230863275191145e-05, "loss": 0.072, "step": 1805 }, { "epoch": 106.23529411764706, "grad_norm": 1.3196611404418945, "learning_rate": 1.7227572462385285e-05, "loss": 0.0436, "step": 1806 }, { "epoch": 106.29411764705883, "grad_norm": 1.7855198383331299, "learning_rate": 1.7224280010005898e-05, "loss": 0.0431, "step": 1807 }, { "epoch": 106.3529411764706, "grad_norm": 1.3464804887771606, "learning_rate": 1.722098591879987e-05, "loss": 0.0553, "step": 1808 }, { "epoch": 106.41176470588235, "grad_norm": 1.2060273885726929, "learning_rate": 1.7217690189514473e-05, "loss": 0.032, "step": 1809 }, { "epoch": 106.47058823529412, "grad_norm": 2.3782289028167725, "learning_rate": 1.721439282289734e-05, "loss": 0.0406, "step": 1810 }, { "epoch": 106.52941176470588, "grad_norm": 1.0845953226089478, "learning_rate": 1.721109381969648e-05, "loss": 0.0233, "step": 1811 }, { "epoch": 106.58823529411765, "grad_norm": 0.6977896094322205, "learning_rate": 1.7207793180660265e-05, "loss": 0.0137, "step": 1812 }, { "epoch": 106.6470588235294, "grad_norm": 1.5989803075790405, "learning_rate": 1.7204490906537453e-05, "loss": 0.0305, "step": 1813 }, { "epoch": 106.70588235294117, "grad_norm": 1.6015528440475464, "learning_rate": 1.720118699807716e-05, "loss": 0.0515, "step": 1814 }, { "epoch": 106.76470588235294, "grad_norm": 0.8897048234939575, "learning_rate": 1.7197881456028873e-05, "loss": 0.0197, "step": 1815 }, { "epoch": 106.82352941176471, "grad_norm": 1.7830810546875, "learning_rate": 1.7194574281142455e-05, "loss": 0.0364, "step": 1816 }, { "epoch": 106.88235294117646, "grad_norm": 0.9030135273933411, "learning_rate": 1.7191265474168143e-05, "loss": 0.0141, "step": 1817 }, { "epoch": 106.94117647058823, "grad_norm": 1.2932827472686768, "learning_rate": 1.7187955035856537e-05, "loss": 0.0369, "step": 1818 }, { "epoch": 107.0, "grad_norm": 1.3383783102035522, "learning_rate": 1.718464296695861e-05, "loss": 0.0325, "step": 1819 }, { "epoch": 107.05882352941177, "grad_norm": 1.7680771350860596, "learning_rate": 1.7181329268225702e-05, "loss": 0.0462, "step": 1820 }, { "epoch": 107.11764705882354, "grad_norm": 1.388492226600647, "learning_rate": 1.7178013940409527e-05, "loss": 0.0381, "step": 1821 }, { "epoch": 107.17647058823529, "grad_norm": 1.5531829595565796, "learning_rate": 1.7174696984262167e-05, "loss": 0.0497, "step": 1822 }, { "epoch": 107.23529411764706, "grad_norm": 1.5385397672653198, "learning_rate": 1.717137840053607e-05, "loss": 0.0655, "step": 1823 }, { "epoch": 107.29411764705883, "grad_norm": 1.4867051839828491, "learning_rate": 1.716805818998406e-05, "loss": 0.0421, "step": 1824 }, { "epoch": 107.3529411764706, "grad_norm": 1.347693681716919, "learning_rate": 1.716473635335932e-05, "loss": 0.0269, "step": 1825 }, { "epoch": 107.41176470588235, "grad_norm": 1.5571082830429077, "learning_rate": 1.7161412891415415e-05, "loss": 0.0406, "step": 1826 }, { "epoch": 107.47058823529412, "grad_norm": 1.417953372001648, "learning_rate": 1.715808780490627e-05, "loss": 0.0448, "step": 1827 }, { "epoch": 107.52941176470588, "grad_norm": 1.4764683246612549, "learning_rate": 1.7154761094586177e-05, "loss": 0.025, "step": 1828 }, { "epoch": 107.58823529411765, "grad_norm": 1.4307478666305542, "learning_rate": 1.7151432761209798e-05, "loss": 0.0373, "step": 1829 }, { "epoch": 107.6470588235294, "grad_norm": 1.7772451639175415, "learning_rate": 1.7148102805532173e-05, "loss": 0.0476, "step": 1830 }, { "epoch": 107.70588235294117, "grad_norm": 1.116499900817871, "learning_rate": 1.7144771228308698e-05, "loss": 0.0214, "step": 1831 }, { "epoch": 107.76470588235294, "grad_norm": 1.1883256435394287, "learning_rate": 1.714143803029514e-05, "loss": 0.0242, "step": 1832 }, { "epoch": 107.82352941176471, "grad_norm": 2.917383909225464, "learning_rate": 1.7138103212247635e-05, "loss": 0.0294, "step": 1833 }, { "epoch": 107.88235294117646, "grad_norm": 1.7013219594955444, "learning_rate": 1.7134766774922682e-05, "loss": 0.0488, "step": 1834 }, { "epoch": 107.94117647058823, "grad_norm": 1.306709885597229, "learning_rate": 1.7131428719077155e-05, "loss": 0.0259, "step": 1835 }, { "epoch": 108.0, "grad_norm": 1.2228847742080688, "learning_rate": 1.7128089045468294e-05, "loss": 0.0379, "step": 1836 }, { "epoch": 108.05882352941177, "grad_norm": 2.350015640258789, "learning_rate": 1.71247477548537e-05, "loss": 0.0854, "step": 1837 }, { "epoch": 108.11764705882354, "grad_norm": 3.2885539531707764, "learning_rate": 1.712140484799134e-05, "loss": 0.069, "step": 1838 }, { "epoch": 108.17647058823529, "grad_norm": 2.239473581314087, "learning_rate": 1.7118060325639562e-05, "loss": 0.0275, "step": 1839 }, { "epoch": 108.23529411764706, "grad_norm": 1.703123688697815, "learning_rate": 1.711471418855706e-05, "loss": 0.0389, "step": 1840 }, { "epoch": 108.29411764705883, "grad_norm": 1.841270923614502, "learning_rate": 1.7111366437502913e-05, "loss": 0.0436, "step": 1841 }, { "epoch": 108.3529411764706, "grad_norm": 1.4154552221298218, "learning_rate": 1.7108017073236555e-05, "loss": 0.0464, "step": 1842 }, { "epoch": 108.41176470588235, "grad_norm": 2.5388574600219727, "learning_rate": 1.7104666096517784e-05, "loss": 0.0423, "step": 1843 }, { "epoch": 108.47058823529412, "grad_norm": 1.894707441329956, "learning_rate": 1.710131350810677e-05, "loss": 0.0248, "step": 1844 }, { "epoch": 108.52941176470588, "grad_norm": 1.621259093284607, "learning_rate": 1.7097959308764057e-05, "loss": 0.0427, "step": 1845 }, { "epoch": 108.58823529411765, "grad_norm": 1.240109920501709, "learning_rate": 1.7094603499250532e-05, "loss": 0.0165, "step": 1846 }, { "epoch": 108.6470588235294, "grad_norm": 1.6652218103408813, "learning_rate": 1.7091246080327464e-05, "loss": 0.0215, "step": 1847 }, { "epoch": 108.70588235294117, "grad_norm": 1.150543451309204, "learning_rate": 1.7087887052756485e-05, "loss": 0.024, "step": 1848 }, { "epoch": 108.76470588235294, "grad_norm": 1.5817803144454956, "learning_rate": 1.7084526417299584e-05, "loss": 0.0374, "step": 1849 }, { "epoch": 108.82352941176471, "grad_norm": 0.9261584281921387, "learning_rate": 1.7081164174719133e-05, "loss": 0.0198, "step": 1850 }, { "epoch": 108.88235294117646, "grad_norm": 1.380887508392334, "learning_rate": 1.707780032577784e-05, "loss": 0.0332, "step": 1851 }, { "epoch": 108.94117647058823, "grad_norm": 2.4387080669403076, "learning_rate": 1.7074434871238804e-05, "loss": 0.0271, "step": 1852 }, { "epoch": 109.0, "grad_norm": 2.1419615745544434, "learning_rate": 1.7071067811865477e-05, "loss": 0.0336, "step": 1853 }, { "epoch": 109.05882352941177, "grad_norm": 1.6299248933792114, "learning_rate": 1.7067699148421673e-05, "loss": 0.0628, "step": 1854 }, { "epoch": 109.11764705882354, "grad_norm": 1.425675630569458, "learning_rate": 1.7064328881671572e-05, "loss": 0.0341, "step": 1855 }, { "epoch": 109.17647058823529, "grad_norm": 1.545905590057373, "learning_rate": 1.7060957012379722e-05, "loss": 0.041, "step": 1856 }, { "epoch": 109.23529411764706, "grad_norm": 1.6110429763793945, "learning_rate": 1.705758354131103e-05, "loss": 0.0227, "step": 1857 }, { "epoch": 109.29411764705883, "grad_norm": 1.278862476348877, "learning_rate": 1.7054208469230763e-05, "loss": 0.0272, "step": 1858 }, { "epoch": 109.3529411764706, "grad_norm": 1.3011140823364258, "learning_rate": 1.7050831796904563e-05, "loss": 0.0436, "step": 1859 }, { "epoch": 109.41176470588235, "grad_norm": 1.4213054180145264, "learning_rate": 1.704745352509842e-05, "loss": 0.0262, "step": 1860 }, { "epoch": 109.47058823529412, "grad_norm": 2.4693522453308105, "learning_rate": 1.70440736545787e-05, "loss": 0.0287, "step": 1861 }, { "epoch": 109.52941176470588, "grad_norm": 1.3126081228256226, "learning_rate": 1.7040692186112125e-05, "loss": 0.0419, "step": 1862 }, { "epoch": 109.58823529411765, "grad_norm": 1.3012458086013794, "learning_rate": 1.7037309120465776e-05, "loss": 0.0427, "step": 1863 }, { "epoch": 109.6470588235294, "grad_norm": 1.4314943552017212, "learning_rate": 1.7033924458407108e-05, "loss": 0.0533, "step": 1864 }, { "epoch": 109.70588235294117, "grad_norm": 1.791007161140442, "learning_rate": 1.7030538200703925e-05, "loss": 0.0317, "step": 1865 }, { "epoch": 109.76470588235294, "grad_norm": 1.3555138111114502, "learning_rate": 1.70271503481244e-05, "loss": 0.032, "step": 1866 }, { "epoch": 109.82352941176471, "grad_norm": 1.148694634437561, "learning_rate": 1.702376090143707e-05, "loss": 0.0374, "step": 1867 }, { "epoch": 109.88235294117646, "grad_norm": 1.427013635635376, "learning_rate": 1.7020369861410826e-05, "loss": 0.0309, "step": 1868 }, { "epoch": 109.94117647058823, "grad_norm": 1.2764694690704346, "learning_rate": 1.701697722881493e-05, "loss": 0.0312, "step": 1869 }, { "epoch": 110.0, "grad_norm": 2.4029104709625244, "learning_rate": 1.7013583004418994e-05, "loss": 0.0484, "step": 1870 }, { "epoch": 110.05882352941177, "grad_norm": 1.4966351985931396, "learning_rate": 1.7010187188993003e-05, "loss": 0.0369, "step": 1871 }, { "epoch": 110.11764705882354, "grad_norm": 1.4636814594268799, "learning_rate": 1.700678978330729e-05, "loss": 0.0435, "step": 1872 }, { "epoch": 110.17647058823529, "grad_norm": 1.4753172397613525, "learning_rate": 1.7003390788132566e-05, "loss": 0.0318, "step": 1873 }, { "epoch": 110.23529411764706, "grad_norm": 1.7487084865570068, "learning_rate": 1.6999990204239883e-05, "loss": 0.0534, "step": 1874 }, { "epoch": 110.29411764705883, "grad_norm": 1.3126990795135498, "learning_rate": 1.699658803240067e-05, "loss": 0.0297, "step": 1875 }, { "epoch": 110.3529411764706, "grad_norm": 1.7651875019073486, "learning_rate": 1.6993184273386702e-05, "loss": 0.0326, "step": 1876 }, { "epoch": 110.41176470588235, "grad_norm": 1.6494605541229248, "learning_rate": 1.6989778927970128e-05, "loss": 0.0604, "step": 1877 }, { "epoch": 110.47058823529412, "grad_norm": 1.2228807210922241, "learning_rate": 1.6986371996923445e-05, "loss": 0.0374, "step": 1878 }, { "epoch": 110.52941176470588, "grad_norm": 1.5793023109436035, "learning_rate": 1.698296348101952e-05, "loss": 0.0257, "step": 1879 }, { "epoch": 110.58823529411765, "grad_norm": 2.0450546741485596, "learning_rate": 1.6979553381031565e-05, "loss": 0.0258, "step": 1880 }, { "epoch": 110.6470588235294, "grad_norm": 1.3055810928344727, "learning_rate": 1.6976141697733172e-05, "loss": 0.0255, "step": 1881 }, { "epoch": 110.70588235294117, "grad_norm": 1.0773320198059082, "learning_rate": 1.6972728431898272e-05, "loss": 0.0256, "step": 1882 }, { "epoch": 110.76470588235294, "grad_norm": 1.7697519063949585, "learning_rate": 1.6969313584301165e-05, "loss": 0.0495, "step": 1883 }, { "epoch": 110.82352941176471, "grad_norm": 0.8306576609611511, "learning_rate": 1.6965897155716512e-05, "loss": 0.0141, "step": 1884 }, { "epoch": 110.88235294117646, "grad_norm": 1.9999258518218994, "learning_rate": 1.696247914691933e-05, "loss": 0.0438, "step": 1885 }, { "epoch": 110.94117647058823, "grad_norm": 1.039754867553711, "learning_rate": 1.6959059558684988e-05, "loss": 0.0263, "step": 1886 }, { "epoch": 111.0, "grad_norm": 1.4769526720046997, "learning_rate": 1.695563839178923e-05, "loss": 0.0495, "step": 1887 }, { "epoch": 111.05882352941177, "grad_norm": 1.372680425643921, "learning_rate": 1.6952215647008134e-05, "loss": 0.0322, "step": 1888 }, { "epoch": 111.11764705882354, "grad_norm": 1.1489819288253784, "learning_rate": 1.6948791325118152e-05, "loss": 0.0186, "step": 1889 }, { "epoch": 111.17647058823529, "grad_norm": 3.7387166023254395, "learning_rate": 1.6945365426896097e-05, "loss": 0.0379, "step": 1890 }, { "epoch": 111.23529411764706, "grad_norm": 1.251656413078308, "learning_rate": 1.6941937953119127e-05, "loss": 0.0357, "step": 1891 }, { "epoch": 111.29411764705883, "grad_norm": 2.1286826133728027, "learning_rate": 1.693850890456477e-05, "loss": 0.0438, "step": 1892 }, { "epoch": 111.3529411764706, "grad_norm": 1.4303910732269287, "learning_rate": 1.69350782820109e-05, "loss": 0.0384, "step": 1893 }, { "epoch": 111.41176470588235, "grad_norm": 2.4542038440704346, "learning_rate": 1.693164608623575e-05, "loss": 0.0587, "step": 1894 }, { "epoch": 111.47058823529412, "grad_norm": 1.0714792013168335, "learning_rate": 1.6928212318017925e-05, "loss": 0.0208, "step": 1895 }, { "epoch": 111.52941176470588, "grad_norm": 1.305410623550415, "learning_rate": 1.6924776978136363e-05, "loss": 0.042, "step": 1896 }, { "epoch": 111.58823529411765, "grad_norm": 1.6022956371307373, "learning_rate": 1.6921340067370374e-05, "loss": 0.0217, "step": 1897 }, { "epoch": 111.6470588235294, "grad_norm": 1.5373294353485107, "learning_rate": 1.6917901586499625e-05, "loss": 0.0441, "step": 1898 }, { "epoch": 111.70588235294117, "grad_norm": 1.7964471578598022, "learning_rate": 1.691446153630413e-05, "loss": 0.0443, "step": 1899 }, { "epoch": 111.76470588235294, "grad_norm": 1.7142525911331177, "learning_rate": 1.6911019917564263e-05, "loss": 0.0234, "step": 1900 }, { "epoch": 111.82352941176471, "grad_norm": 1.12490975856781, "learning_rate": 1.690757673106076e-05, "loss": 0.0355, "step": 1901 }, { "epoch": 111.88235294117646, "grad_norm": 1.4231663942337036, "learning_rate": 1.69041319775747e-05, "loss": 0.0415, "step": 1902 }, { "epoch": 111.94117647058823, "grad_norm": 1.0932329893112183, "learning_rate": 1.690068565788753e-05, "loss": 0.0423, "step": 1903 }, { "epoch": 112.0, "grad_norm": 2.107599973678589, "learning_rate": 1.6897237772781046e-05, "loss": 0.0185, "step": 1904 }, { "epoch": 112.05882352941177, "grad_norm": 1.285866141319275, "learning_rate": 1.6893788323037395e-05, "loss": 0.0315, "step": 1905 }, { "epoch": 112.11764705882354, "grad_norm": 1.0072484016418457, "learning_rate": 1.6890337309439094e-05, "loss": 0.0169, "step": 1906 }, { "epoch": 112.17647058823529, "grad_norm": 1.8069868087768555, "learning_rate": 1.6886884732768994e-05, "loss": 0.0405, "step": 1907 }, { "epoch": 112.23529411764706, "grad_norm": 4.238760948181152, "learning_rate": 1.688343059381032e-05, "loss": 0.0263, "step": 1908 }, { "epoch": 112.29411764705883, "grad_norm": 1.3486976623535156, "learning_rate": 1.6879974893346642e-05, "loss": 0.0179, "step": 1909 }, { "epoch": 112.3529411764706, "grad_norm": 1.189417839050293, "learning_rate": 1.6876517632161878e-05, "loss": 0.0223, "step": 1910 }, { "epoch": 112.41176470588235, "grad_norm": 1.2608513832092285, "learning_rate": 1.6873058811040316e-05, "loss": 0.0337, "step": 1911 }, { "epoch": 112.47058823529412, "grad_norm": 1.5828896760940552, "learning_rate": 1.6869598430766584e-05, "loss": 0.0362, "step": 1912 }, { "epoch": 112.52941176470588, "grad_norm": 1.2220128774642944, "learning_rate": 1.6866136492125667e-05, "loss": 0.0184, "step": 1913 }, { "epoch": 112.58823529411765, "grad_norm": 1.7936240434646606, "learning_rate": 1.6862672995902908e-05, "loss": 0.0461, "step": 1914 }, { "epoch": 112.6470588235294, "grad_norm": 3.173383951187134, "learning_rate": 1.6859207942884e-05, "loss": 0.0388, "step": 1915 }, { "epoch": 112.70588235294117, "grad_norm": 1.927397608757019, "learning_rate": 1.685574133385499e-05, "loss": 0.0485, "step": 1916 }, { "epoch": 112.76470588235294, "grad_norm": 0.8974199891090393, "learning_rate": 1.685227316960228e-05, "loss": 0.0139, "step": 1917 }, { "epoch": 112.82352941176471, "grad_norm": 1.4054014682769775, "learning_rate": 1.6848803450912618e-05, "loss": 0.0561, "step": 1918 }, { "epoch": 112.88235294117646, "grad_norm": 0.9586604833602905, "learning_rate": 1.6845332178573106e-05, "loss": 0.0135, "step": 1919 }, { "epoch": 112.94117647058823, "grad_norm": 1.8931220769882202, "learning_rate": 1.6841859353371212e-05, "loss": 0.0777, "step": 1920 }, { "epoch": 113.0, "grad_norm": 4.6174468994140625, "learning_rate": 1.6838384976094738e-05, "loss": 0.0876, "step": 1921 }, { "epoch": 113.05882352941177, "grad_norm": 1.3928691148757935, "learning_rate": 1.683490904753185e-05, "loss": 0.0617, "step": 1922 }, { "epoch": 113.11764705882354, "grad_norm": 1.2683418989181519, "learning_rate": 1.6831431568471056e-05, "loss": 0.0287, "step": 1923 }, { "epoch": 113.17647058823529, "grad_norm": 2.234604597091675, "learning_rate": 1.682795253970123e-05, "loss": 0.033, "step": 1924 }, { "epoch": 113.23529411764706, "grad_norm": 1.2672724723815918, "learning_rate": 1.682447196201158e-05, "loss": 0.021, "step": 1925 }, { "epoch": 113.29411764705883, "grad_norm": 1.8930654525756836, "learning_rate": 1.6820989836191678e-05, "loss": 0.0424, "step": 1926 }, { "epoch": 113.3529411764706, "grad_norm": 1.651115894317627, "learning_rate": 1.6817506163031446e-05, "loss": 0.0211, "step": 1927 }, { "epoch": 113.41176470588235, "grad_norm": 2.6936774253845215, "learning_rate": 1.6814020943321156e-05, "loss": 0.049, "step": 1928 }, { "epoch": 113.47058823529412, "grad_norm": 1.253448247909546, "learning_rate": 1.6810534177851424e-05, "loss": 0.0225, "step": 1929 }, { "epoch": 113.52941176470588, "grad_norm": 2.3111422061920166, "learning_rate": 1.6807045867413224e-05, "loss": 0.0251, "step": 1930 }, { "epoch": 113.58823529411765, "grad_norm": 1.4481966495513916, "learning_rate": 1.6803556012797878e-05, "loss": 0.0389, "step": 1931 }, { "epoch": 113.6470588235294, "grad_norm": 1.539488673210144, "learning_rate": 1.6800064614797066e-05, "loss": 0.0305, "step": 1932 }, { "epoch": 113.70588235294117, "grad_norm": 1.825221061706543, "learning_rate": 1.6796571674202798e-05, "loss": 0.0473, "step": 1933 }, { "epoch": 113.76470588235294, "grad_norm": 2.4214634895324707, "learning_rate": 1.6793077191807457e-05, "loss": 0.0283, "step": 1934 }, { "epoch": 113.82352941176471, "grad_norm": 1.75234055519104, "learning_rate": 1.6789581168403764e-05, "loss": 0.0626, "step": 1935 }, { "epoch": 113.88235294117646, "grad_norm": 1.9656918048858643, "learning_rate": 1.678608360478479e-05, "loss": 0.0463, "step": 1936 }, { "epoch": 113.94117647058823, "grad_norm": 1.7907650470733643, "learning_rate": 1.6782584501743958e-05, "loss": 0.0339, "step": 1937 }, { "epoch": 114.0, "grad_norm": 2.0593016147613525, "learning_rate": 1.6779083860075032e-05, "loss": 0.0872, "step": 1938 }, { "epoch": 114.05882352941177, "grad_norm": 0.8059966564178467, "learning_rate": 1.6775581680572142e-05, "loss": 0.0182, "step": 1939 }, { "epoch": 114.11764705882354, "grad_norm": 1.6181343793869019, "learning_rate": 1.677207796402975e-05, "loss": 0.0445, "step": 1940 }, { "epoch": 114.17647058823529, "grad_norm": 0.8564545512199402, "learning_rate": 1.676857271124268e-05, "loss": 0.0178, "step": 1941 }, { "epoch": 114.23529411764706, "grad_norm": 1.2127621173858643, "learning_rate": 1.676506592300609e-05, "loss": 0.0313, "step": 1942 }, { "epoch": 114.29411764705883, "grad_norm": 1.883644938468933, "learning_rate": 1.67615576001155e-05, "loss": 0.0567, "step": 1943 }, { "epoch": 114.3529411764706, "grad_norm": 1.6380833387374878, "learning_rate": 1.675804774336677e-05, "loss": 0.0371, "step": 1944 }, { "epoch": 114.41176470588235, "grad_norm": 0.7252531051635742, "learning_rate": 1.675453635355611e-05, "loss": 0.014, "step": 1945 }, { "epoch": 114.47058823529412, "grad_norm": 1.2965518236160278, "learning_rate": 1.6751023431480076e-05, "loss": 0.0204, "step": 1946 }, { "epoch": 114.52941176470588, "grad_norm": 2.291574716567993, "learning_rate": 1.674750897793558e-05, "loss": 0.0291, "step": 1947 }, { "epoch": 114.58823529411765, "grad_norm": 1.3028630018234253, "learning_rate": 1.6743992993719876e-05, "loss": 0.0245, "step": 1948 }, { "epoch": 114.6470588235294, "grad_norm": 2.463622570037842, "learning_rate": 1.6740475479630558e-05, "loss": 0.0668, "step": 1949 }, { "epoch": 114.70588235294117, "grad_norm": 1.47983980178833, "learning_rate": 1.6736956436465573e-05, "loss": 0.0508, "step": 1950 }, { "epoch": 114.76470588235294, "grad_norm": 1.4185764789581299, "learning_rate": 1.673343586502322e-05, "loss": 0.0383, "step": 1951 }, { "epoch": 114.82352941176471, "grad_norm": 2.3848962783813477, "learning_rate": 1.672991376610214e-05, "loss": 0.0561, "step": 1952 }, { "epoch": 114.88235294117646, "grad_norm": 1.9290779829025269, "learning_rate": 1.6726390140501315e-05, "loss": 0.0306, "step": 1953 }, { "epoch": 114.94117647058823, "grad_norm": 1.6111494302749634, "learning_rate": 1.6722864989020084e-05, "loss": 0.0225, "step": 1954 }, { "epoch": 115.0, "grad_norm": 2.1487693786621094, "learning_rate": 1.6719338312458123e-05, "loss": 0.0319, "step": 1955 }, { "epoch": 115.05882352941177, "grad_norm": 1.1556073427200317, "learning_rate": 1.6715810111615467e-05, "loss": 0.0405, "step": 1956 }, { "epoch": 115.11764705882354, "grad_norm": 0.9284799695014954, "learning_rate": 1.6712280387292477e-05, "loss": 0.0359, "step": 1957 }, { "epoch": 115.17647058823529, "grad_norm": 1.3754953145980835, "learning_rate": 1.670874914028987e-05, "loss": 0.0243, "step": 1958 }, { "epoch": 115.23529411764706, "grad_norm": 1.449352502822876, "learning_rate": 1.6705216371408722e-05, "loss": 0.0346, "step": 1959 }, { "epoch": 115.29411764705883, "grad_norm": 1.1836233139038086, "learning_rate": 1.6701682081450428e-05, "loss": 0.024, "step": 1960 }, { "epoch": 115.3529411764706, "grad_norm": 1.2714707851409912, "learning_rate": 1.6698146271216746e-05, "loss": 0.0329, "step": 1961 }, { "epoch": 115.41176470588235, "grad_norm": 1.5757834911346436, "learning_rate": 1.6694608941509772e-05, "loss": 0.0349, "step": 1962 }, { "epoch": 115.47058823529412, "grad_norm": 1.431269645690918, "learning_rate": 1.669107009313195e-05, "loss": 0.0208, "step": 1963 }, { "epoch": 115.52941176470588, "grad_norm": 0.8197975754737854, "learning_rate": 1.668752972688607e-05, "loss": 0.0191, "step": 1964 }, { "epoch": 115.58823529411765, "grad_norm": 1.4117162227630615, "learning_rate": 1.6683987843575256e-05, "loss": 0.0344, "step": 1965 }, { "epoch": 115.6470588235294, "grad_norm": 1.199059009552002, "learning_rate": 1.6680444444002993e-05, "loss": 0.0239, "step": 1966 }, { "epoch": 115.70588235294117, "grad_norm": 1.7859100103378296, "learning_rate": 1.6676899528973092e-05, "loss": 0.0484, "step": 1967 }, { "epoch": 115.76470588235294, "grad_norm": 1.3589597940444946, "learning_rate": 1.667335309928972e-05, "loss": 0.0218, "step": 1968 }, { "epoch": 115.82352941176471, "grad_norm": 1.161289930343628, "learning_rate": 1.6669805155757383e-05, "loss": 0.029, "step": 1969 }, { "epoch": 115.88235294117646, "grad_norm": 1.3632709980010986, "learning_rate": 1.6666255699180934e-05, "loss": 0.0661, "step": 1970 }, { "epoch": 115.94117647058823, "grad_norm": 1.6310505867004395, "learning_rate": 1.6662704730365566e-05, "loss": 0.0306, "step": 1971 }, { "epoch": 116.0, "grad_norm": 1.5007784366607666, "learning_rate": 1.665915225011681e-05, "loss": 0.0444, "step": 1972 }, { "epoch": 116.05882352941177, "grad_norm": 1.0866090059280396, "learning_rate": 1.6655598259240554e-05, "loss": 0.0351, "step": 1973 }, { "epoch": 116.11764705882354, "grad_norm": 0.8531699776649475, "learning_rate": 1.665204275854302e-05, "loss": 0.0168, "step": 1974 }, { "epoch": 116.17647058823529, "grad_norm": 1.0403492450714111, "learning_rate": 1.664848574883076e-05, "loss": 0.0187, "step": 1975 }, { "epoch": 116.23529411764706, "grad_norm": 1.0737428665161133, "learning_rate": 1.6644927230910693e-05, "loss": 0.0231, "step": 1976 }, { "epoch": 116.29411764705883, "grad_norm": 1.6412461996078491, "learning_rate": 1.6641367205590068e-05, "loss": 0.0345, "step": 1977 }, { "epoch": 116.3529411764706, "grad_norm": 1.1288697719573975, "learning_rate": 1.6637805673676473e-05, "loss": 0.0387, "step": 1978 }, { "epoch": 116.41176470588235, "grad_norm": 1.3349711894989014, "learning_rate": 1.6634242635977838e-05, "loss": 0.0375, "step": 1979 }, { "epoch": 116.47058823529412, "grad_norm": 1.3923213481903076, "learning_rate": 1.663067809330244e-05, "loss": 0.0275, "step": 1980 }, { "epoch": 116.52941176470588, "grad_norm": 1.1393109560012817, "learning_rate": 1.6627112046458898e-05, "loss": 0.031, "step": 1981 }, { "epoch": 116.58823529411765, "grad_norm": 1.139495849609375, "learning_rate": 1.6623544496256165e-05, "loss": 0.0325, "step": 1982 }, { "epoch": 116.6470588235294, "grad_norm": 1.5662249326705933, "learning_rate": 1.6619975443503543e-05, "loss": 0.0227, "step": 1983 }, { "epoch": 116.70588235294117, "grad_norm": 1.2490311861038208, "learning_rate": 1.6616404889010667e-05, "loss": 0.0433, "step": 1984 }, { "epoch": 116.76470588235294, "grad_norm": 2.6921579837799072, "learning_rate": 1.6612832833587518e-05, "loss": 0.0261, "step": 1985 }, { "epoch": 116.82352941176471, "grad_norm": 0.990930438041687, "learning_rate": 1.6609259278044418e-05, "loss": 0.019, "step": 1986 }, { "epoch": 116.88235294117646, "grad_norm": 1.0411674976348877, "learning_rate": 1.660568422319202e-05, "loss": 0.0324, "step": 1987 }, { "epoch": 116.94117647058823, "grad_norm": 1.3400804996490479, "learning_rate": 1.660210766984134e-05, "loss": 0.0314, "step": 1988 }, { "epoch": 117.0, "grad_norm": 1.3053758144378662, "learning_rate": 1.65985296188037e-05, "loss": 0.048, "step": 1989 }, { "epoch": 117.05882352941177, "grad_norm": 1.5710784196853638, "learning_rate": 1.6594950070890796e-05, "loss": 0.0684, "step": 1990 }, { "epoch": 117.11764705882354, "grad_norm": 2.6421356201171875, "learning_rate": 1.6591369026914635e-05, "loss": 0.0409, "step": 1991 }, { "epoch": 117.17647058823529, "grad_norm": 1.1326793432235718, "learning_rate": 1.6587786487687584e-05, "loss": 0.0368, "step": 1992 }, { "epoch": 117.23529411764706, "grad_norm": 1.277936577796936, "learning_rate": 1.6584202454022343e-05, "loss": 0.0179, "step": 1993 }, { "epoch": 117.29411764705883, "grad_norm": 1.5390111207962036, "learning_rate": 1.658061692673194e-05, "loss": 0.0347, "step": 1994 }, { "epoch": 117.3529411764706, "grad_norm": 1.3963571786880493, "learning_rate": 1.6577029906629762e-05, "loss": 0.0232, "step": 1995 }, { "epoch": 117.41176470588235, "grad_norm": 1.6564613580703735, "learning_rate": 1.6573441394529517e-05, "loss": 0.0375, "step": 1996 }, { "epoch": 117.47058823529412, "grad_norm": 0.896558940410614, "learning_rate": 1.6569851391245258e-05, "loss": 0.0155, "step": 1997 }, { "epoch": 117.52941176470588, "grad_norm": 1.1559416055679321, "learning_rate": 1.6566259897591384e-05, "loss": 0.0405, "step": 1998 }, { "epoch": 117.58823529411765, "grad_norm": 0.8324143886566162, "learning_rate": 1.6562666914382617e-05, "loss": 0.0164, "step": 1999 }, { "epoch": 117.6470588235294, "grad_norm": 1.1319090127944946, "learning_rate": 1.6559072442434025e-05, "loss": 0.0213, "step": 2000 }, { "epoch": 117.70588235294117, "grad_norm": 1.402652621269226, "learning_rate": 1.655547648256102e-05, "loss": 0.028, "step": 2001 }, { "epoch": 117.76470588235294, "grad_norm": 1.2524513006210327, "learning_rate": 1.655187903557934e-05, "loss": 0.0303, "step": 2002 }, { "epoch": 117.82352941176471, "grad_norm": 1.0550717115402222, "learning_rate": 1.6548280102305064e-05, "loss": 0.037, "step": 2003 }, { "epoch": 117.88235294117646, "grad_norm": 1.387695550918579, "learning_rate": 1.654467968355461e-05, "loss": 0.0192, "step": 2004 }, { "epoch": 117.94117647058823, "grad_norm": 1.50234854221344, "learning_rate": 1.6541077780144734e-05, "loss": 0.0363, "step": 2005 }, { "epoch": 118.0, "grad_norm": 1.3010427951812744, "learning_rate": 1.6537474392892527e-05, "loss": 0.0406, "step": 2006 }, { "epoch": 118.05882352941177, "grad_norm": 1.9430323839187622, "learning_rate": 1.6533869522615415e-05, "loss": 0.0163, "step": 2007 }, { "epoch": 118.11764705882354, "grad_norm": 0.9426420331001282, "learning_rate": 1.6530263170131164e-05, "loss": 0.0232, "step": 2008 }, { "epoch": 118.17647058823529, "grad_norm": 1.321579933166504, "learning_rate": 1.6526655336257876e-05, "loss": 0.0174, "step": 2009 }, { "epoch": 118.23529411764706, "grad_norm": 0.9397330284118652, "learning_rate": 1.6523046021813982e-05, "loss": 0.0302, "step": 2010 }, { "epoch": 118.29411764705883, "grad_norm": 0.9611418843269348, "learning_rate": 1.651943522761826e-05, "loss": 0.0207, "step": 2011 }, { "epoch": 118.3529411764706, "grad_norm": 1.8225295543670654, "learning_rate": 1.6515822954489816e-05, "loss": 0.0659, "step": 2012 }, { "epoch": 118.41176470588235, "grad_norm": 1.868554949760437, "learning_rate": 1.6512209203248092e-05, "loss": 0.0295, "step": 2013 }, { "epoch": 118.47058823529412, "grad_norm": 1.1025341749191284, "learning_rate": 1.6508593974712868e-05, "loss": 0.0277, "step": 2014 }, { "epoch": 118.52941176470588, "grad_norm": 1.086158037185669, "learning_rate": 1.6504977269704258e-05, "loss": 0.0381, "step": 2015 }, { "epoch": 118.58823529411765, "grad_norm": 1.8653781414031982, "learning_rate": 1.650135908904271e-05, "loss": 0.0304, "step": 2016 }, { "epoch": 118.6470588235294, "grad_norm": 1.029072642326355, "learning_rate": 1.6497739433549013e-05, "loss": 0.0167, "step": 2017 }, { "epoch": 118.70588235294117, "grad_norm": 2.2411272525787354, "learning_rate": 1.6494118304044277e-05, "loss": 0.0416, "step": 2018 }, { "epoch": 118.76470588235294, "grad_norm": 1.185510277748108, "learning_rate": 1.649049570134996e-05, "loss": 0.0215, "step": 2019 }, { "epoch": 118.82352941176471, "grad_norm": 1.439656376838684, "learning_rate": 1.6486871626287844e-05, "loss": 0.0386, "step": 2020 }, { "epoch": 118.88235294117646, "grad_norm": 1.0343669652938843, "learning_rate": 1.6483246079680055e-05, "loss": 0.0175, "step": 2021 }, { "epoch": 118.94117647058823, "grad_norm": 1.2800501585006714, "learning_rate": 1.6479619062349048e-05, "loss": 0.0219, "step": 2022 }, { "epoch": 119.0, "grad_norm": 1.2035579681396484, "learning_rate": 1.6475990575117603e-05, "loss": 0.0449, "step": 2023 }, { "epoch": 119.05882352941177, "grad_norm": 0.9439592957496643, "learning_rate": 1.647236061880885e-05, "loss": 0.0175, "step": 2024 }, { "epoch": 119.11764705882354, "grad_norm": 0.9799657464027405, "learning_rate": 1.6468729194246244e-05, "loss": 0.0183, "step": 2025 }, { "epoch": 119.17647058823529, "grad_norm": 1.614474892616272, "learning_rate": 1.6465096302253565e-05, "loss": 0.0185, "step": 2026 }, { "epoch": 119.23529411764706, "grad_norm": 1.8540197610855103, "learning_rate": 1.6461461943654942e-05, "loss": 0.0326, "step": 2027 }, { "epoch": 119.29411764705883, "grad_norm": 1.9675071239471436, "learning_rate": 1.6457826119274827e-05, "loss": 0.0394, "step": 2028 }, { "epoch": 119.3529411764706, "grad_norm": 1.2668298482894897, "learning_rate": 1.6454188829938006e-05, "loss": 0.0339, "step": 2029 }, { "epoch": 119.41176470588235, "grad_norm": 1.032975196838379, "learning_rate": 1.6450550076469594e-05, "loss": 0.0131, "step": 2030 }, { "epoch": 119.47058823529412, "grad_norm": 1.2147698402404785, "learning_rate": 1.6446909859695044e-05, "loss": 0.0344, "step": 2031 }, { "epoch": 119.52941176470588, "grad_norm": 1.327880620956421, "learning_rate": 1.644326818044014e-05, "loss": 0.0244, "step": 2032 }, { "epoch": 119.58823529411765, "grad_norm": 0.8889104723930359, "learning_rate": 1.643962503953099e-05, "loss": 0.0228, "step": 2033 }, { "epoch": 119.6470588235294, "grad_norm": 0.70361328125, "learning_rate": 1.643598043779405e-05, "loss": 0.0157, "step": 2034 }, { "epoch": 119.70588235294117, "grad_norm": 1.4766441583633423, "learning_rate": 1.643233437605609e-05, "loss": 0.0572, "step": 2035 }, { "epoch": 119.76470588235294, "grad_norm": 1.5108286142349243, "learning_rate": 1.6428686855144222e-05, "loss": 0.0353, "step": 2036 }, { "epoch": 119.82352941176471, "grad_norm": 0.7618154883384705, "learning_rate": 1.6425037875885882e-05, "loss": 0.0174, "step": 2037 }, { "epoch": 119.88235294117646, "grad_norm": 0.8423704504966736, "learning_rate": 1.6421387439108845e-05, "loss": 0.029, "step": 2038 }, { "epoch": 119.94117647058823, "grad_norm": 2.9853498935699463, "learning_rate": 1.6417735545641206e-05, "loss": 0.0588, "step": 2039 }, { "epoch": 120.0, "grad_norm": 1.3551998138427734, "learning_rate": 1.6414082196311402e-05, "loss": 0.0219, "step": 2040 }, { "epoch": 120.05882352941177, "grad_norm": 0.5134773254394531, "learning_rate": 1.6410427391948188e-05, "loss": 0.0106, "step": 2041 }, { "epoch": 120.11764705882354, "grad_norm": 0.8803375959396362, "learning_rate": 1.6406771133380663e-05, "loss": 0.0196, "step": 2042 }, { "epoch": 120.17647058823529, "grad_norm": 1.5165902376174927, "learning_rate": 1.6403113421438246e-05, "loss": 0.0274, "step": 2043 }, { "epoch": 120.23529411764706, "grad_norm": 1.2954515218734741, "learning_rate": 1.6399454256950686e-05, "loss": 0.0375, "step": 2044 }, { "epoch": 120.29411764705883, "grad_norm": 0.9533706903457642, "learning_rate": 1.6395793640748065e-05, "loss": 0.0145, "step": 2045 }, { "epoch": 120.3529411764706, "grad_norm": 1.2363831996917725, "learning_rate": 1.6392131573660795e-05, "loss": 0.0474, "step": 2046 }, { "epoch": 120.41176470588235, "grad_norm": 1.6459544897079468, "learning_rate": 1.638846805651961e-05, "loss": 0.0219, "step": 2047 }, { "epoch": 120.47058823529412, "grad_norm": 0.7514832019805908, "learning_rate": 1.638480309015559e-05, "loss": 0.0133, "step": 2048 }, { "epoch": 120.52941176470588, "grad_norm": 0.7563949823379517, "learning_rate": 1.638113667540012e-05, "loss": 0.011, "step": 2049 }, { "epoch": 120.58823529411765, "grad_norm": 3.3088626861572266, "learning_rate": 1.637746881308493e-05, "loss": 0.0254, "step": 2050 }, { "epoch": 120.6470588235294, "grad_norm": 1.570794939994812, "learning_rate": 1.6373799504042073e-05, "loss": 0.0657, "step": 2051 }, { "epoch": 120.70588235294117, "grad_norm": 1.2547242641448975, "learning_rate": 1.6370128749103932e-05, "loss": 0.036, "step": 2052 }, { "epoch": 120.76470588235294, "grad_norm": 1.377021312713623, "learning_rate": 1.636645654910322e-05, "loss": 0.0261, "step": 2053 }, { "epoch": 120.82352941176471, "grad_norm": 1.4459011554718018, "learning_rate": 1.6362782904872964e-05, "loss": 0.0505, "step": 2054 }, { "epoch": 120.88235294117646, "grad_norm": 1.380276083946228, "learning_rate": 1.6359107817246546e-05, "loss": 0.0288, "step": 2055 }, { "epoch": 120.94117647058823, "grad_norm": 1.0040935277938843, "learning_rate": 1.6355431287057644e-05, "loss": 0.0199, "step": 2056 }, { "epoch": 121.0, "grad_norm": 1.2613792419433594, "learning_rate": 1.6351753315140285e-05, "loss": 0.0307, "step": 2057 }, { "epoch": 121.05882352941177, "grad_norm": 2.3106942176818848, "learning_rate": 1.634807390232882e-05, "loss": 0.0459, "step": 2058 }, { "epoch": 121.11764705882354, "grad_norm": 1.2070554494857788, "learning_rate": 1.6344393049457915e-05, "loss": 0.0248, "step": 2059 }, { "epoch": 121.17647058823529, "grad_norm": 1.666526198387146, "learning_rate": 1.6340710757362575e-05, "loss": 0.0294, "step": 2060 }, { "epoch": 121.23529411764706, "grad_norm": 1.3586173057556152, "learning_rate": 1.633702702687813e-05, "loss": 0.0337, "step": 2061 }, { "epoch": 121.29411764705883, "grad_norm": 1.8812639713287354, "learning_rate": 1.6333341858840228e-05, "loss": 0.0328, "step": 2062 }, { "epoch": 121.3529411764706, "grad_norm": 0.9515507221221924, "learning_rate": 1.6329655254084853e-05, "loss": 0.0276, "step": 2063 }, { "epoch": 121.41176470588235, "grad_norm": 1.211322546005249, "learning_rate": 1.6325967213448307e-05, "loss": 0.03, "step": 2064 }, { "epoch": 121.47058823529412, "grad_norm": 1.576292634010315, "learning_rate": 1.6322277737767223e-05, "loss": 0.0299, "step": 2065 }, { "epoch": 121.52941176470588, "grad_norm": 2.9462037086486816, "learning_rate": 1.631858682787856e-05, "loss": 0.0164, "step": 2066 }, { "epoch": 121.58823529411765, "grad_norm": 1.0335215330123901, "learning_rate": 1.6314894484619596e-05, "loss": 0.0169, "step": 2067 }, { "epoch": 121.6470588235294, "grad_norm": 1.4415076971054077, "learning_rate": 1.6311200708827942e-05, "loss": 0.021, "step": 2068 }, { "epoch": 121.70588235294117, "grad_norm": 1.648146390914917, "learning_rate": 1.630750550134153e-05, "loss": 0.0416, "step": 2069 }, { "epoch": 121.76470588235294, "grad_norm": 1.1855179071426392, "learning_rate": 1.6303808862998617e-05, "loss": 0.0361, "step": 2070 }, { "epoch": 121.82352941176471, "grad_norm": 0.9974439740180969, "learning_rate": 1.6300110794637784e-05, "loss": 0.0197, "step": 2071 }, { "epoch": 121.88235294117646, "grad_norm": 0.8773073554039001, "learning_rate": 1.6296411297097937e-05, "loss": 0.0148, "step": 2072 }, { "epoch": 121.94117647058823, "grad_norm": 1.0646758079528809, "learning_rate": 1.6292710371218306e-05, "loss": 0.0386, "step": 2073 }, { "epoch": 122.0, "grad_norm": 1.4986156225204468, "learning_rate": 1.6289008017838447e-05, "loss": 0.0336, "step": 2074 }, { "epoch": 122.05882352941177, "grad_norm": 1.7823584079742432, "learning_rate": 1.6285304237798238e-05, "loss": 0.0173, "step": 2075 }, { "epoch": 122.11764705882354, "grad_norm": 2.05639386177063, "learning_rate": 1.628159903193788e-05, "loss": 0.0248, "step": 2076 }, { "epoch": 122.17647058823529, "grad_norm": 1.5956860780715942, "learning_rate": 1.6277892401097894e-05, "loss": 0.0661, "step": 2077 }, { "epoch": 122.23529411764706, "grad_norm": 1.3117560148239136, "learning_rate": 1.6274184346119137e-05, "loss": 0.0347, "step": 2078 }, { "epoch": 122.29411764705883, "grad_norm": 1.5625598430633545, "learning_rate": 1.6270474867842777e-05, "loss": 0.0246, "step": 2079 }, { "epoch": 122.3529411764706, "grad_norm": 0.6774130463600159, "learning_rate": 1.6266763967110307e-05, "loss": 0.0135, "step": 2080 }, { "epoch": 122.41176470588235, "grad_norm": 1.8211573362350464, "learning_rate": 1.6263051644763542e-05, "loss": 0.0483, "step": 2081 }, { "epoch": 122.47058823529412, "grad_norm": 1.949668049812317, "learning_rate": 1.625933790164463e-05, "loss": 0.0322, "step": 2082 }, { "epoch": 122.52941176470588, "grad_norm": 1.9390064477920532, "learning_rate": 1.6255622738596027e-05, "loss": 0.0309, "step": 2083 }, { "epoch": 122.58823529411765, "grad_norm": 1.2997509241104126, "learning_rate": 1.6251906156460517e-05, "loss": 0.0211, "step": 2084 }, { "epoch": 122.6470588235294, "grad_norm": 1.542000651359558, "learning_rate": 1.6248188156081205e-05, "loss": 0.0256, "step": 2085 }, { "epoch": 122.70588235294117, "grad_norm": 1.107702374458313, "learning_rate": 1.6244468738301523e-05, "loss": 0.0248, "step": 2086 }, { "epoch": 122.76470588235294, "grad_norm": 1.1634700298309326, "learning_rate": 1.6240747903965214e-05, "loss": 0.0344, "step": 2087 }, { "epoch": 122.82352941176471, "grad_norm": 1.2444396018981934, "learning_rate": 1.623702565391636e-05, "loss": 0.0163, "step": 2088 }, { "epoch": 122.88235294117646, "grad_norm": 1.5261375904083252, "learning_rate": 1.623330198899934e-05, "loss": 0.0328, "step": 2089 }, { "epoch": 122.94117647058823, "grad_norm": 1.0432908535003662, "learning_rate": 1.6229576910058872e-05, "loss": 0.013, "step": 2090 }, { "epoch": 123.0, "grad_norm": 1.0409353971481323, "learning_rate": 1.622585041793999e-05, "loss": 0.0178, "step": 2091 }, { "epoch": 123.05882352941177, "grad_norm": 1.1955652236938477, "learning_rate": 1.6222122513488048e-05, "loss": 0.0168, "step": 2092 }, { "epoch": 123.11764705882354, "grad_norm": 1.217842936515808, "learning_rate": 1.6218393197548722e-05, "loss": 0.0301, "step": 2093 }, { "epoch": 123.17647058823529, "grad_norm": 1.7670010328292847, "learning_rate": 1.6214662470968003e-05, "loss": 0.0247, "step": 2094 }, { "epoch": 123.23529411764706, "grad_norm": 0.9974545836448669, "learning_rate": 1.6210930334592207e-05, "loss": 0.0143, "step": 2095 }, { "epoch": 123.29411764705883, "grad_norm": 1.5324127674102783, "learning_rate": 1.620719678926797e-05, "loss": 0.0442, "step": 2096 }, { "epoch": 123.3529411764706, "grad_norm": 1.587709665298462, "learning_rate": 1.620346183584224e-05, "loss": 0.0515, "step": 2097 }, { "epoch": 123.41176470588235, "grad_norm": 1.7993539571762085, "learning_rate": 1.61997254751623e-05, "loss": 0.0421, "step": 2098 }, { "epoch": 123.47058823529412, "grad_norm": 1.684699535369873, "learning_rate": 1.619598770807574e-05, "loss": 0.0136, "step": 2099 }, { "epoch": 123.52941176470588, "grad_norm": 0.9327067136764526, "learning_rate": 1.6192248535430467e-05, "loss": 0.0181, "step": 2100 }, { "epoch": 123.58823529411765, "grad_norm": 0.8741617202758789, "learning_rate": 1.6188507958074716e-05, "loss": 0.0187, "step": 2101 }, { "epoch": 123.6470588235294, "grad_norm": 2.1743578910827637, "learning_rate": 1.6184765976857032e-05, "loss": 0.0225, "step": 2102 }, { "epoch": 123.70588235294117, "grad_norm": 1.7998713254928589, "learning_rate": 1.6181022592626286e-05, "loss": 0.0233, "step": 2103 }, { "epoch": 123.76470588235294, "grad_norm": 1.785646915435791, "learning_rate": 1.617727780623167e-05, "loss": 0.0203, "step": 2104 }, { "epoch": 123.82352941176471, "grad_norm": 1.4046939611434937, "learning_rate": 1.6173531618522677e-05, "loss": 0.0415, "step": 2105 }, { "epoch": 123.88235294117646, "grad_norm": 1.1642076969146729, "learning_rate": 1.6169784030349135e-05, "loss": 0.0225, "step": 2106 }, { "epoch": 123.94117647058823, "grad_norm": 1.0021806955337524, "learning_rate": 1.6166035042561183e-05, "loss": 0.0172, "step": 2107 }, { "epoch": 124.0, "grad_norm": 1.348222017288208, "learning_rate": 1.6162284656009276e-05, "loss": 0.0565, "step": 2108 }, { "epoch": 124.05882352941177, "grad_norm": 1.2895303964614868, "learning_rate": 1.615853287154419e-05, "loss": 0.0312, "step": 2109 }, { "epoch": 124.11764705882354, "grad_norm": 1.4962955713272095, "learning_rate": 1.615477969001702e-05, "loss": 0.0298, "step": 2110 }, { "epoch": 124.17647058823529, "grad_norm": 1.1473286151885986, "learning_rate": 1.6151025112279167e-05, "loss": 0.023, "step": 2111 }, { "epoch": 124.23529411764706, "grad_norm": 1.4957242012023926, "learning_rate": 1.6147269139182366e-05, "loss": 0.0316, "step": 2112 }, { "epoch": 124.29411764705883, "grad_norm": 1.2349895238876343, "learning_rate": 1.614351177157865e-05, "loss": 0.0181, "step": 2113 }, { "epoch": 124.3529411764706, "grad_norm": 1.2415951490402222, "learning_rate": 1.6139753010320384e-05, "loss": 0.0383, "step": 2114 }, { "epoch": 124.41176470588235, "grad_norm": 1.0999735593795776, "learning_rate": 1.6135992856260237e-05, "loss": 0.0253, "step": 2115 }, { "epoch": 124.47058823529412, "grad_norm": 1.1173501014709473, "learning_rate": 1.61322313102512e-05, "loss": 0.0179, "step": 2116 }, { "epoch": 124.52941176470588, "grad_norm": 1.3451766967773438, "learning_rate": 1.6128468373146584e-05, "loss": 0.0364, "step": 2117 }, { "epoch": 124.58823529411765, "grad_norm": 1.0730403661727905, "learning_rate": 1.6124704045800002e-05, "loss": 0.026, "step": 2118 }, { "epoch": 124.6470588235294, "grad_norm": 1.0121442079544067, "learning_rate": 1.6120938329065402e-05, "loss": 0.012, "step": 2119 }, { "epoch": 124.70588235294117, "grad_norm": 0.9187469482421875, "learning_rate": 1.6117171223797027e-05, "loss": 0.0168, "step": 2120 }, { "epoch": 124.76470588235294, "grad_norm": 1.3190672397613525, "learning_rate": 1.611340273084945e-05, "loss": 0.0493, "step": 2121 }, { "epoch": 124.82352941176471, "grad_norm": 1.4823434352874756, "learning_rate": 1.6109632851077546e-05, "loss": 0.044, "step": 2122 }, { "epoch": 124.88235294117646, "grad_norm": 1.1590824127197266, "learning_rate": 1.6105861585336522e-05, "loss": 0.015, "step": 2123 }, { "epoch": 124.94117647058823, "grad_norm": 1.8767305612564087, "learning_rate": 1.6102088934481882e-05, "loss": 0.0187, "step": 2124 }, { "epoch": 125.0, "grad_norm": 1.1800004243850708, "learning_rate": 1.6098314899369446e-05, "loss": 0.0331, "step": 2125 }, { "epoch": 125.05882352941177, "grad_norm": 1.1615185737609863, "learning_rate": 1.6094539480855366e-05, "loss": 0.0412, "step": 2126 }, { "epoch": 125.11764705882354, "grad_norm": 1.1093937158584595, "learning_rate": 1.6090762679796085e-05, "loss": 0.0382, "step": 2127 }, { "epoch": 125.17647058823529, "grad_norm": 1.445124626159668, "learning_rate": 1.6086984497048373e-05, "loss": 0.0345, "step": 2128 }, { "epoch": 125.23529411764706, "grad_norm": 0.869393527507782, "learning_rate": 1.608320493346931e-05, "loss": 0.0184, "step": 2129 }, { "epoch": 125.29411764705883, "grad_norm": 1.6264824867248535, "learning_rate": 1.607942398991629e-05, "loss": 0.0163, "step": 2130 }, { "epoch": 125.3529411764706, "grad_norm": 0.898322582244873, "learning_rate": 1.6075641667247017e-05, "loss": 0.0156, "step": 2131 }, { "epoch": 125.41176470588235, "grad_norm": 1.1587742567062378, "learning_rate": 1.6071857966319517e-05, "loss": 0.0182, "step": 2132 }, { "epoch": 125.47058823529412, "grad_norm": 1.2784496545791626, "learning_rate": 1.6068072887992113e-05, "loss": 0.0171, "step": 2133 }, { "epoch": 125.52941176470588, "grad_norm": 1.4171849489212036, "learning_rate": 1.606428643312345e-05, "loss": 0.0299, "step": 2134 }, { "epoch": 125.58823529411765, "grad_norm": 1.1385037899017334, "learning_rate": 1.6060498602572494e-05, "loss": 0.045, "step": 2135 }, { "epoch": 125.6470588235294, "grad_norm": 0.9899119138717651, "learning_rate": 1.6056709397198506e-05, "loss": 0.0164, "step": 2136 }, { "epoch": 125.70588235294117, "grad_norm": 1.3100498914718628, "learning_rate": 1.6052918817861064e-05, "loss": 0.0238, "step": 2137 }, { "epoch": 125.76470588235294, "grad_norm": 1.4448647499084473, "learning_rate": 1.6049126865420067e-05, "loss": 0.0221, "step": 2138 }, { "epoch": 125.82352941176471, "grad_norm": 1.2700223922729492, "learning_rate": 1.604533354073572e-05, "loss": 0.0234, "step": 2139 }, { "epoch": 125.88235294117646, "grad_norm": 0.6932899355888367, "learning_rate": 1.604153884466853e-05, "loss": 0.0163, "step": 2140 }, { "epoch": 125.94117647058823, "grad_norm": 1.4670360088348389, "learning_rate": 1.6037742778079324e-05, "loss": 0.0419, "step": 2141 }, { "epoch": 126.0, "grad_norm": 1.209381103515625, "learning_rate": 1.603394534182925e-05, "loss": 0.0181, "step": 2142 }, { "epoch": 126.05882352941177, "grad_norm": 0.7510047554969788, "learning_rate": 1.603014653677974e-05, "loss": 0.025, "step": 2143 }, { "epoch": 126.11764705882354, "grad_norm": 0.7128726243972778, "learning_rate": 1.6026346363792565e-05, "loss": 0.0169, "step": 2144 }, { "epoch": 126.17647058823529, "grad_norm": 1.1940373182296753, "learning_rate": 1.602254482372979e-05, "loss": 0.0428, "step": 2145 }, { "epoch": 126.23529411764706, "grad_norm": 1.6266348361968994, "learning_rate": 1.6018741917453788e-05, "loss": 0.0187, "step": 2146 }, { "epoch": 126.29411764705883, "grad_norm": 1.380407452583313, "learning_rate": 1.6014937645827253e-05, "loss": 0.0352, "step": 2147 }, { "epoch": 126.3529411764706, "grad_norm": 0.9491796493530273, "learning_rate": 1.6011132009713185e-05, "loss": 0.0265, "step": 2148 }, { "epoch": 126.41176470588235, "grad_norm": 0.9684766530990601, "learning_rate": 1.6007325009974887e-05, "loss": 0.0183, "step": 2149 }, { "epoch": 126.47058823529412, "grad_norm": 1.204427719116211, "learning_rate": 1.6003516647475983e-05, "loss": 0.0246, "step": 2150 }, { "epoch": 126.52941176470588, "grad_norm": 1.4105587005615234, "learning_rate": 1.599970692308039e-05, "loss": 0.0438, "step": 2151 }, { "epoch": 126.58823529411765, "grad_norm": 1.240522861480713, "learning_rate": 1.599589583765235e-05, "loss": 0.0176, "step": 2152 }, { "epoch": 126.6470588235294, "grad_norm": 0.9044825434684753, "learning_rate": 1.5992083392056405e-05, "loss": 0.0187, "step": 2153 }, { "epoch": 126.70588235294117, "grad_norm": 0.8449994921684265, "learning_rate": 1.5988269587157407e-05, "loss": 0.0119, "step": 2154 }, { "epoch": 126.76470588235294, "grad_norm": 1.8271360397338867, "learning_rate": 1.5984454423820518e-05, "loss": 0.0271, "step": 2155 }, { "epoch": 126.82352941176471, "grad_norm": 0.9066997766494751, "learning_rate": 1.5980637902911208e-05, "loss": 0.0282, "step": 2156 }, { "epoch": 126.88235294117646, "grad_norm": 1.2108418941497803, "learning_rate": 1.597682002529525e-05, "loss": 0.0347, "step": 2157 }, { "epoch": 126.94117647058823, "grad_norm": 0.8565777540206909, "learning_rate": 1.5973000791838736e-05, "loss": 0.018, "step": 2158 }, { "epoch": 127.0, "grad_norm": 2.3082494735717773, "learning_rate": 1.5969180203408052e-05, "loss": 0.0233, "step": 2159 }, { "epoch": 127.05882352941177, "grad_norm": 0.7638451457023621, "learning_rate": 1.5965358260869905e-05, "loss": 0.0152, "step": 2160 }, { "epoch": 127.11764705882354, "grad_norm": 1.0572960376739502, "learning_rate": 1.596153496509129e-05, "loss": 0.0279, "step": 2161 }, { "epoch": 127.17647058823529, "grad_norm": 1.0502336025238037, "learning_rate": 1.595771031693953e-05, "loss": 0.0297, "step": 2162 }, { "epoch": 127.23529411764706, "grad_norm": 0.9832669496536255, "learning_rate": 1.5953884317282248e-05, "loss": 0.0265, "step": 2163 }, { "epoch": 127.29411764705883, "grad_norm": 1.0871429443359375, "learning_rate": 1.5950056966987363e-05, "loss": 0.015, "step": 2164 }, { "epoch": 127.3529411764706, "grad_norm": 0.8034135103225708, "learning_rate": 1.5946228266923114e-05, "loss": 0.0251, "step": 2165 }, { "epoch": 127.41176470588235, "grad_norm": 0.893951952457428, "learning_rate": 1.594239821795804e-05, "loss": 0.0197, "step": 2166 }, { "epoch": 127.47058823529412, "grad_norm": 1.0392513275146484, "learning_rate": 1.5938566820960986e-05, "loss": 0.0227, "step": 2167 }, { "epoch": 127.52941176470588, "grad_norm": 1.3577702045440674, "learning_rate": 1.5934734076801102e-05, "loss": 0.03, "step": 2168 }, { "epoch": 127.58823529411765, "grad_norm": 1.3019332885742188, "learning_rate": 1.5930899986347852e-05, "loss": 0.0216, "step": 2169 }, { "epoch": 127.6470588235294, "grad_norm": 0.8125183582305908, "learning_rate": 1.5927064550470995e-05, "loss": 0.017, "step": 2170 }, { "epoch": 127.70588235294117, "grad_norm": 0.9904482960700989, "learning_rate": 1.5923227770040594e-05, "loss": 0.0343, "step": 2171 }, { "epoch": 127.76470588235294, "grad_norm": 1.6847602128982544, "learning_rate": 1.591938964592703e-05, "loss": 0.0338, "step": 2172 }, { "epoch": 127.82352941176471, "grad_norm": 1.0853174924850464, "learning_rate": 1.5915550179000976e-05, "loss": 0.022, "step": 2173 }, { "epoch": 127.88235294117646, "grad_norm": 1.8103705644607544, "learning_rate": 1.5911709370133418e-05, "loss": 0.0408, "step": 2174 }, { "epoch": 127.94117647058823, "grad_norm": 1.0715078115463257, "learning_rate": 1.5907867220195637e-05, "loss": 0.0292, "step": 2175 }, { "epoch": 128.0, "grad_norm": 0.7978612184524536, "learning_rate": 1.5904023730059227e-05, "loss": 0.0132, "step": 2176 }, { "epoch": 128.05882352941177, "grad_norm": 1.0642962455749512, "learning_rate": 1.590017890059609e-05, "loss": 0.0239, "step": 2177 }, { "epoch": 128.11764705882354, "grad_norm": 0.5209488868713379, "learning_rate": 1.589633273267841e-05, "loss": 0.0088, "step": 2178 }, { "epoch": 128.1764705882353, "grad_norm": 1.1666678190231323, "learning_rate": 1.58924852271787e-05, "loss": 0.0185, "step": 2179 }, { "epoch": 128.23529411764707, "grad_norm": 1.3325923681259155, "learning_rate": 1.5888636384969767e-05, "loss": 0.02, "step": 2180 }, { "epoch": 128.2941176470588, "grad_norm": 0.9331563115119934, "learning_rate": 1.5884786206924717e-05, "loss": 0.0261, "step": 2181 }, { "epoch": 128.35294117647058, "grad_norm": 1.2227956056594849, "learning_rate": 1.588093469391696e-05, "loss": 0.0334, "step": 2182 }, { "epoch": 128.41176470588235, "grad_norm": 0.929931104183197, "learning_rate": 1.5877081846820215e-05, "loss": 0.0124, "step": 2183 }, { "epoch": 128.47058823529412, "grad_norm": 1.1240346431732178, "learning_rate": 1.5873227666508494e-05, "loss": 0.015, "step": 2184 }, { "epoch": 128.52941176470588, "grad_norm": 1.8084050416946411, "learning_rate": 1.5869372153856123e-05, "loss": 0.0168, "step": 2185 }, { "epoch": 128.58823529411765, "grad_norm": 1.3064091205596924, "learning_rate": 1.5865515309737726e-05, "loss": 0.0424, "step": 2186 }, { "epoch": 128.64705882352942, "grad_norm": 1.0805590152740479, "learning_rate": 1.5861657135028222e-05, "loss": 0.0234, "step": 2187 }, { "epoch": 128.7058823529412, "grad_norm": 0.9641945958137512, "learning_rate": 1.5857797630602837e-05, "loss": 0.0266, "step": 2188 }, { "epoch": 128.76470588235293, "grad_norm": 1.5109049081802368, "learning_rate": 1.5853936797337103e-05, "loss": 0.0455, "step": 2189 }, { "epoch": 128.8235294117647, "grad_norm": 0.9503676295280457, "learning_rate": 1.5850074636106844e-05, "loss": 0.0149, "step": 2190 }, { "epoch": 128.88235294117646, "grad_norm": 1.0417628288269043, "learning_rate": 1.58462111477882e-05, "loss": 0.0194, "step": 2191 }, { "epoch": 128.94117647058823, "grad_norm": 1.146972417831421, "learning_rate": 1.584234633325759e-05, "loss": 0.0328, "step": 2192 }, { "epoch": 129.0, "grad_norm": 1.330806851387024, "learning_rate": 1.5838480193391753e-05, "loss": 0.0166, "step": 2193 }, { "epoch": 129.05882352941177, "grad_norm": 1.2099294662475586, "learning_rate": 1.5834612729067728e-05, "loss": 0.0489, "step": 2194 }, { "epoch": 129.11764705882354, "grad_norm": 0.6220524311065674, "learning_rate": 1.583074394116284e-05, "loss": 0.0124, "step": 2195 }, { "epoch": 129.1764705882353, "grad_norm": 0.712964653968811, "learning_rate": 1.5826873830554723e-05, "loss": 0.0273, "step": 2196 }, { "epoch": 129.23529411764707, "grad_norm": 1.4274324178695679, "learning_rate": 1.5823002398121318e-05, "loss": 0.0182, "step": 2197 }, { "epoch": 129.2941176470588, "grad_norm": 1.1162307262420654, "learning_rate": 1.581912964474085e-05, "loss": 0.0206, "step": 2198 }, { "epoch": 129.35294117647058, "grad_norm": 0.8562161922454834, "learning_rate": 1.5815255571291862e-05, "loss": 0.0186, "step": 2199 }, { "epoch": 129.41176470588235, "grad_norm": 1.1807365417480469, "learning_rate": 1.5811380178653175e-05, "loss": 0.0299, "step": 2200 }, { "epoch": 129.47058823529412, "grad_norm": 1.0786186456680298, "learning_rate": 1.580750346770393e-05, "loss": 0.024, "step": 2201 }, { "epoch": 129.52941176470588, "grad_norm": 1.225982904434204, "learning_rate": 1.580362543932356e-05, "loss": 0.0326, "step": 2202 }, { "epoch": 129.58823529411765, "grad_norm": 1.002803087234497, "learning_rate": 1.5799746094391786e-05, "loss": 0.0289, "step": 2203 }, { "epoch": 129.64705882352942, "grad_norm": 0.6595170497894287, "learning_rate": 1.5795865433788642e-05, "loss": 0.0119, "step": 2204 }, { "epoch": 129.7058823529412, "grad_norm": 1.0276538133621216, "learning_rate": 1.579198345839446e-05, "loss": 0.0257, "step": 2205 }, { "epoch": 129.76470588235293, "grad_norm": 1.9390277862548828, "learning_rate": 1.5788100169089852e-05, "loss": 0.0325, "step": 2206 }, { "epoch": 129.8235294117647, "grad_norm": 0.9861728549003601, "learning_rate": 1.5784215566755757e-05, "loss": 0.0188, "step": 2207 }, { "epoch": 129.88235294117646, "grad_norm": 0.8611615300178528, "learning_rate": 1.5780329652273386e-05, "loss": 0.0232, "step": 2208 }, { "epoch": 129.94117647058823, "grad_norm": 0.794869601726532, "learning_rate": 1.577644242652426e-05, "loss": 0.0155, "step": 2209 }, { "epoch": 130.0, "grad_norm": 0.8289405107498169, "learning_rate": 1.5772553890390196e-05, "loss": 0.0157, "step": 2210 }, { "epoch": 130.05882352941177, "grad_norm": 0.7518115043640137, "learning_rate": 1.576866404475331e-05, "loss": 0.0118, "step": 2211 }, { "epoch": 130.11764705882354, "grad_norm": 1.307311773300171, "learning_rate": 1.576477289049601e-05, "loss": 0.0354, "step": 2212 }, { "epoch": 130.1764705882353, "grad_norm": 1.3234901428222656, "learning_rate": 1.5760880428501007e-05, "loss": 0.0107, "step": 2213 }, { "epoch": 130.23529411764707, "grad_norm": 2.83182692527771, "learning_rate": 1.57569866596513e-05, "loss": 0.0187, "step": 2214 }, { "epoch": 130.2941176470588, "grad_norm": 0.892569363117218, "learning_rate": 1.5753091584830197e-05, "loss": 0.0246, "step": 2215 }, { "epoch": 130.35294117647058, "grad_norm": 0.9794045090675354, "learning_rate": 1.5749195204921286e-05, "loss": 0.0258, "step": 2216 }, { "epoch": 130.41176470588235, "grad_norm": 1.5632238388061523, "learning_rate": 1.574529752080847e-05, "loss": 0.028, "step": 2217 }, { "epoch": 130.47058823529412, "grad_norm": 1.112045168876648, "learning_rate": 1.5741398533375928e-05, "loss": 0.023, "step": 2218 }, { "epoch": 130.52941176470588, "grad_norm": 1.1699973344802856, "learning_rate": 1.5737498243508153e-05, "loss": 0.0188, "step": 2219 }, { "epoch": 130.58823529411765, "grad_norm": 1.1465950012207031, "learning_rate": 1.5733596652089914e-05, "loss": 0.0267, "step": 2220 }, { "epoch": 130.64705882352942, "grad_norm": 1.0665029287338257, "learning_rate": 1.5729693760006303e-05, "loss": 0.0149, "step": 2221 }, { "epoch": 130.7058823529412, "grad_norm": 1.751857876777649, "learning_rate": 1.5725789568142678e-05, "loss": 0.027, "step": 2222 }, { "epoch": 130.76470588235293, "grad_norm": 0.7038673162460327, "learning_rate": 1.572188407738471e-05, "loss": 0.0147, "step": 2223 }, { "epoch": 130.8235294117647, "grad_norm": 1.1428756713867188, "learning_rate": 1.571797728861835e-05, "loss": 0.022, "step": 2224 }, { "epoch": 130.88235294117646, "grad_norm": 1.3009415864944458, "learning_rate": 1.5714069202729866e-05, "loss": 0.0446, "step": 2225 }, { "epoch": 130.94117647058823, "grad_norm": 1.1797330379486084, "learning_rate": 1.5710159820605792e-05, "loss": 0.034, "step": 2226 }, { "epoch": 131.0, "grad_norm": 0.8811649680137634, "learning_rate": 1.5706249143132982e-05, "loss": 0.0161, "step": 2227 }, { "epoch": 131.05882352941177, "grad_norm": 0.9773036241531372, "learning_rate": 1.570233717119857e-05, "loss": 0.0191, "step": 2228 }, { "epoch": 131.11764705882354, "grad_norm": 1.5034610033035278, "learning_rate": 1.569842390568998e-05, "loss": 0.0371, "step": 2229 }, { "epoch": 131.1764705882353, "grad_norm": 1.3593825101852417, "learning_rate": 1.5694509347494943e-05, "loss": 0.021, "step": 2230 }, { "epoch": 131.23529411764707, "grad_norm": 1.3860833644866943, "learning_rate": 1.5690593497501474e-05, "loss": 0.0246, "step": 2231 }, { "epoch": 131.2941176470588, "grad_norm": 0.7345595955848694, "learning_rate": 1.568667635659788e-05, "loss": 0.024, "step": 2232 }, { "epoch": 131.35294117647058, "grad_norm": 1.0760750770568848, "learning_rate": 1.5682757925672764e-05, "loss": 0.0214, "step": 2233 }, { "epoch": 131.41176470588235, "grad_norm": 0.9526598453521729, "learning_rate": 1.5678838205615026e-05, "loss": 0.0146, "step": 2234 }, { "epoch": 131.47058823529412, "grad_norm": 0.6399233341217041, "learning_rate": 1.567491719731385e-05, "loss": 0.0087, "step": 2235 }, { "epoch": 131.52941176470588, "grad_norm": 0.7677380442619324, "learning_rate": 1.5670994901658715e-05, "loss": 0.0187, "step": 2236 }, { "epoch": 131.58823529411765, "grad_norm": 0.9129673838615417, "learning_rate": 1.5667071319539397e-05, "loss": 0.0178, "step": 2237 }, { "epoch": 131.64705882352942, "grad_norm": 2.8888003826141357, "learning_rate": 1.5663146451845956e-05, "loss": 0.0282, "step": 2238 }, { "epoch": 131.7058823529412, "grad_norm": 0.9228150844573975, "learning_rate": 1.5659220299468748e-05, "loss": 0.0268, "step": 2239 }, { "epoch": 131.76470588235293, "grad_norm": 1.20578932762146, "learning_rate": 1.5655292863298427e-05, "loss": 0.0209, "step": 2240 }, { "epoch": 131.8235294117647, "grad_norm": 0.9214701056480408, "learning_rate": 1.565136414422592e-05, "loss": 0.0233, "step": 2241 }, { "epoch": 131.88235294117646, "grad_norm": 1.1176868677139282, "learning_rate": 1.5647434143142464e-05, "loss": 0.0345, "step": 2242 }, { "epoch": 131.94117647058823, "grad_norm": 1.0181312561035156, "learning_rate": 1.5643502860939577e-05, "loss": 0.0259, "step": 2243 }, { "epoch": 132.0, "grad_norm": 1.4867730140686035, "learning_rate": 1.5639570298509067e-05, "loss": 0.0135, "step": 2244 }, { "epoch": 132.05882352941177, "grad_norm": 3.466097354888916, "learning_rate": 1.5635636456743037e-05, "loss": 0.0393, "step": 2245 }, { "epoch": 132.11764705882354, "grad_norm": 1.0958499908447266, "learning_rate": 1.563170133653388e-05, "loss": 0.0303, "step": 2246 }, { "epoch": 132.1764705882353, "grad_norm": 1.365338683128357, "learning_rate": 1.5627764938774277e-05, "loss": 0.0178, "step": 2247 }, { "epoch": 132.23529411764707, "grad_norm": 1.0934871435165405, "learning_rate": 1.5623827264357196e-05, "loss": 0.0252, "step": 2248 }, { "epoch": 132.2941176470588, "grad_norm": 0.8306342363357544, "learning_rate": 1.5619888314175904e-05, "loss": 0.0169, "step": 2249 }, { "epoch": 132.35294117647058, "grad_norm": 0.8736316561698914, "learning_rate": 1.5615948089123945e-05, "loss": 0.011, "step": 2250 }, { "epoch": 132.41176470588235, "grad_norm": 0.7483803629875183, "learning_rate": 1.5612006590095158e-05, "loss": 0.013, "step": 2251 }, { "epoch": 132.47058823529412, "grad_norm": 2.026529550552368, "learning_rate": 1.5608063817983677e-05, "loss": 0.0366, "step": 2252 }, { "epoch": 132.52941176470588, "grad_norm": 0.9229958653450012, "learning_rate": 1.5604119773683913e-05, "loss": 0.0118, "step": 2253 }, { "epoch": 132.58823529411765, "grad_norm": 0.88579922914505, "learning_rate": 1.560017445809058e-05, "loss": 0.0172, "step": 2254 }, { "epoch": 132.64705882352942, "grad_norm": 0.7884432673454285, "learning_rate": 1.5596227872098663e-05, "loss": 0.0139, "step": 2255 }, { "epoch": 132.7058823529412, "grad_norm": 1.0669453144073486, "learning_rate": 1.5592280016603447e-05, "loss": 0.0317, "step": 2256 }, { "epoch": 132.76470588235293, "grad_norm": 1.6400725841522217, "learning_rate": 1.5588330892500503e-05, "loss": 0.0227, "step": 2257 }, { "epoch": 132.8235294117647, "grad_norm": 1.0260952711105347, "learning_rate": 1.5584380500685693e-05, "loss": 0.0315, "step": 2258 }, { "epoch": 132.88235294117646, "grad_norm": 1.4229627847671509, "learning_rate": 1.558042884205516e-05, "loss": 0.0309, "step": 2259 }, { "epoch": 132.94117647058823, "grad_norm": 1.6640238761901855, "learning_rate": 1.5576475917505334e-05, "loss": 0.0153, "step": 2260 }, { "epoch": 133.0, "grad_norm": 1.5642647743225098, "learning_rate": 1.5572521727932937e-05, "loss": 0.0484, "step": 2261 }, { "epoch": 133.05882352941177, "grad_norm": 0.6328088045120239, "learning_rate": 1.5568566274234976e-05, "loss": 0.0131, "step": 2262 }, { "epoch": 133.11764705882354, "grad_norm": 0.8928561210632324, "learning_rate": 1.556460955730875e-05, "loss": 0.0277, "step": 2263 }, { "epoch": 133.1764705882353, "grad_norm": 1.3067985773086548, "learning_rate": 1.5560651578051832e-05, "loss": 0.0377, "step": 2264 }, { "epoch": 133.23529411764707, "grad_norm": 0.907798707485199, "learning_rate": 1.555669233736209e-05, "loss": 0.0227, "step": 2265 }, { "epoch": 133.2941176470588, "grad_norm": 1.3276623487472534, "learning_rate": 1.5552731836137684e-05, "loss": 0.0205, "step": 2266 }, { "epoch": 133.35294117647058, "grad_norm": 1.1998987197875977, "learning_rate": 1.5548770075277045e-05, "loss": 0.0381, "step": 2267 }, { "epoch": 133.41176470588235, "grad_norm": 0.9382331371307373, "learning_rate": 1.55448070556789e-05, "loss": 0.0195, "step": 2268 }, { "epoch": 133.47058823529412, "grad_norm": 1.2647470235824585, "learning_rate": 1.5540842778242264e-05, "loss": 0.0242, "step": 2269 }, { "epoch": 133.52941176470588, "grad_norm": 1.1523661613464355, "learning_rate": 1.5536877243866427e-05, "loss": 0.0182, "step": 2270 }, { "epoch": 133.58823529411765, "grad_norm": 0.9978881478309631, "learning_rate": 1.5532910453450973e-05, "loss": 0.0179, "step": 2271 }, { "epoch": 133.64705882352942, "grad_norm": 0.8383499979972839, "learning_rate": 1.552894240789576e-05, "loss": 0.0203, "step": 2272 }, { "epoch": 133.7058823529412, "grad_norm": 0.8373019695281982, "learning_rate": 1.5524973108100954e-05, "loss": 0.0165, "step": 2273 }, { "epoch": 133.76470588235293, "grad_norm": 0.9543101191520691, "learning_rate": 1.5521002554966976e-05, "loss": 0.0208, "step": 2274 }, { "epoch": 133.8235294117647, "grad_norm": 1.2598679065704346, "learning_rate": 1.5517030749394547e-05, "loss": 0.0276, "step": 2275 }, { "epoch": 133.88235294117646, "grad_norm": 0.9453441500663757, "learning_rate": 1.5513057692284678e-05, "loss": 0.017, "step": 2276 }, { "epoch": 133.94117647058823, "grad_norm": 1.2099968194961548, "learning_rate": 1.5509083384538654e-05, "loss": 0.0154, "step": 2277 }, { "epoch": 134.0, "grad_norm": 0.8689420223236084, "learning_rate": 1.5505107827058038e-05, "loss": 0.0207, "step": 2278 }, { "epoch": 134.05882352941177, "grad_norm": 1.1150339841842651, "learning_rate": 1.5501131020744692e-05, "loss": 0.0278, "step": 2279 }, { "epoch": 134.11764705882354, "grad_norm": 1.17989182472229, "learning_rate": 1.549715296650075e-05, "loss": 0.0286, "step": 2280 }, { "epoch": 134.1764705882353, "grad_norm": 1.1243904829025269, "learning_rate": 1.5493173665228637e-05, "loss": 0.0294, "step": 2281 }, { "epoch": 134.23529411764707, "grad_norm": 0.8388255834579468, "learning_rate": 1.5489193117831056e-05, "loss": 0.0116, "step": 2282 }, { "epoch": 134.2941176470588, "grad_norm": 1.2901341915130615, "learning_rate": 1.548521132521099e-05, "loss": 0.0314, "step": 2283 }, { "epoch": 134.35294117647058, "grad_norm": 0.8371298909187317, "learning_rate": 1.5481228288271712e-05, "loss": 0.019, "step": 2284 }, { "epoch": 134.41176470588235, "grad_norm": 1.0687940120697021, "learning_rate": 1.5477244007916768e-05, "loss": 0.0258, "step": 2285 }, { "epoch": 134.47058823529412, "grad_norm": 1.0967172384262085, "learning_rate": 1.5473258485049997e-05, "loss": 0.0339, "step": 2286 }, { "epoch": 134.52941176470588, "grad_norm": 1.2482709884643555, "learning_rate": 1.546927172057551e-05, "loss": 0.0248, "step": 2287 }, { "epoch": 134.58823529411765, "grad_norm": 1.5236893892288208, "learning_rate": 1.546528371539771e-05, "loss": 0.0113, "step": 2288 }, { "epoch": 134.64705882352942, "grad_norm": 1.0375782251358032, "learning_rate": 1.546129447042127e-05, "loss": 0.0265, "step": 2289 }, { "epoch": 134.7058823529412, "grad_norm": 0.8551545143127441, "learning_rate": 1.5457303986551146e-05, "loss": 0.0178, "step": 2290 }, { "epoch": 134.76470588235293, "grad_norm": 0.8587495684623718, "learning_rate": 1.5453312264692584e-05, "loss": 0.0197, "step": 2291 }, { "epoch": 134.8235294117647, "grad_norm": 1.4558767080307007, "learning_rate": 1.5449319305751108e-05, "loss": 0.0129, "step": 2292 }, { "epoch": 134.88235294117646, "grad_norm": 1.265342354774475, "learning_rate": 1.5445325110632513e-05, "loss": 0.0174, "step": 2293 }, { "epoch": 134.94117647058823, "grad_norm": 1.1391764879226685, "learning_rate": 1.5441329680242887e-05, "loss": 0.0152, "step": 2294 }, { "epoch": 135.0, "grad_norm": 0.9027068614959717, "learning_rate": 1.5437333015488586e-05, "loss": 0.0286, "step": 2295 }, { "epoch": 135.05882352941177, "grad_norm": 0.8458561301231384, "learning_rate": 1.543333511727626e-05, "loss": 0.0135, "step": 2296 }, { "epoch": 135.11764705882354, "grad_norm": 1.5428718328475952, "learning_rate": 1.542933598651283e-05, "loss": 0.0195, "step": 2297 }, { "epoch": 135.1764705882353, "grad_norm": 0.9559590220451355, "learning_rate": 1.5425335624105495e-05, "loss": 0.0286, "step": 2298 }, { "epoch": 135.23529411764707, "grad_norm": 1.0897142887115479, "learning_rate": 1.5421334030961738e-05, "loss": 0.0136, "step": 2299 }, { "epoch": 135.2941176470588, "grad_norm": 1.2206196784973145, "learning_rate": 1.541733120798932e-05, "loss": 0.0259, "step": 2300 }, { "epoch": 135.35294117647058, "grad_norm": 1.2655857801437378, "learning_rate": 1.5413327156096284e-05, "loss": 0.0208, "step": 2301 }, { "epoch": 135.41176470588235, "grad_norm": 1.0833007097244263, "learning_rate": 1.5409321876190944e-05, "loss": 0.0337, "step": 2302 }, { "epoch": 135.47058823529412, "grad_norm": 1.233199954032898, "learning_rate": 1.5405315369181896e-05, "loss": 0.0273, "step": 2303 }, { "epoch": 135.52941176470588, "grad_norm": 0.926562488079071, "learning_rate": 1.5401307635978016e-05, "loss": 0.0248, "step": 2304 }, { "epoch": 135.58823529411765, "grad_norm": 0.7337629795074463, "learning_rate": 1.539729867748846e-05, "loss": 0.0185, "step": 2305 }, { "epoch": 135.64705882352942, "grad_norm": 0.8782310485839844, "learning_rate": 1.539328849462266e-05, "loss": 0.0224, "step": 2306 }, { "epoch": 135.7058823529412, "grad_norm": 1.2968487739562988, "learning_rate": 1.5389277088290326e-05, "loss": 0.0364, "step": 2307 }, { "epoch": 135.76470588235293, "grad_norm": 0.7615335583686829, "learning_rate": 1.5385264459401436e-05, "loss": 0.0154, "step": 2308 }, { "epoch": 135.8235294117647, "grad_norm": 0.7639279961585999, "learning_rate": 1.5381250608866267e-05, "loss": 0.0136, "step": 2309 }, { "epoch": 135.88235294117646, "grad_norm": 0.6830813884735107, "learning_rate": 1.5377235537595354e-05, "loss": 0.0122, "step": 2310 }, { "epoch": 135.94117647058823, "grad_norm": 1.361939549446106, "learning_rate": 1.5373219246499512e-05, "loss": 0.0172, "step": 2311 }, { "epoch": 136.0, "grad_norm": 0.9353744983673096, "learning_rate": 1.536920173648984e-05, "loss": 0.012, "step": 2312 }, { "epoch": 136.05882352941177, "grad_norm": 1.3595281839370728, "learning_rate": 1.536518300847771e-05, "loss": 0.0293, "step": 2313 }, { "epoch": 136.11764705882354, "grad_norm": 1.0525085926055908, "learning_rate": 1.5361163063374768e-05, "loss": 0.0274, "step": 2314 }, { "epoch": 136.1764705882353, "grad_norm": 1.1343921422958374, "learning_rate": 1.535714190209294e-05, "loss": 0.0195, "step": 2315 }, { "epoch": 136.23529411764707, "grad_norm": 0.8758456110954285, "learning_rate": 1.5353119525544417e-05, "loss": 0.0241, "step": 2316 }, { "epoch": 136.2941176470588, "grad_norm": 1.1039693355560303, "learning_rate": 1.534909593464169e-05, "loss": 0.0221, "step": 2317 }, { "epoch": 136.35294117647058, "grad_norm": 1.1376533508300781, "learning_rate": 1.5345071130297493e-05, "loss": 0.023, "step": 2318 }, { "epoch": 136.41176470588235, "grad_norm": 1.1482734680175781, "learning_rate": 1.5341045113424865e-05, "loss": 0.0243, "step": 2319 }, { "epoch": 136.47058823529412, "grad_norm": 0.8277150392532349, "learning_rate": 1.53370178849371e-05, "loss": 0.016, "step": 2320 }, { "epoch": 136.52941176470588, "grad_norm": 1.1077748537063599, "learning_rate": 1.5332989445747782e-05, "loss": 0.0135, "step": 2321 }, { "epoch": 136.58823529411765, "grad_norm": 1.3010438680648804, "learning_rate": 1.5328959796770754e-05, "loss": 0.016, "step": 2322 }, { "epoch": 136.64705882352942, "grad_norm": 0.9487817287445068, "learning_rate": 1.532492893892014e-05, "loss": 0.0132, "step": 2323 }, { "epoch": 136.7058823529412, "grad_norm": 0.9804069995880127, "learning_rate": 1.532089687311035e-05, "loss": 0.0237, "step": 2324 }, { "epoch": 136.76470588235293, "grad_norm": 1.3098186254501343, "learning_rate": 1.5316863600256045e-05, "loss": 0.0227, "step": 2325 }, { "epoch": 136.8235294117647, "grad_norm": 1.5102324485778809, "learning_rate": 1.531282912127218e-05, "loss": 0.0278, "step": 2326 }, { "epoch": 136.88235294117646, "grad_norm": 1.7954773902893066, "learning_rate": 1.530879343707397e-05, "loss": 0.0236, "step": 2327 }, { "epoch": 136.94117647058823, "grad_norm": 1.3943928480148315, "learning_rate": 1.5304756548576913e-05, "loss": 0.0196, "step": 2328 }, { "epoch": 137.0, "grad_norm": 1.2410210371017456, "learning_rate": 1.530071845669678e-05, "loss": 0.0166, "step": 2329 }, { "epoch": 137.05882352941177, "grad_norm": 0.6762658357620239, "learning_rate": 1.5296679162349607e-05, "loss": 0.0129, "step": 2330 }, { "epoch": 137.11764705882354, "grad_norm": 0.869182825088501, "learning_rate": 1.5292638666451703e-05, "loss": 0.0149, "step": 2331 }, { "epoch": 137.1764705882353, "grad_norm": 1.7164219617843628, "learning_rate": 1.5288596969919664e-05, "loss": 0.0291, "step": 2332 }, { "epoch": 137.23529411764707, "grad_norm": 1.218329668045044, "learning_rate": 1.528455407367034e-05, "loss": 0.0243, "step": 2333 }, { "epoch": 137.2941176470588, "grad_norm": 0.8941040635108948, "learning_rate": 1.5280509978620862e-05, "loss": 0.0121, "step": 2334 }, { "epoch": 137.35294117647058, "grad_norm": 1.0690075159072876, "learning_rate": 1.5276464685688638e-05, "loss": 0.0242, "step": 2335 }, { "epoch": 137.41176470588235, "grad_norm": 0.6569761633872986, "learning_rate": 1.5272418195791336e-05, "loss": 0.0129, "step": 2336 }, { "epoch": 137.47058823529412, "grad_norm": 1.5968353748321533, "learning_rate": 1.5268370509846904e-05, "loss": 0.0168, "step": 2337 }, { "epoch": 137.52941176470588, "grad_norm": 1.2631945610046387, "learning_rate": 1.526432162877356e-05, "loss": 0.0142, "step": 2338 }, { "epoch": 137.58823529411765, "grad_norm": 0.968915581703186, "learning_rate": 1.5260271553489787e-05, "loss": 0.0149, "step": 2339 }, { "epoch": 137.64705882352942, "grad_norm": 1.2227362394332886, "learning_rate": 1.525622028491435e-05, "loss": 0.0459, "step": 2340 }, { "epoch": 137.7058823529412, "grad_norm": 0.7151662111282349, "learning_rate": 1.5252167823966276e-05, "loss": 0.015, "step": 2341 }, { "epoch": 137.76470588235293, "grad_norm": 1.0352500677108765, "learning_rate": 1.5248114171564866e-05, "loss": 0.0212, "step": 2342 }, { "epoch": 137.8235294117647, "grad_norm": 1.6482559442520142, "learning_rate": 1.5244059328629686e-05, "loss": 0.0319, "step": 2343 }, { "epoch": 137.88235294117646, "grad_norm": 0.7760540843009949, "learning_rate": 1.5240003296080578e-05, "loss": 0.0108, "step": 2344 }, { "epoch": 137.94117647058823, "grad_norm": 1.7664194107055664, "learning_rate": 1.523594607483766e-05, "loss": 0.0258, "step": 2345 }, { "epoch": 138.0, "grad_norm": 1.2017863988876343, "learning_rate": 1.52318876658213e-05, "loss": 0.0311, "step": 2346 }, { "epoch": 138.05882352941177, "grad_norm": 1.1733293533325195, "learning_rate": 1.522782806995216e-05, "loss": 0.0236, "step": 2347 }, { "epoch": 138.11764705882354, "grad_norm": 0.6668499708175659, "learning_rate": 1.5223767288151145e-05, "loss": 0.0087, "step": 2348 }, { "epoch": 138.1764705882353, "grad_norm": 0.7458387613296509, "learning_rate": 1.5219705321339453e-05, "loss": 0.0114, "step": 2349 }, { "epoch": 138.23529411764707, "grad_norm": 1.2077187299728394, "learning_rate": 1.5215642170438533e-05, "loss": 0.0286, "step": 2350 }, { "epoch": 138.2941176470588, "grad_norm": 1.5628808736801147, "learning_rate": 1.521157783637012e-05, "loss": 0.0281, "step": 2351 }, { "epoch": 138.35294117647058, "grad_norm": 0.8522027730941772, "learning_rate": 1.5207512320056198e-05, "loss": 0.0152, "step": 2352 }, { "epoch": 138.41176470588235, "grad_norm": 1.3865246772766113, "learning_rate": 1.5203445622419031e-05, "loss": 0.0237, "step": 2353 }, { "epoch": 138.47058823529412, "grad_norm": 1.3676376342773438, "learning_rate": 1.5199377744381152e-05, "loss": 0.0249, "step": 2354 }, { "epoch": 138.52941176470588, "grad_norm": 1.1285159587860107, "learning_rate": 1.5195308686865355e-05, "loss": 0.0184, "step": 2355 }, { "epoch": 138.58823529411765, "grad_norm": 1.0332132577896118, "learning_rate": 1.5191238450794706e-05, "loss": 0.0179, "step": 2356 }, { "epoch": 138.64705882352942, "grad_norm": 0.8295021057128906, "learning_rate": 1.5187167037092536e-05, "loss": 0.0203, "step": 2357 }, { "epoch": 138.7058823529412, "grad_norm": 0.9386031031608582, "learning_rate": 1.518309444668245e-05, "loss": 0.0223, "step": 2358 }, { "epoch": 138.76470588235293, "grad_norm": 1.7711853981018066, "learning_rate": 1.5179020680488306e-05, "loss": 0.0234, "step": 2359 }, { "epoch": 138.8235294117647, "grad_norm": 1.0818281173706055, "learning_rate": 1.5174945739434244e-05, "loss": 0.0409, "step": 2360 }, { "epoch": 138.88235294117646, "grad_norm": 0.7785264253616333, "learning_rate": 1.517086962444466e-05, "loss": 0.012, "step": 2361 }, { "epoch": 138.94117647058823, "grad_norm": 0.9208652973175049, "learning_rate": 1.5166792336444221e-05, "loss": 0.018, "step": 2362 }, { "epoch": 139.0, "grad_norm": 1.0070348978042603, "learning_rate": 1.516271387635786e-05, "loss": 0.0167, "step": 2363 }, { "epoch": 139.05882352941177, "grad_norm": 2.0750629901885986, "learning_rate": 1.5158634245110771e-05, "loss": 0.0237, "step": 2364 }, { "epoch": 139.11764705882354, "grad_norm": 0.5604813098907471, "learning_rate": 1.5154553443628421e-05, "loss": 0.0107, "step": 2365 }, { "epoch": 139.1764705882353, "grad_norm": 0.8265314102172852, "learning_rate": 1.5150471472836543e-05, "loss": 0.0185, "step": 2366 }, { "epoch": 139.23529411764707, "grad_norm": 1.042266607284546, "learning_rate": 1.5146388333661125e-05, "loss": 0.0171, "step": 2367 }, { "epoch": 139.2941176470588, "grad_norm": 1.2669466733932495, "learning_rate": 1.5142304027028427e-05, "loss": 0.0156, "step": 2368 }, { "epoch": 139.35294117647058, "grad_norm": 1.1103209257125854, "learning_rate": 1.5138218553864974e-05, "loss": 0.0193, "step": 2369 }, { "epoch": 139.41176470588235, "grad_norm": 1.4374734163284302, "learning_rate": 1.5134131915097562e-05, "loss": 0.0248, "step": 2370 }, { "epoch": 139.47058823529412, "grad_norm": 1.3633480072021484, "learning_rate": 1.5130044111653233e-05, "loss": 0.0185, "step": 2371 }, { "epoch": 139.52941176470588, "grad_norm": 0.933364987373352, "learning_rate": 1.5125955144459308e-05, "loss": 0.0176, "step": 2372 }, { "epoch": 139.58823529411765, "grad_norm": 1.2611202001571655, "learning_rate": 1.5121865014443371e-05, "loss": 0.0197, "step": 2373 }, { "epoch": 139.64705882352942, "grad_norm": 1.2702100276947021, "learning_rate": 1.5117773722533264e-05, "loss": 0.0206, "step": 2374 }, { "epoch": 139.7058823529412, "grad_norm": 0.9342007040977478, "learning_rate": 1.5113681269657103e-05, "loss": 0.0333, "step": 2375 }, { "epoch": 139.76470588235293, "grad_norm": 0.662806510925293, "learning_rate": 1.5109587656743251e-05, "loss": 0.0123, "step": 2376 }, { "epoch": 139.8235294117647, "grad_norm": 1.1493234634399414, "learning_rate": 1.510549288472035e-05, "loss": 0.0371, "step": 2377 }, { "epoch": 139.88235294117646, "grad_norm": 0.8899133801460266, "learning_rate": 1.5101396954517293e-05, "loss": 0.0243, "step": 2378 }, { "epoch": 139.94117647058823, "grad_norm": 1.2125028371810913, "learning_rate": 1.5097299867063248e-05, "loss": 0.0316, "step": 2379 }, { "epoch": 140.0, "grad_norm": 0.9888961315155029, "learning_rate": 1.5093201623287631e-05, "loss": 0.0156, "step": 2380 }, { "epoch": 140.05882352941177, "grad_norm": 1.1355117559432983, "learning_rate": 1.5089102224120134e-05, "loss": 0.0184, "step": 2381 }, { "epoch": 140.11764705882354, "grad_norm": 0.9236961603164673, "learning_rate": 1.50850016704907e-05, "loss": 0.0226, "step": 2382 }, { "epoch": 140.1764705882353, "grad_norm": 2.0326344966888428, "learning_rate": 1.508089996332954e-05, "loss": 0.0325, "step": 2383 }, { "epoch": 140.23529411764707, "grad_norm": 0.8808832168579102, "learning_rate": 1.5076797103567131e-05, "loss": 0.0231, "step": 2384 }, { "epoch": 140.2941176470588, "grad_norm": 0.7482343912124634, "learning_rate": 1.50726930921342e-05, "loss": 0.0139, "step": 2385 }, { "epoch": 140.35294117647058, "grad_norm": 1.0368894338607788, "learning_rate": 1.506858792996174e-05, "loss": 0.0126, "step": 2386 }, { "epoch": 140.41176470588235, "grad_norm": 1.0043660402297974, "learning_rate": 1.506448161798101e-05, "loss": 0.0107, "step": 2387 }, { "epoch": 140.47058823529412, "grad_norm": 1.3852325677871704, "learning_rate": 1.5060374157123527e-05, "loss": 0.037, "step": 2388 }, { "epoch": 140.52941176470588, "grad_norm": 0.9034555554389954, "learning_rate": 1.5056265548321065e-05, "loss": 0.031, "step": 2389 }, { "epoch": 140.58823529411765, "grad_norm": 0.9304713606834412, "learning_rate": 1.5052155792505665e-05, "loss": 0.0214, "step": 2390 }, { "epoch": 140.64705882352942, "grad_norm": 0.7199295163154602, "learning_rate": 1.5048044890609617e-05, "loss": 0.0076, "step": 2391 }, { "epoch": 140.7058823529412, "grad_norm": 1.1552186012268066, "learning_rate": 1.5043932843565485e-05, "loss": 0.0292, "step": 2392 }, { "epoch": 140.76470588235293, "grad_norm": 1.0307267904281616, "learning_rate": 1.5039819652306085e-05, "loss": 0.0161, "step": 2393 }, { "epoch": 140.8235294117647, "grad_norm": 1.873308777809143, "learning_rate": 1.5035705317764496e-05, "loss": 0.018, "step": 2394 }, { "epoch": 140.88235294117646, "grad_norm": 0.9740332961082458, "learning_rate": 1.5031589840874045e-05, "loss": 0.0153, "step": 2395 }, { "epoch": 140.94117647058823, "grad_norm": 0.43267226219177246, "learning_rate": 1.5027473222568336e-05, "loss": 0.0079, "step": 2396 }, { "epoch": 141.0, "grad_norm": 0.691469669342041, "learning_rate": 1.5023355463781221e-05, "loss": 0.0123, "step": 2397 }, { "epoch": 141.05882352941177, "grad_norm": 2.220879554748535, "learning_rate": 1.501923656544681e-05, "loss": 0.0358, "step": 2398 }, { "epoch": 141.11764705882354, "grad_norm": 0.8513087630271912, "learning_rate": 1.5015116528499479e-05, "loss": 0.0152, "step": 2399 }, { "epoch": 141.1764705882353, "grad_norm": 1.4218844175338745, "learning_rate": 1.5010995353873852e-05, "loss": 0.0228, "step": 2400 }, { "epoch": 141.23529411764707, "grad_norm": 1.6877750158309937, "learning_rate": 1.5006873042504825e-05, "loss": 0.0103, "step": 2401 }, { "epoch": 141.2941176470588, "grad_norm": 1.0296032428741455, "learning_rate": 1.5002749595327532e-05, "loss": 0.0156, "step": 2402 }, { "epoch": 141.35294117647058, "grad_norm": 0.9818562865257263, "learning_rate": 1.4998625013277388e-05, "loss": 0.0267, "step": 2403 }, { "epoch": 141.41176470588235, "grad_norm": 0.9186837077140808, "learning_rate": 1.4994499297290046e-05, "loss": 0.0253, "step": 2404 }, { "epoch": 141.47058823529412, "grad_norm": 1.1025818586349487, "learning_rate": 1.4990372448301427e-05, "loss": 0.0208, "step": 2405 }, { "epoch": 141.52941176470588, "grad_norm": 0.9666366577148438, "learning_rate": 1.4986244467247703e-05, "loss": 0.0134, "step": 2406 }, { "epoch": 141.58823529411765, "grad_norm": 0.688225507736206, "learning_rate": 1.4982115355065311e-05, "loss": 0.0114, "step": 2407 }, { "epoch": 141.64705882352942, "grad_norm": 0.8332144618034363, "learning_rate": 1.4977985112690936e-05, "loss": 0.0142, "step": 2408 }, { "epoch": 141.7058823529412, "grad_norm": 0.9121866822242737, "learning_rate": 1.4973853741061523e-05, "loss": 0.0173, "step": 2409 }, { "epoch": 141.76470588235293, "grad_norm": 0.9431874752044678, "learning_rate": 1.4969721241114275e-05, "loss": 0.0254, "step": 2410 }, { "epoch": 141.8235294117647, "grad_norm": 0.8090367913246155, "learning_rate": 1.4965587613786642e-05, "loss": 0.0252, "step": 2411 }, { "epoch": 141.88235294117646, "grad_norm": 1.0533185005187988, "learning_rate": 1.4961452860016347e-05, "loss": 0.0181, "step": 2412 }, { "epoch": 141.94117647058823, "grad_norm": 1.3317924737930298, "learning_rate": 1.495731698074135e-05, "loss": 0.0149, "step": 2413 }, { "epoch": 142.0, "grad_norm": 1.1294068098068237, "learning_rate": 1.4953179976899878e-05, "loss": 0.023, "step": 2414 }, { "epoch": 142.05882352941177, "grad_norm": 0.8796066045761108, "learning_rate": 1.4949041849430407e-05, "loss": 0.0176, "step": 2415 }, { "epoch": 142.11764705882354, "grad_norm": 0.7835967540740967, "learning_rate": 1.4944902599271675e-05, "loss": 0.0172, "step": 2416 }, { "epoch": 142.1764705882353, "grad_norm": 1.515201449394226, "learning_rate": 1.4940762227362666e-05, "loss": 0.0284, "step": 2417 }, { "epoch": 142.23529411764707, "grad_norm": 1.3299312591552734, "learning_rate": 1.4936620734642626e-05, "loss": 0.024, "step": 2418 }, { "epoch": 142.2941176470588, "grad_norm": 0.6959709525108337, "learning_rate": 1.4932478122051049e-05, "loss": 0.0128, "step": 2419 }, { "epoch": 142.35294117647058, "grad_norm": 1.0708478689193726, "learning_rate": 1.4928334390527683e-05, "loss": 0.0217, "step": 2420 }, { "epoch": 142.41176470588235, "grad_norm": 1.0908111333847046, "learning_rate": 1.4924189541012542e-05, "loss": 0.0153, "step": 2421 }, { "epoch": 142.47058823529412, "grad_norm": 1.067492961883545, "learning_rate": 1.4920043574445875e-05, "loss": 0.0231, "step": 2422 }, { "epoch": 142.52941176470588, "grad_norm": 0.9421364665031433, "learning_rate": 1.4915896491768203e-05, "loss": 0.019, "step": 2423 }, { "epoch": 142.58823529411765, "grad_norm": 0.8192776441574097, "learning_rate": 1.4911748293920281e-05, "loss": 0.009, "step": 2424 }, { "epoch": 142.64705882352942, "grad_norm": 0.7312371730804443, "learning_rate": 1.4907598981843134e-05, "loss": 0.0166, "step": 2425 }, { "epoch": 142.7058823529412, "grad_norm": 1.030854344367981, "learning_rate": 1.490344855647803e-05, "loss": 0.0369, "step": 2426 }, { "epoch": 142.76470588235293, "grad_norm": 0.7261604070663452, "learning_rate": 1.4899297018766494e-05, "loss": 0.0126, "step": 2427 }, { "epoch": 142.8235294117647, "grad_norm": 0.5750477313995361, "learning_rate": 1.4895144369650299e-05, "loss": 0.0072, "step": 2428 }, { "epoch": 142.88235294117646, "grad_norm": 1.8252246379852295, "learning_rate": 1.4890990610071473e-05, "loss": 0.017, "step": 2429 }, { "epoch": 142.94117647058823, "grad_norm": 0.8067322373390198, "learning_rate": 1.4886835740972301e-05, "loss": 0.0171, "step": 2430 }, { "epoch": 143.0, "grad_norm": 0.9744489192962646, "learning_rate": 1.4882679763295307e-05, "loss": 0.0242, "step": 2431 }, { "epoch": 143.05882352941177, "grad_norm": 0.6817010641098022, "learning_rate": 1.4878522677983277e-05, "loss": 0.0196, "step": 2432 }, { "epoch": 143.11764705882354, "grad_norm": 0.6294306516647339, "learning_rate": 1.4874364485979244e-05, "loss": 0.0205, "step": 2433 }, { "epoch": 143.1764705882353, "grad_norm": 0.8944292068481445, "learning_rate": 1.4870205188226493e-05, "loss": 0.0244, "step": 2434 }, { "epoch": 143.23529411764707, "grad_norm": 0.6377672553062439, "learning_rate": 1.4866044785668563e-05, "loss": 0.0092, "step": 2435 }, { "epoch": 143.2941176470588, "grad_norm": 0.6144760251045227, "learning_rate": 1.486188327924924e-05, "loss": 0.0109, "step": 2436 }, { "epoch": 143.35294117647058, "grad_norm": 0.8911948204040527, "learning_rate": 1.4857720669912557e-05, "loss": 0.0196, "step": 2437 }, { "epoch": 143.41176470588235, "grad_norm": 1.026855230331421, "learning_rate": 1.4853556958602803e-05, "loss": 0.0189, "step": 2438 }, { "epoch": 143.47058823529412, "grad_norm": 1.3809266090393066, "learning_rate": 1.4849392146264516e-05, "loss": 0.0188, "step": 2439 }, { "epoch": 143.52941176470588, "grad_norm": 1.1472821235656738, "learning_rate": 1.4845226233842487e-05, "loss": 0.0179, "step": 2440 }, { "epoch": 143.58823529411765, "grad_norm": 0.9018409252166748, "learning_rate": 1.4841059222281744e-05, "loss": 0.022, "step": 2441 }, { "epoch": 143.64705882352942, "grad_norm": 1.0529924631118774, "learning_rate": 1.4836891112527579e-05, "loss": 0.0198, "step": 2442 }, { "epoch": 143.7058823529412, "grad_norm": 0.7852367162704468, "learning_rate": 1.4832721905525523e-05, "loss": 0.0151, "step": 2443 }, { "epoch": 143.76470588235293, "grad_norm": 0.9998731017112732, "learning_rate": 1.4828551602221364e-05, "loss": 0.014, "step": 2444 }, { "epoch": 143.8235294117647, "grad_norm": 1.5681480169296265, "learning_rate": 1.4824380203561133e-05, "loss": 0.0259, "step": 2445 }, { "epoch": 143.88235294117646, "grad_norm": 0.745109498500824, "learning_rate": 1.4820207710491107e-05, "loss": 0.0219, "step": 2446 }, { "epoch": 143.94117647058823, "grad_norm": 0.7057374715805054, "learning_rate": 1.4816034123957823e-05, "loss": 0.0103, "step": 2447 }, { "epoch": 144.0, "grad_norm": 0.7940883040428162, "learning_rate": 1.4811859444908053e-05, "loss": 0.017, "step": 2448 }, { "epoch": 144.05882352941177, "grad_norm": 0.8482946753501892, "learning_rate": 1.4807683674288822e-05, "loss": 0.0235, "step": 2449 }, { "epoch": 144.11764705882354, "grad_norm": 0.7322207689285278, "learning_rate": 1.4803506813047403e-05, "loss": 0.0147, "step": 2450 }, { "epoch": 144.1764705882353, "grad_norm": 0.6845687627792358, "learning_rate": 1.4799328862131322e-05, "loss": 0.0183, "step": 2451 }, { "epoch": 144.23529411764707, "grad_norm": 0.6845759153366089, "learning_rate": 1.4795149822488337e-05, "loss": 0.0244, "step": 2452 }, { "epoch": 144.2941176470588, "grad_norm": 0.816426157951355, "learning_rate": 1.4790969695066471e-05, "loss": 0.012, "step": 2453 }, { "epoch": 144.35294117647058, "grad_norm": 0.669400155544281, "learning_rate": 1.4786788480813981e-05, "loss": 0.0116, "step": 2454 }, { "epoch": 144.41176470588235, "grad_norm": 0.8231154084205627, "learning_rate": 1.4782606180679373e-05, "loss": 0.0137, "step": 2455 }, { "epoch": 144.47058823529412, "grad_norm": 0.6849294900894165, "learning_rate": 1.4778422795611404e-05, "loss": 0.0206, "step": 2456 }, { "epoch": 144.52941176470588, "grad_norm": 0.9358375072479248, "learning_rate": 1.4774238326559074e-05, "loss": 0.0221, "step": 2457 }, { "epoch": 144.58823529411765, "grad_norm": 0.9159533977508545, "learning_rate": 1.477005277447163e-05, "loss": 0.0129, "step": 2458 }, { "epoch": 144.64705882352942, "grad_norm": 0.596775233745575, "learning_rate": 1.4765866140298559e-05, "loss": 0.0108, "step": 2459 }, { "epoch": 144.7058823529412, "grad_norm": 0.6753364205360413, "learning_rate": 1.4761678424989606e-05, "loss": 0.012, "step": 2460 }, { "epoch": 144.76470588235293, "grad_norm": 0.9291605949401855, "learning_rate": 1.4757489629494743e-05, "loss": 0.0201, "step": 2461 }, { "epoch": 144.8235294117647, "grad_norm": 0.7210007309913635, "learning_rate": 1.4753299754764207e-05, "loss": 0.0197, "step": 2462 }, { "epoch": 144.88235294117646, "grad_norm": 0.9474327564239502, "learning_rate": 1.4749108801748465e-05, "loss": 0.0316, "step": 2463 }, { "epoch": 144.94117647058823, "grad_norm": 0.8228884339332581, "learning_rate": 1.4744916771398237e-05, "loss": 0.0123, "step": 2464 }, { "epoch": 145.0, "grad_norm": 1.1405344009399414, "learning_rate": 1.4740723664664483e-05, "loss": 0.0179, "step": 2465 }, { "epoch": 145.05882352941177, "grad_norm": 0.7112517952919006, "learning_rate": 1.4736529482498408e-05, "loss": 0.0151, "step": 2466 }, { "epoch": 145.11764705882354, "grad_norm": 0.5657258629798889, "learning_rate": 1.4732334225851462e-05, "loss": 0.0158, "step": 2467 }, { "epoch": 145.1764705882353, "grad_norm": 1.5432109832763672, "learning_rate": 1.4728137895675339e-05, "loss": 0.0183, "step": 2468 }, { "epoch": 145.23529411764707, "grad_norm": 0.6192536950111389, "learning_rate": 1.4723940492921971e-05, "loss": 0.0157, "step": 2469 }, { "epoch": 145.2941176470588, "grad_norm": 0.39468222856521606, "learning_rate": 1.4719742018543546e-05, "loss": 0.0092, "step": 2470 }, { "epoch": 145.35294117647058, "grad_norm": 0.7373402118682861, "learning_rate": 1.4715542473492486e-05, "loss": 0.0172, "step": 2471 }, { "epoch": 145.41176470588235, "grad_norm": 1.4026315212249756, "learning_rate": 1.4711341858721448e-05, "loss": 0.0373, "step": 2472 }, { "epoch": 145.47058823529412, "grad_norm": 1.1830016374588013, "learning_rate": 1.470714017518335e-05, "loss": 0.0261, "step": 2473 }, { "epoch": 145.52941176470588, "grad_norm": 0.5680297017097473, "learning_rate": 1.470293742383134e-05, "loss": 0.0103, "step": 2474 }, { "epoch": 145.58823529411765, "grad_norm": 0.8594658374786377, "learning_rate": 1.4698733605618812e-05, "loss": 0.0127, "step": 2475 }, { "epoch": 145.64705882352942, "grad_norm": 1.2306784391403198, "learning_rate": 1.46945287214994e-05, "loss": 0.0175, "step": 2476 }, { "epoch": 145.7058823529412, "grad_norm": 0.5280370712280273, "learning_rate": 1.469032277242698e-05, "loss": 0.0076, "step": 2477 }, { "epoch": 145.76470588235293, "grad_norm": 0.9632818698883057, "learning_rate": 1.4686115759355677e-05, "loss": 0.028, "step": 2478 }, { "epoch": 145.8235294117647, "grad_norm": 0.8522371649742126, "learning_rate": 1.4681907683239847e-05, "loss": 0.0155, "step": 2479 }, { "epoch": 145.88235294117646, "grad_norm": 1.128356695175171, "learning_rate": 1.4677698545034087e-05, "loss": 0.0115, "step": 2480 }, { "epoch": 145.94117647058823, "grad_norm": 1.4268742799758911, "learning_rate": 1.4673488345693248e-05, "loss": 0.0144, "step": 2481 }, { "epoch": 146.0, "grad_norm": 0.9622751474380493, "learning_rate": 1.4669277086172406e-05, "loss": 0.0242, "step": 2482 }, { "epoch": 146.05882352941177, "grad_norm": 1.167341947555542, "learning_rate": 1.4665064767426891e-05, "loss": 0.0107, "step": 2483 }, { "epoch": 146.11764705882354, "grad_norm": 2.2065532207489014, "learning_rate": 1.466085139041226e-05, "loss": 0.0248, "step": 2484 }, { "epoch": 146.1764705882353, "grad_norm": 1.4466971158981323, "learning_rate": 1.465663695608432e-05, "loss": 0.0133, "step": 2485 }, { "epoch": 146.23529411764707, "grad_norm": 1.7094690799713135, "learning_rate": 1.4652421465399115e-05, "loss": 0.0234, "step": 2486 }, { "epoch": 146.2941176470588, "grad_norm": 1.1253656148910522, "learning_rate": 1.4648204919312927e-05, "loss": 0.024, "step": 2487 }, { "epoch": 146.35294117647058, "grad_norm": 0.8639570474624634, "learning_rate": 1.464398731878228e-05, "loss": 0.015, "step": 2488 }, { "epoch": 146.41176470588235, "grad_norm": 4.216536998748779, "learning_rate": 1.4639768664763933e-05, "loss": 0.0204, "step": 2489 }, { "epoch": 146.47058823529412, "grad_norm": 0.6207290291786194, "learning_rate": 1.4635548958214894e-05, "loss": 0.0132, "step": 2490 }, { "epoch": 146.52941176470588, "grad_norm": 1.1786996126174927, "learning_rate": 1.4631328200092394e-05, "loss": 0.0232, "step": 2491 }, { "epoch": 146.58823529411765, "grad_norm": 0.7942230105400085, "learning_rate": 1.4627106391353918e-05, "loss": 0.01, "step": 2492 }, { "epoch": 146.64705882352942, "grad_norm": 1.9083677530288696, "learning_rate": 1.4622883532957178e-05, "loss": 0.0325, "step": 2493 }, { "epoch": 146.7058823529412, "grad_norm": 0.8728276491165161, "learning_rate": 1.461865962586013e-05, "loss": 0.0217, "step": 2494 }, { "epoch": 146.76470588235293, "grad_norm": 0.7326855659484863, "learning_rate": 1.4614434671020968e-05, "loss": 0.0175, "step": 2495 }, { "epoch": 146.8235294117647, "grad_norm": 0.7693637609481812, "learning_rate": 1.461020866939812e-05, "loss": 0.0153, "step": 2496 }, { "epoch": 146.88235294117646, "grad_norm": 1.4881869554519653, "learning_rate": 1.4605981621950259e-05, "loss": 0.0153, "step": 2497 }, { "epoch": 146.94117647058823, "grad_norm": 1.2520513534545898, "learning_rate": 1.4601753529636283e-05, "loss": 0.0152, "step": 2498 }, { "epoch": 147.0, "grad_norm": 0.5420241951942444, "learning_rate": 1.4597524393415336e-05, "loss": 0.0065, "step": 2499 }, { "epoch": 147.05882352941177, "grad_norm": 1.19541597366333, "learning_rate": 1.4593294214246798e-05, "loss": 0.0255, "step": 2500 }, { "epoch": 147.11764705882354, "grad_norm": 0.990162193775177, "learning_rate": 1.4589062993090286e-05, "loss": 0.0317, "step": 2501 }, { "epoch": 147.1764705882353, "grad_norm": 0.9598802924156189, "learning_rate": 1.4584830730905645e-05, "loss": 0.0107, "step": 2502 }, { "epoch": 147.23529411764707, "grad_norm": 1.077921986579895, "learning_rate": 1.4580597428652974e-05, "loss": 0.0177, "step": 2503 }, { "epoch": 147.2941176470588, "grad_norm": 0.8174329996109009, "learning_rate": 1.4576363087292587e-05, "loss": 0.0251, "step": 2504 }, { "epoch": 147.35294117647058, "grad_norm": 0.3753868341445923, "learning_rate": 1.4572127707785052e-05, "loss": 0.0089, "step": 2505 }, { "epoch": 147.41176470588235, "grad_norm": 1.0430512428283691, "learning_rate": 1.4567891291091152e-05, "loss": 0.0319, "step": 2506 }, { "epoch": 147.47058823529412, "grad_norm": 2.1973695755004883, "learning_rate": 1.4563653838171931e-05, "loss": 0.0149, "step": 2507 }, { "epoch": 147.52941176470588, "grad_norm": 1.1704250574111938, "learning_rate": 1.4559415349988643e-05, "loss": 0.0132, "step": 2508 }, { "epoch": 147.58823529411765, "grad_norm": 0.7036792635917664, "learning_rate": 1.45551758275028e-05, "loss": 0.0105, "step": 2509 }, { "epoch": 147.64705882352942, "grad_norm": 0.6536076664924622, "learning_rate": 1.4550935271676125e-05, "loss": 0.0123, "step": 2510 }, { "epoch": 147.7058823529412, "grad_norm": 0.8814238905906677, "learning_rate": 1.4546693683470594e-05, "loss": 0.0222, "step": 2511 }, { "epoch": 147.76470588235293, "grad_norm": 1.2742444276809692, "learning_rate": 1.4542451063848411e-05, "loss": 0.0122, "step": 2512 }, { "epoch": 147.8235294117647, "grad_norm": 0.8455948233604431, "learning_rate": 1.453820741377201e-05, "loss": 0.0152, "step": 2513 }, { "epoch": 147.88235294117646, "grad_norm": 0.967862069606781, "learning_rate": 1.4533962734204065e-05, "loss": 0.0315, "step": 2514 }, { "epoch": 147.94117647058823, "grad_norm": 0.942732572555542, "learning_rate": 1.4529717026107477e-05, "loss": 0.0153, "step": 2515 }, { "epoch": 148.0, "grad_norm": 1.7978637218475342, "learning_rate": 1.4525470290445392e-05, "loss": 0.0135, "step": 2516 }, { "epoch": 148.05882352941177, "grad_norm": 0.6104710102081299, "learning_rate": 1.4521222528181171e-05, "loss": 0.0177, "step": 2517 }, { "epoch": 148.11764705882354, "grad_norm": 1.0597891807556152, "learning_rate": 1.4516973740278428e-05, "loss": 0.0304, "step": 2518 }, { "epoch": 148.1764705882353, "grad_norm": 1.0342826843261719, "learning_rate": 1.4512723927700991e-05, "loss": 0.0187, "step": 2519 }, { "epoch": 148.23529411764707, "grad_norm": 1.611132025718689, "learning_rate": 1.4508473091412935e-05, "loss": 0.0184, "step": 2520 }, { "epoch": 148.2941176470588, "grad_norm": 0.7938459515571594, "learning_rate": 1.450422123237856e-05, "loss": 0.0103, "step": 2521 }, { "epoch": 148.35294117647058, "grad_norm": 0.8087086081504822, "learning_rate": 1.44999683515624e-05, "loss": 0.0141, "step": 2522 }, { "epoch": 148.41176470588235, "grad_norm": 0.8983964920043945, "learning_rate": 1.4495714449929217e-05, "loss": 0.0181, "step": 2523 }, { "epoch": 148.47058823529412, "grad_norm": 1.010948896408081, "learning_rate": 1.4491459528444011e-05, "loss": 0.0136, "step": 2524 }, { "epoch": 148.52941176470588, "grad_norm": 2.8101277351379395, "learning_rate": 1.4487203588072011e-05, "loss": 0.0275, "step": 2525 }, { "epoch": 148.58823529411765, "grad_norm": 2.8566532135009766, "learning_rate": 1.4482946629778673e-05, "loss": 0.0297, "step": 2526 }, { "epoch": 148.64705882352942, "grad_norm": 2.2591419219970703, "learning_rate": 1.4478688654529692e-05, "loss": 0.0229, "step": 2527 }, { "epoch": 148.7058823529412, "grad_norm": 1.0162231922149658, "learning_rate": 1.4474429663290983e-05, "loss": 0.0253, "step": 2528 }, { "epoch": 148.76470588235293, "grad_norm": 1.292468786239624, "learning_rate": 1.4470169657028703e-05, "loss": 0.0206, "step": 2529 }, { "epoch": 148.8235294117647, "grad_norm": 0.9442926645278931, "learning_rate": 1.4465908636709229e-05, "loss": 0.0103, "step": 2530 }, { "epoch": 148.88235294117646, "grad_norm": 0.9952703714370728, "learning_rate": 1.4461646603299176e-05, "loss": 0.0167, "step": 2531 }, { "epoch": 148.94117647058823, "grad_norm": 1.2462540864944458, "learning_rate": 1.4457383557765385e-05, "loss": 0.0129, "step": 2532 }, { "epoch": 149.0, "grad_norm": 1.0081113576889038, "learning_rate": 1.4453119501074924e-05, "loss": 0.0231, "step": 2533 }, { "epoch": 149.05882352941177, "grad_norm": 1.1796067953109741, "learning_rate": 1.4448854434195098e-05, "loss": 0.0306, "step": 2534 }, { "epoch": 149.11764705882354, "grad_norm": 0.9413060545921326, "learning_rate": 1.4444588358093432e-05, "loss": 0.0149, "step": 2535 }, { "epoch": 149.1764705882353, "grad_norm": 1.4609194993972778, "learning_rate": 1.444032127373769e-05, "loss": 0.0257, "step": 2536 }, { "epoch": 149.23529411764707, "grad_norm": 5.667477607727051, "learning_rate": 1.4436053182095853e-05, "loss": 0.0153, "step": 2537 }, { "epoch": 149.2941176470588, "grad_norm": 0.640769898891449, "learning_rate": 1.4431784084136144e-05, "loss": 0.0096, "step": 2538 }, { "epoch": 149.35294117647058, "grad_norm": 1.2995190620422363, "learning_rate": 1.4427513980827001e-05, "loss": 0.0317, "step": 2539 }, { "epoch": 149.41176470588235, "grad_norm": 1.1982918977737427, "learning_rate": 1.44232428731371e-05, "loss": 0.0102, "step": 2540 }, { "epoch": 149.47058823529412, "grad_norm": 3.0766897201538086, "learning_rate": 1.4418970762035337e-05, "loss": 0.0454, "step": 2541 }, { "epoch": 149.52941176470588, "grad_norm": 1.9282268285751343, "learning_rate": 1.4414697648490848e-05, "loss": 0.0297, "step": 2542 }, { "epoch": 149.58823529411765, "grad_norm": 1.0527747869491577, "learning_rate": 1.4410423533472976e-05, "loss": 0.0111, "step": 2543 }, { "epoch": 149.64705882352942, "grad_norm": 1.3157973289489746, "learning_rate": 1.4406148417951315e-05, "loss": 0.015, "step": 2544 }, { "epoch": 149.7058823529412, "grad_norm": 0.7240288853645325, "learning_rate": 1.4401872302895665e-05, "loss": 0.0097, "step": 2545 }, { "epoch": 149.76470588235293, "grad_norm": 1.6529945135116577, "learning_rate": 1.4397595189276069e-05, "loss": 0.0172, "step": 2546 }, { "epoch": 149.8235294117647, "grad_norm": 0.9773793816566467, "learning_rate": 1.4393317078062783e-05, "loss": 0.0109, "step": 2547 }, { "epoch": 149.88235294117646, "grad_norm": 1.560721755027771, "learning_rate": 1.4389037970226299e-05, "loss": 0.0239, "step": 2548 }, { "epoch": 149.94117647058823, "grad_norm": 1.9321870803833008, "learning_rate": 1.4384757866737333e-05, "loss": 0.0158, "step": 2549 }, { "epoch": 150.0, "grad_norm": 1.4772518873214722, "learning_rate": 1.4380476768566825e-05, "loss": 0.0231, "step": 2550 }, { "epoch": 150.05882352941177, "grad_norm": 1.4245909452438354, "learning_rate": 1.437619467668594e-05, "loss": 0.0294, "step": 2551 }, { "epoch": 150.11764705882354, "grad_norm": 1.5190021991729736, "learning_rate": 1.437191159206607e-05, "loss": 0.0119, "step": 2552 }, { "epoch": 150.1764705882353, "grad_norm": 1.005365014076233, "learning_rate": 1.4367627515678835e-05, "loss": 0.0272, "step": 2553 }, { "epoch": 150.23529411764707, "grad_norm": 1.0695236921310425, "learning_rate": 1.436334244849607e-05, "loss": 0.0105, "step": 2554 }, { "epoch": 150.2941176470588, "grad_norm": 1.5704277753829956, "learning_rate": 1.435905639148985e-05, "loss": 0.0305, "step": 2555 }, { "epoch": 150.35294117647058, "grad_norm": 1.2663191556930542, "learning_rate": 1.435476934563246e-05, "loss": 0.0197, "step": 2556 }, { "epoch": 150.41176470588235, "grad_norm": 4.768337249755859, "learning_rate": 1.4350481311896422e-05, "loss": 0.027, "step": 2557 }, { "epoch": 150.47058823529412, "grad_norm": 0.7781479954719543, "learning_rate": 1.434619229125447e-05, "loss": 0.0203, "step": 2558 }, { "epoch": 150.52941176470588, "grad_norm": 1.31853187084198, "learning_rate": 1.4341902284679567e-05, "loss": 0.0317, "step": 2559 }, { "epoch": 150.58823529411765, "grad_norm": 0.6796051859855652, "learning_rate": 1.4337611293144908e-05, "loss": 0.0098, "step": 2560 }, { "epoch": 150.64705882352942, "grad_norm": 1.6231820583343506, "learning_rate": 1.4333319317623895e-05, "loss": 0.0253, "step": 2561 }, { "epoch": 150.7058823529412, "grad_norm": 1.6833243370056152, "learning_rate": 1.4329026359090166e-05, "loss": 0.0242, "step": 2562 }, { "epoch": 150.76470588235293, "grad_norm": 1.3155169486999512, "learning_rate": 1.4324732418517574e-05, "loss": 0.0105, "step": 2563 }, { "epoch": 150.8235294117647, "grad_norm": 1.3250757455825806, "learning_rate": 1.4320437496880207e-05, "loss": 0.0132, "step": 2564 }, { "epoch": 150.88235294117646, "grad_norm": 2.795323133468628, "learning_rate": 1.4316141595152358e-05, "loss": 0.0263, "step": 2565 }, { "epoch": 150.94117647058823, "grad_norm": 1.7598048448562622, "learning_rate": 1.4311844714308556e-05, "loss": 0.0144, "step": 2566 }, { "epoch": 151.0, "grad_norm": 1.6238996982574463, "learning_rate": 1.4307546855323549e-05, "loss": 0.0335, "step": 2567 }, { "epoch": 151.05882352941177, "grad_norm": 0.9615972638130188, "learning_rate": 1.4303248019172303e-05, "loss": 0.016, "step": 2568 }, { "epoch": 151.11764705882354, "grad_norm": 1.9119775295257568, "learning_rate": 1.4298948206830005e-05, "loss": 0.0212, "step": 2569 }, { "epoch": 151.1764705882353, "grad_norm": 1.3115767240524292, "learning_rate": 1.4294647419272077e-05, "loss": 0.0212, "step": 2570 }, { "epoch": 151.23529411764707, "grad_norm": 1.1611660718917847, "learning_rate": 1.4290345657474142e-05, "loss": 0.0393, "step": 2571 }, { "epoch": 151.2941176470588, "grad_norm": 1.0260381698608398, "learning_rate": 1.4286042922412058e-05, "loss": 0.0114, "step": 2572 }, { "epoch": 151.35294117647058, "grad_norm": 1.8308665752410889, "learning_rate": 1.4281739215061898e-05, "loss": 0.0316, "step": 2573 }, { "epoch": 151.41176470588235, "grad_norm": 5.866944313049316, "learning_rate": 1.427743453639996e-05, "loss": 0.0373, "step": 2574 }, { "epoch": 151.47058823529412, "grad_norm": 4.1077728271484375, "learning_rate": 1.4273128887402758e-05, "loss": 0.0207, "step": 2575 }, { "epoch": 151.52941176470588, "grad_norm": 1.352608323097229, "learning_rate": 1.4268822269047027e-05, "loss": 0.0243, "step": 2576 }, { "epoch": 151.58823529411765, "grad_norm": 1.1142431497573853, "learning_rate": 1.4264514682309722e-05, "loss": 0.0166, "step": 2577 }, { "epoch": 151.64705882352942, "grad_norm": 1.3598427772521973, "learning_rate": 1.4260206128168019e-05, "loss": 0.0257, "step": 2578 }, { "epoch": 151.7058823529412, "grad_norm": 0.9399355053901672, "learning_rate": 1.4255896607599318e-05, "loss": 0.021, "step": 2579 }, { "epoch": 151.76470588235293, "grad_norm": 3.271914482116699, "learning_rate": 1.4251586121581223e-05, "loss": 0.0274, "step": 2580 }, { "epoch": 151.8235294117647, "grad_norm": 4.4671173095703125, "learning_rate": 1.4247274671091575e-05, "loss": 0.0206, "step": 2581 }, { "epoch": 151.88235294117646, "grad_norm": 8.772424697875977, "learning_rate": 1.4242962257108422e-05, "loss": 0.0439, "step": 2582 }, { "epoch": 151.94117647058823, "grad_norm": 2.0691332817077637, "learning_rate": 1.4238648880610035e-05, "loss": 0.0167, "step": 2583 }, { "epoch": 152.0, "grad_norm": 1.0402287244796753, "learning_rate": 1.4234334542574906e-05, "loss": 0.015, "step": 2584 }, { "epoch": 152.05882352941177, "grad_norm": 3.712512969970703, "learning_rate": 1.4230019243981735e-05, "loss": 0.0412, "step": 2585 }, { "epoch": 152.11764705882354, "grad_norm": 14.13194465637207, "learning_rate": 1.422570298580945e-05, "loss": 0.0862, "step": 2586 }, { "epoch": 152.1764705882353, "grad_norm": 1.8410099744796753, "learning_rate": 1.4221385769037198e-05, "loss": 0.0153, "step": 2587 }, { "epoch": 152.23529411764707, "grad_norm": 2.822044849395752, "learning_rate": 1.4217067594644332e-05, "loss": 0.0187, "step": 2588 }, { "epoch": 152.2941176470588, "grad_norm": 1.7892789840698242, "learning_rate": 1.421274846361043e-05, "loss": 0.0215, "step": 2589 }, { "epoch": 152.35294117647058, "grad_norm": 8.576443672180176, "learning_rate": 1.4208428376915293e-05, "loss": 0.0883, "step": 2590 }, { "epoch": 152.41176470588235, "grad_norm": 7.637890338897705, "learning_rate": 1.4204107335538923e-05, "loss": 0.0832, "step": 2591 }, { "epoch": 152.47058823529412, "grad_norm": 4.95609188079834, "learning_rate": 1.4199785340461554e-05, "loss": 0.0541, "step": 2592 }, { "epoch": 152.52941176470588, "grad_norm": 5.110592365264893, "learning_rate": 1.4195462392663623e-05, "loss": 0.0434, "step": 2593 }, { "epoch": 152.58823529411765, "grad_norm": 1.4727771282196045, "learning_rate": 1.41911384931258e-05, "loss": 0.0271, "step": 2594 }, { "epoch": 152.64705882352942, "grad_norm": 5.29982852935791, "learning_rate": 1.4186813642828952e-05, "loss": 0.0486, "step": 2595 }, { "epoch": 152.7058823529412, "grad_norm": 10.313112258911133, "learning_rate": 1.4182487842754173e-05, "loss": 0.0472, "step": 2596 }, { "epoch": 152.76470588235293, "grad_norm": 7.860767364501953, "learning_rate": 1.4178161093882775e-05, "loss": 0.055, "step": 2597 }, { "epoch": 152.8235294117647, "grad_norm": 3.063941240310669, "learning_rate": 1.4173833397196268e-05, "loss": 0.0294, "step": 2598 }, { "epoch": 152.88235294117646, "grad_norm": 5.754735946655273, "learning_rate": 1.4169504753676403e-05, "loss": 0.0338, "step": 2599 }, { "epoch": 152.94117647058823, "grad_norm": 3.1891074180603027, "learning_rate": 1.4165175164305121e-05, "loss": 0.043, "step": 2600 }, { "epoch": 153.0, "grad_norm": 4.255948543548584, "learning_rate": 1.4160844630064596e-05, "loss": 0.0376, "step": 2601 }, { "epoch": 153.05882352941177, "grad_norm": 4.67631721496582, "learning_rate": 1.4156513151937207e-05, "loss": 0.0435, "step": 2602 }, { "epoch": 153.11764705882354, "grad_norm": 6.258145809173584, "learning_rate": 1.4152180730905549e-05, "loss": 0.0455, "step": 2603 }, { "epoch": 153.1764705882353, "grad_norm": 2.6439177989959717, "learning_rate": 1.4147847367952427e-05, "loss": 0.0253, "step": 2604 }, { "epoch": 153.23529411764707, "grad_norm": 2.29438853263855, "learning_rate": 1.4143513064060871e-05, "loss": 0.0373, "step": 2605 }, { "epoch": 153.2941176470588, "grad_norm": 3.1101605892181396, "learning_rate": 1.4139177820214111e-05, "loss": 0.0453, "step": 2606 }, { "epoch": 153.35294117647058, "grad_norm": 3.6333186626434326, "learning_rate": 1.41348416373956e-05, "loss": 0.0303, "step": 2607 }, { "epoch": 153.41176470588235, "grad_norm": 3.2813899517059326, "learning_rate": 1.4130504516588997e-05, "loss": 0.0238, "step": 2608 }, { "epoch": 153.47058823529412, "grad_norm": 2.4605400562286377, "learning_rate": 1.4126166458778178e-05, "loss": 0.0295, "step": 2609 }, { "epoch": 153.52941176470588, "grad_norm": 2.9408857822418213, "learning_rate": 1.4121827464947233e-05, "loss": 0.0305, "step": 2610 }, { "epoch": 153.58823529411765, "grad_norm": 5.361012935638428, "learning_rate": 1.411748753608046e-05, "loss": 0.0406, "step": 2611 }, { "epoch": 153.64705882352942, "grad_norm": 6.952696800231934, "learning_rate": 1.4113146673162375e-05, "loss": 0.0374, "step": 2612 }, { "epoch": 153.7058823529412, "grad_norm": 2.0595924854278564, "learning_rate": 1.4108804877177693e-05, "loss": 0.0179, "step": 2613 }, { "epoch": 153.76470588235293, "grad_norm": 2.6884145736694336, "learning_rate": 1.4104462149111358e-05, "loss": 0.0322, "step": 2614 }, { "epoch": 153.8235294117647, "grad_norm": 3.3381400108337402, "learning_rate": 1.4100118489948514e-05, "loss": 0.0406, "step": 2615 }, { "epoch": 153.88235294117646, "grad_norm": 2.4846370220184326, "learning_rate": 1.4095773900674518e-05, "loss": 0.0481, "step": 2616 }, { "epoch": 153.94117647058823, "grad_norm": 7.557905673980713, "learning_rate": 1.4091428382274941e-05, "loss": 0.0568, "step": 2617 }, { "epoch": 154.0, "grad_norm": 2.197690963745117, "learning_rate": 1.4087081935735565e-05, "loss": 0.0239, "step": 2618 }, { "epoch": 154.05882352941177, "grad_norm": 1.9330596923828125, "learning_rate": 1.4082734562042374e-05, "loss": 0.0378, "step": 2619 }, { "epoch": 154.11764705882354, "grad_norm": 5.574730396270752, "learning_rate": 1.4078386262181579e-05, "loss": 0.0537, "step": 2620 }, { "epoch": 154.1764705882353, "grad_norm": 2.8541667461395264, "learning_rate": 1.4074037037139583e-05, "loss": 0.0281, "step": 2621 }, { "epoch": 154.23529411764707, "grad_norm": 4.139089107513428, "learning_rate": 1.4069686887903009e-05, "loss": 0.034, "step": 2622 }, { "epoch": 154.2941176470588, "grad_norm": 2.299652099609375, "learning_rate": 1.4065335815458688e-05, "loss": 0.0212, "step": 2623 }, { "epoch": 154.35294117647058, "grad_norm": 7.861202716827393, "learning_rate": 1.406098382079366e-05, "loss": 0.0426, "step": 2624 }, { "epoch": 154.41176470588235, "grad_norm": 2.520214080810547, "learning_rate": 1.4056630904895176e-05, "loss": 0.048, "step": 2625 }, { "epoch": 154.47058823529412, "grad_norm": 2.8597724437713623, "learning_rate": 1.4052277068750688e-05, "loss": 0.0386, "step": 2626 }, { "epoch": 154.52941176470588, "grad_norm": 4.74727725982666, "learning_rate": 1.4047922313347872e-05, "loss": 0.0433, "step": 2627 }, { "epoch": 154.58823529411765, "grad_norm": 3.9022841453552246, "learning_rate": 1.4043566639674597e-05, "loss": 0.0381, "step": 2628 }, { "epoch": 154.64705882352942, "grad_norm": 3.615713357925415, "learning_rate": 1.403921004871895e-05, "loss": 0.0396, "step": 2629 }, { "epoch": 154.7058823529412, "grad_norm": 2.673121929168701, "learning_rate": 1.4034852541469222e-05, "loss": 0.0366, "step": 2630 }, { "epoch": 154.76470588235293, "grad_norm": 7.147080421447754, "learning_rate": 1.4030494118913911e-05, "loss": 0.0614, "step": 2631 }, { "epoch": 154.8235294117647, "grad_norm": 2.851715326309204, "learning_rate": 1.4026134782041728e-05, "loss": 0.0521, "step": 2632 }, { "epoch": 154.88235294117646, "grad_norm": 4.070821285247803, "learning_rate": 1.4021774531841582e-05, "loss": 0.0519, "step": 2633 }, { "epoch": 154.94117647058823, "grad_norm": 4.142993927001953, "learning_rate": 1.4017413369302599e-05, "loss": 0.0487, "step": 2634 }, { "epoch": 155.0, "grad_norm": 2.3893661499023438, "learning_rate": 1.4013051295414108e-05, "loss": 0.0502, "step": 2635 }, { "epoch": 155.05882352941177, "grad_norm": 2.2156808376312256, "learning_rate": 1.4008688311165646e-05, "loss": 0.0372, "step": 2636 }, { "epoch": 155.11764705882354, "grad_norm": 3.637726306915283, "learning_rate": 1.4004324417546951e-05, "loss": 0.0366, "step": 2637 }, { "epoch": 155.1764705882353, "grad_norm": 2.881361961364746, "learning_rate": 1.3999959615547973e-05, "loss": 0.042, "step": 2638 }, { "epoch": 155.23529411764707, "grad_norm": 2.0819406509399414, "learning_rate": 1.3995593906158869e-05, "loss": 0.029, "step": 2639 }, { "epoch": 155.2941176470588, "grad_norm": 3.339632749557495, "learning_rate": 1.3991227290369997e-05, "loss": 0.0311, "step": 2640 }, { "epoch": 155.35294117647058, "grad_norm": 3.135173797607422, "learning_rate": 1.3986859769171925e-05, "loss": 0.0333, "step": 2641 }, { "epoch": 155.41176470588235, "grad_norm": 3.384634494781494, "learning_rate": 1.3982491343555422e-05, "loss": 0.0221, "step": 2642 }, { "epoch": 155.47058823529412, "grad_norm": 1.796352505683899, "learning_rate": 1.3978122014511466e-05, "loss": 0.0244, "step": 2643 }, { "epoch": 155.52941176470588, "grad_norm": 2.135953903198242, "learning_rate": 1.397375178303124e-05, "loss": 0.0255, "step": 2644 }, { "epoch": 155.58823529411765, "grad_norm": 1.6665667295455933, "learning_rate": 1.3969380650106128e-05, "loss": 0.0312, "step": 2645 }, { "epoch": 155.64705882352942, "grad_norm": 5.652390956878662, "learning_rate": 1.3965008616727719e-05, "loss": 0.0541, "step": 2646 }, { "epoch": 155.7058823529412, "grad_norm": 2.2237725257873535, "learning_rate": 1.3960635683887814e-05, "loss": 0.033, "step": 2647 }, { "epoch": 155.76470588235293, "grad_norm": 2.600339412689209, "learning_rate": 1.3956261852578403e-05, "loss": 0.0361, "step": 2648 }, { "epoch": 155.8235294117647, "grad_norm": 2.023329257965088, "learning_rate": 1.3951887123791699e-05, "loss": 0.0493, "step": 2649 }, { "epoch": 155.88235294117646, "grad_norm": 4.7208251953125, "learning_rate": 1.39475114985201e-05, "loss": 0.0481, "step": 2650 }, { "epoch": 155.94117647058823, "grad_norm": 1.771743893623352, "learning_rate": 1.3943134977756221e-05, "loss": 0.0215, "step": 2651 }, { "epoch": 156.0, "grad_norm": 1.7069960832595825, "learning_rate": 1.3938757562492873e-05, "loss": 0.0263, "step": 2652 }, { "epoch": 156.05882352941177, "grad_norm": 2.5601773262023926, "learning_rate": 1.3934379253723074e-05, "loss": 0.0312, "step": 2653 }, { "epoch": 156.11764705882354, "grad_norm": 1.4714374542236328, "learning_rate": 1.3930000052440037e-05, "loss": 0.0208, "step": 2654 }, { "epoch": 156.1764705882353, "grad_norm": 3.2979960441589355, "learning_rate": 1.3925619959637188e-05, "loss": 0.0218, "step": 2655 }, { "epoch": 156.23529411764707, "grad_norm": 2.099144458770752, "learning_rate": 1.3921238976308151e-05, "loss": 0.0189, "step": 2656 }, { "epoch": 156.2941176470588, "grad_norm": 2.279995918273926, "learning_rate": 1.3916857103446751e-05, "loss": 0.0375, "step": 2657 }, { "epoch": 156.35294117647058, "grad_norm": 1.8718452453613281, "learning_rate": 1.3912474342047012e-05, "loss": 0.0361, "step": 2658 }, { "epoch": 156.41176470588235, "grad_norm": 2.833921194076538, "learning_rate": 1.390809069310316e-05, "loss": 0.0236, "step": 2659 }, { "epoch": 156.47058823529412, "grad_norm": 5.602975845336914, "learning_rate": 1.3903706157609633e-05, "loss": 0.0379, "step": 2660 }, { "epoch": 156.52941176470588, "grad_norm": 2.778040647506714, "learning_rate": 1.3899320736561058e-05, "loss": 0.0446, "step": 2661 }, { "epoch": 156.58823529411765, "grad_norm": 2.617499351501465, "learning_rate": 1.3894934430952267e-05, "loss": 0.0381, "step": 2662 }, { "epoch": 156.64705882352942, "grad_norm": 2.3762357234954834, "learning_rate": 1.3890547241778291e-05, "loss": 0.0233, "step": 2663 }, { "epoch": 156.7058823529412, "grad_norm": 1.5933496952056885, "learning_rate": 1.3886159170034365e-05, "loss": 0.0166, "step": 2664 }, { "epoch": 156.76470588235293, "grad_norm": 2.8356614112854004, "learning_rate": 1.3881770216715923e-05, "loss": 0.0387, "step": 2665 }, { "epoch": 156.8235294117647, "grad_norm": 2.158630847930908, "learning_rate": 1.38773803828186e-05, "loss": 0.0319, "step": 2666 }, { "epoch": 156.88235294117646, "grad_norm": 2.456897497177124, "learning_rate": 1.3872989669338224e-05, "loss": 0.0428, "step": 2667 }, { "epoch": 156.94117647058823, "grad_norm": 3.61529541015625, "learning_rate": 1.3868598077270829e-05, "loss": 0.0453, "step": 2668 }, { "epoch": 157.0, "grad_norm": 1.5626832246780396, "learning_rate": 1.3864205607612648e-05, "loss": 0.0349, "step": 2669 }, { "epoch": 157.05882352941177, "grad_norm": 1.9788169860839844, "learning_rate": 1.3859812261360115e-05, "loss": 0.0357, "step": 2670 }, { "epoch": 157.11764705882354, "grad_norm": 1.965840458869934, "learning_rate": 1.3855418039509857e-05, "loss": 0.0387, "step": 2671 }, { "epoch": 157.1764705882353, "grad_norm": 3.3082470893859863, "learning_rate": 1.3851022943058698e-05, "loss": 0.0239, "step": 2672 }, { "epoch": 157.23529411764707, "grad_norm": 2.277653694152832, "learning_rate": 1.3846626973003674e-05, "loss": 0.0275, "step": 2673 }, { "epoch": 157.2941176470588, "grad_norm": 2.2550454139709473, "learning_rate": 1.3842230130342e-05, "loss": 0.0235, "step": 2674 }, { "epoch": 157.35294117647058, "grad_norm": 1.5489802360534668, "learning_rate": 1.3837832416071109e-05, "loss": 0.0134, "step": 2675 }, { "epoch": 157.41176470588235, "grad_norm": 3.7825779914855957, "learning_rate": 1.3833433831188612e-05, "loss": 0.019, "step": 2676 }, { "epoch": 157.47058823529412, "grad_norm": 2.0042026042938232, "learning_rate": 1.3829034376692337e-05, "loss": 0.0348, "step": 2677 }, { "epoch": 157.52941176470588, "grad_norm": 2.071481466293335, "learning_rate": 1.3824634053580288e-05, "loss": 0.0222, "step": 2678 }, { "epoch": 157.58823529411765, "grad_norm": 1.7646123170852661, "learning_rate": 1.3820232862850689e-05, "loss": 0.0326, "step": 2679 }, { "epoch": 157.64705882352942, "grad_norm": 2.062873125076294, "learning_rate": 1.3815830805501942e-05, "loss": 0.0357, "step": 2680 }, { "epoch": 157.7058823529412, "grad_norm": 1.034178376197815, "learning_rate": 1.3811427882532656e-05, "loss": 0.0167, "step": 2681 }, { "epoch": 157.76470588235293, "grad_norm": 9.231683731079102, "learning_rate": 1.3807024094941632e-05, "loss": 0.0447, "step": 2682 }, { "epoch": 157.8235294117647, "grad_norm": 1.9725501537322998, "learning_rate": 1.380261944372787e-05, "loss": 0.0278, "step": 2683 }, { "epoch": 157.88235294117646, "grad_norm": 3.263003349304199, "learning_rate": 1.3798213929890562e-05, "loss": 0.0259, "step": 2684 }, { "epoch": 157.94117647058823, "grad_norm": 3.5356273651123047, "learning_rate": 1.37938075544291e-05, "loss": 0.0395, "step": 2685 }, { "epoch": 158.0, "grad_norm": 4.457401752471924, "learning_rate": 1.378940031834307e-05, "loss": 0.0388, "step": 2686 }, { "epoch": 158.05882352941177, "grad_norm": 1.7065651416778564, "learning_rate": 1.378499222263225e-05, "loss": 0.0158, "step": 2687 }, { "epoch": 158.11764705882354, "grad_norm": 1.7514170408248901, "learning_rate": 1.3780583268296617e-05, "loss": 0.0215, "step": 2688 }, { "epoch": 158.1764705882353, "grad_norm": 1.8636428117752075, "learning_rate": 1.3776173456336342e-05, "loss": 0.0302, "step": 2689 }, { "epoch": 158.23529411764707, "grad_norm": 1.6147632598876953, "learning_rate": 1.377176278775179e-05, "loss": 0.0276, "step": 2690 }, { "epoch": 158.2941176470588, "grad_norm": 3.1243815422058105, "learning_rate": 1.3767351263543518e-05, "loss": 0.0383, "step": 2691 }, { "epoch": 158.35294117647058, "grad_norm": 1.74599289894104, "learning_rate": 1.3762938884712285e-05, "loss": 0.0207, "step": 2692 }, { "epoch": 158.41176470588235, "grad_norm": 5.473215103149414, "learning_rate": 1.3758525652259031e-05, "loss": 0.0423, "step": 2693 }, { "epoch": 158.47058823529412, "grad_norm": 1.6382216215133667, "learning_rate": 1.3754111567184904e-05, "loss": 0.0269, "step": 2694 }, { "epoch": 158.52941176470588, "grad_norm": 2.288809299468994, "learning_rate": 1.3749696630491236e-05, "loss": 0.0303, "step": 2695 }, { "epoch": 158.58823529411765, "grad_norm": 2.2334847450256348, "learning_rate": 1.3745280843179554e-05, "loss": 0.022, "step": 2696 }, { "epoch": 158.64705882352942, "grad_norm": 2.159796714782715, "learning_rate": 1.3740864206251576e-05, "loss": 0.0272, "step": 2697 }, { "epoch": 158.7058823529412, "grad_norm": 1.2999626398086548, "learning_rate": 1.3736446720709222e-05, "loss": 0.0246, "step": 2698 }, { "epoch": 158.76470588235293, "grad_norm": 0.9585908055305481, "learning_rate": 1.373202838755459e-05, "loss": 0.0145, "step": 2699 }, { "epoch": 158.8235294117647, "grad_norm": 1.294109582901001, "learning_rate": 1.3727609207789982e-05, "loss": 0.0317, "step": 2700 }, { "epoch": 158.88235294117646, "grad_norm": 1.8881869316101074, "learning_rate": 1.372318918241789e-05, "loss": 0.0259, "step": 2701 }, { "epoch": 158.94117647058823, "grad_norm": 1.5064257383346558, "learning_rate": 1.3718768312440993e-05, "loss": 0.0396, "step": 2702 }, { "epoch": 159.0, "grad_norm": 1.9238659143447876, "learning_rate": 1.3714346598862168e-05, "loss": 0.0197, "step": 2703 }, { "epoch": 159.05882352941177, "grad_norm": 3.602651596069336, "learning_rate": 1.3709924042684473e-05, "loss": 0.0223, "step": 2704 }, { "epoch": 159.11764705882354, "grad_norm": 1.2352168560028076, "learning_rate": 1.3705500644911172e-05, "loss": 0.0316, "step": 2705 }, { "epoch": 159.1764705882353, "grad_norm": 1.5955857038497925, "learning_rate": 1.3701076406545704e-05, "loss": 0.0312, "step": 2706 }, { "epoch": 159.23529411764707, "grad_norm": 5.348022937774658, "learning_rate": 1.3696651328591715e-05, "loss": 0.0276, "step": 2707 }, { "epoch": 159.2941176470588, "grad_norm": 2.0463204383850098, "learning_rate": 1.3692225412053027e-05, "loss": 0.0157, "step": 2708 }, { "epoch": 159.35294117647058, "grad_norm": 2.3545660972595215, "learning_rate": 1.3687798657933664e-05, "loss": 0.0356, "step": 2709 }, { "epoch": 159.41176470588235, "grad_norm": 1.3475277423858643, "learning_rate": 1.3683371067237827e-05, "loss": 0.0151, "step": 2710 }, { "epoch": 159.47058823529412, "grad_norm": 1.4816513061523438, "learning_rate": 1.3678942640969921e-05, "loss": 0.0237, "step": 2711 }, { "epoch": 159.52941176470588, "grad_norm": 1.888673186302185, "learning_rate": 1.3674513380134532e-05, "loss": 0.029, "step": 2712 }, { "epoch": 159.58823529411765, "grad_norm": 1.0000749826431274, "learning_rate": 1.3670083285736432e-05, "loss": 0.0213, "step": 2713 }, { "epoch": 159.64705882352942, "grad_norm": 1.5994443893432617, "learning_rate": 1.3665652358780594e-05, "loss": 0.0254, "step": 2714 }, { "epoch": 159.7058823529412, "grad_norm": 1.4419951438903809, "learning_rate": 1.3661220600272169e-05, "loss": 0.0364, "step": 2715 }, { "epoch": 159.76470588235293, "grad_norm": 9.574315071105957, "learning_rate": 1.3656788011216503e-05, "loss": 0.0232, "step": 2716 }, { "epoch": 159.8235294117647, "grad_norm": 2.419445514678955, "learning_rate": 1.3652354592619126e-05, "loss": 0.0281, "step": 2717 }, { "epoch": 159.88235294117646, "grad_norm": 1.0435113906860352, "learning_rate": 1.364792034548576e-05, "loss": 0.0151, "step": 2718 }, { "epoch": 159.94117647058823, "grad_norm": 1.3473719358444214, "learning_rate": 1.364348527082231e-05, "loss": 0.0151, "step": 2719 }, { "epoch": 160.0, "grad_norm": 1.881758689880371, "learning_rate": 1.3639049369634878e-05, "loss": 0.0167, "step": 2720 }, { "epoch": 160.05882352941177, "grad_norm": 2.443025827407837, "learning_rate": 1.363461264292974e-05, "loss": 0.0174, "step": 2721 }, { "epoch": 160.11764705882354, "grad_norm": 2.5723602771759033, "learning_rate": 1.3630175091713372e-05, "loss": 0.0213, "step": 2722 }, { "epoch": 160.1764705882353, "grad_norm": 1.2645576000213623, "learning_rate": 1.3625736716992428e-05, "loss": 0.016, "step": 2723 }, { "epoch": 160.23529411764707, "grad_norm": 1.4193774461746216, "learning_rate": 1.3621297519773755e-05, "loss": 0.0284, "step": 2724 }, { "epoch": 160.2941176470588, "grad_norm": 1.8038058280944824, "learning_rate": 1.3616857501064384e-05, "loss": 0.0347, "step": 2725 }, { "epoch": 160.35294117647058, "grad_norm": 2.249812602996826, "learning_rate": 1.3612416661871532e-05, "loss": 0.0371, "step": 2726 }, { "epoch": 160.41176470588235, "grad_norm": 1.4189127683639526, "learning_rate": 1.3607975003202602e-05, "loss": 0.0149, "step": 2727 }, { "epoch": 160.47058823529412, "grad_norm": 0.9129979610443115, "learning_rate": 1.3603532526065184e-05, "loss": 0.0157, "step": 2728 }, { "epoch": 160.52941176470588, "grad_norm": 2.691084384918213, "learning_rate": 1.3599089231467054e-05, "loss": 0.0244, "step": 2729 }, { "epoch": 160.58823529411765, "grad_norm": 1.567769169807434, "learning_rate": 1.3594645120416167e-05, "loss": 0.0225, "step": 2730 }, { "epoch": 160.64705882352942, "grad_norm": 1.0761325359344482, "learning_rate": 1.359020019392068e-05, "loss": 0.0343, "step": 2731 }, { "epoch": 160.7058823529412, "grad_norm": 1.1428806781768799, "learning_rate": 1.3585754452988914e-05, "loss": 0.0236, "step": 2732 }, { "epoch": 160.76470588235293, "grad_norm": 5.016293525695801, "learning_rate": 1.3581307898629387e-05, "loss": 0.0289, "step": 2733 }, { "epoch": 160.8235294117647, "grad_norm": 1.1181230545043945, "learning_rate": 1.3576860531850804e-05, "loss": 0.0143, "step": 2734 }, { "epoch": 160.88235294117646, "grad_norm": 0.7407974600791931, "learning_rate": 1.3572412353662041e-05, "loss": 0.0108, "step": 2735 }, { "epoch": 160.94117647058823, "grad_norm": 1.596429705619812, "learning_rate": 1.3567963365072174e-05, "loss": 0.0214, "step": 2736 }, { "epoch": 161.0, "grad_norm": 1.2417546510696411, "learning_rate": 1.356351356709045e-05, "loss": 0.0195, "step": 2737 }, { "epoch": 161.05882352941177, "grad_norm": 1.5182114839553833, "learning_rate": 1.3559062960726309e-05, "loss": 0.0154, "step": 2738 }, { "epoch": 161.11764705882354, "grad_norm": 1.08999502658844, "learning_rate": 1.3554611546989365e-05, "loss": 0.0232, "step": 2739 }, { "epoch": 161.1764705882353, "grad_norm": 1.625569224357605, "learning_rate": 1.3550159326889426e-05, "loss": 0.0192, "step": 2740 }, { "epoch": 161.23529411764707, "grad_norm": 1.324347734451294, "learning_rate": 1.3545706301436472e-05, "loss": 0.0369, "step": 2741 }, { "epoch": 161.2941176470588, "grad_norm": 1.009265661239624, "learning_rate": 1.3541252471640678e-05, "loss": 0.0129, "step": 2742 }, { "epoch": 161.35294117647058, "grad_norm": 1.7779737710952759, "learning_rate": 1.3536797838512388e-05, "loss": 0.0168, "step": 2743 }, { "epoch": 161.41176470588235, "grad_norm": 2.708009719848633, "learning_rate": 1.3532342403062138e-05, "loss": 0.0141, "step": 2744 }, { "epoch": 161.47058823529412, "grad_norm": 0.9736712574958801, "learning_rate": 1.3527886166300641e-05, "loss": 0.0132, "step": 2745 }, { "epoch": 161.52941176470588, "grad_norm": 2.7831804752349854, "learning_rate": 1.3523429129238794e-05, "loss": 0.0328, "step": 2746 }, { "epoch": 161.58823529411765, "grad_norm": 1.345530390739441, "learning_rate": 1.3518971292887677e-05, "loss": 0.0196, "step": 2747 }, { "epoch": 161.64705882352942, "grad_norm": 0.9570181965827942, "learning_rate": 1.3514512658258545e-05, "loss": 0.0177, "step": 2748 }, { "epoch": 161.7058823529412, "grad_norm": 0.7503051161766052, "learning_rate": 1.3510053226362844e-05, "loss": 0.0229, "step": 2749 }, { "epoch": 161.76470588235293, "grad_norm": 1.6973334550857544, "learning_rate": 1.3505592998212192e-05, "loss": 0.0297, "step": 2750 }, { "epoch": 161.8235294117647, "grad_norm": 1.4961737394332886, "learning_rate": 1.3501131974818391e-05, "loss": 0.0218, "step": 2751 }, { "epoch": 161.88235294117646, "grad_norm": 1.4273942708969116, "learning_rate": 1.3496670157193425e-05, "loss": 0.0287, "step": 2752 }, { "epoch": 161.94117647058823, "grad_norm": 1.1608940362930298, "learning_rate": 1.3492207546349456e-05, "loss": 0.0164, "step": 2753 }, { "epoch": 162.0, "grad_norm": 1.1475319862365723, "learning_rate": 1.3487744143298822e-05, "loss": 0.0141, "step": 2754 }, { "epoch": 162.05882352941177, "grad_norm": 1.1283693313598633, "learning_rate": 1.3483279949054055e-05, "loss": 0.0216, "step": 2755 }, { "epoch": 162.11764705882354, "grad_norm": 1.1736931800842285, "learning_rate": 1.3478814964627846e-05, "loss": 0.0157, "step": 2756 }, { "epoch": 162.1764705882353, "grad_norm": 1.3941190242767334, "learning_rate": 1.3474349191033085e-05, "loss": 0.0187, "step": 2757 }, { "epoch": 162.23529411764707, "grad_norm": 0.7183484435081482, "learning_rate": 1.3469882629282825e-05, "loss": 0.0113, "step": 2758 }, { "epoch": 162.2941176470588, "grad_norm": 1.0165371894836426, "learning_rate": 1.3465415280390305e-05, "loss": 0.0163, "step": 2759 }, { "epoch": 162.35294117647058, "grad_norm": 0.7321991920471191, "learning_rate": 1.346094714536895e-05, "loss": 0.0125, "step": 2760 }, { "epoch": 162.41176470588235, "grad_norm": 2.5106818675994873, "learning_rate": 1.3456478225232348e-05, "loss": 0.0371, "step": 2761 }, { "epoch": 162.47058823529412, "grad_norm": 0.8273015022277832, "learning_rate": 1.3452008520994275e-05, "loss": 0.0173, "step": 2762 }, { "epoch": 162.52941176470588, "grad_norm": 1.305977463722229, "learning_rate": 1.3447538033668684e-05, "loss": 0.0202, "step": 2763 }, { "epoch": 162.58823529411765, "grad_norm": 0.8803088068962097, "learning_rate": 1.34430667642697e-05, "loss": 0.0201, "step": 2764 }, { "epoch": 162.64705882352942, "grad_norm": 0.7438651323318481, "learning_rate": 1.3438594713811635e-05, "loss": 0.0157, "step": 2765 }, { "epoch": 162.7058823529412, "grad_norm": 1.804618239402771, "learning_rate": 1.3434121883308971e-05, "loss": 0.0315, "step": 2766 }, { "epoch": 162.76470588235293, "grad_norm": 1.5567351579666138, "learning_rate": 1.3429648273776365e-05, "loss": 0.0185, "step": 2767 }, { "epoch": 162.8235294117647, "grad_norm": 0.4663073420524597, "learning_rate": 1.3425173886228664e-05, "loss": 0.0095, "step": 2768 }, { "epoch": 162.88235294117646, "grad_norm": 2.2896652221679688, "learning_rate": 1.342069872168087e-05, "loss": 0.0222, "step": 2769 }, { "epoch": 162.94117647058823, "grad_norm": 0.9813204407691956, "learning_rate": 1.3416222781148182e-05, "loss": 0.0121, "step": 2770 }, { "epoch": 163.0, "grad_norm": 1.0614805221557617, "learning_rate": 1.3411746065645961e-05, "loss": 0.0281, "step": 2771 }, { "epoch": 163.05882352941177, "grad_norm": 1.1224621534347534, "learning_rate": 1.3407268576189756e-05, "loss": 0.0111, "step": 2772 }, { "epoch": 163.11764705882354, "grad_norm": 0.8955715298652649, "learning_rate": 1.3402790313795277e-05, "loss": 0.0282, "step": 2773 }, { "epoch": 163.1764705882353, "grad_norm": 0.9558151364326477, "learning_rate": 1.339831127947842e-05, "loss": 0.0134, "step": 2774 }, { "epoch": 163.23529411764707, "grad_norm": 0.7169488072395325, "learning_rate": 1.3393831474255255e-05, "loss": 0.0177, "step": 2775 }, { "epoch": 163.2941176470588, "grad_norm": 0.8951840996742249, "learning_rate": 1.3389350899142022e-05, "loss": 0.0193, "step": 2776 }, { "epoch": 163.35294117647058, "grad_norm": 0.9346464276313782, "learning_rate": 1.3384869555155143e-05, "loss": 0.0245, "step": 2777 }, { "epoch": 163.41176470588235, "grad_norm": 1.2007051706314087, "learning_rate": 1.3380387443311203e-05, "loss": 0.0127, "step": 2778 }, { "epoch": 163.47058823529412, "grad_norm": 1.171245813369751, "learning_rate": 1.3375904564626976e-05, "loss": 0.0175, "step": 2779 }, { "epoch": 163.52941176470588, "grad_norm": 0.7282311320304871, "learning_rate": 1.3371420920119395e-05, "loss": 0.0147, "step": 2780 }, { "epoch": 163.58823529411765, "grad_norm": 1.2767366170883179, "learning_rate": 1.3366936510805581e-05, "loss": 0.0209, "step": 2781 }, { "epoch": 163.64705882352942, "grad_norm": 0.5349398851394653, "learning_rate": 1.3362451337702816e-05, "loss": 0.008, "step": 2782 }, { "epoch": 163.7058823529412, "grad_norm": 0.9311054944992065, "learning_rate": 1.3357965401828565e-05, "loss": 0.011, "step": 2783 }, { "epoch": 163.76470588235293, "grad_norm": 1.0894694328308105, "learning_rate": 1.3353478704200458e-05, "loss": 0.015, "step": 2784 }, { "epoch": 163.8235294117647, "grad_norm": 3.1673038005828857, "learning_rate": 1.3348991245836304e-05, "loss": 0.028, "step": 2785 }, { "epoch": 163.88235294117646, "grad_norm": 1.7341147661209106, "learning_rate": 1.3344503027754084e-05, "loss": 0.0233, "step": 2786 }, { "epoch": 163.94117647058823, "grad_norm": 1.0133905410766602, "learning_rate": 1.3340014050971942e-05, "loss": 0.0209, "step": 2787 }, { "epoch": 164.0, "grad_norm": 1.0623148679733276, "learning_rate": 1.3335524316508208e-05, "loss": 0.0131, "step": 2788 }, { "epoch": 164.05882352941177, "grad_norm": 0.6012938022613525, "learning_rate": 1.3331033825381376e-05, "loss": 0.0084, "step": 2789 }, { "epoch": 164.11764705882354, "grad_norm": 1.4955112934112549, "learning_rate": 1.3326542578610114e-05, "loss": 0.0134, "step": 2790 }, { "epoch": 164.1764705882353, "grad_norm": 1.0192375183105469, "learning_rate": 1.3322050577213257e-05, "loss": 0.0156, "step": 2791 }, { "epoch": 164.23529411764707, "grad_norm": 0.6092782020568848, "learning_rate": 1.3317557822209818e-05, "loss": 0.0096, "step": 2792 }, { "epoch": 164.2941176470588, "grad_norm": 0.9862825274467468, "learning_rate": 1.3313064314618978e-05, "loss": 0.0252, "step": 2793 }, { "epoch": 164.35294117647058, "grad_norm": 1.4912471771240234, "learning_rate": 1.3308570055460087e-05, "loss": 0.013, "step": 2794 }, { "epoch": 164.41176470588235, "grad_norm": 1.0085649490356445, "learning_rate": 1.3304075045752664e-05, "loss": 0.0171, "step": 2795 }, { "epoch": 164.47058823529412, "grad_norm": 0.7624896168708801, "learning_rate": 1.3299579286516409e-05, "loss": 0.0188, "step": 2796 }, { "epoch": 164.52941176470588, "grad_norm": 1.0675230026245117, "learning_rate": 1.3295082778771175e-05, "loss": 0.0203, "step": 2797 }, { "epoch": 164.58823529411765, "grad_norm": 0.6188505291938782, "learning_rate": 1.3290585523537002e-05, "loss": 0.0075, "step": 2798 }, { "epoch": 164.64705882352942, "grad_norm": 0.8079723119735718, "learning_rate": 1.328608752183409e-05, "loss": 0.0205, "step": 2799 }, { "epoch": 164.7058823529412, "grad_norm": 1.4775017499923706, "learning_rate": 1.3281588774682804e-05, "loss": 0.0281, "step": 2800 }, { "epoch": 164.76470588235293, "grad_norm": 0.8472139239311218, "learning_rate": 1.327708928310369e-05, "loss": 0.0207, "step": 2801 }, { "epoch": 164.8235294117647, "grad_norm": 1.1817822456359863, "learning_rate": 1.3272589048117452e-05, "loss": 0.0214, "step": 2802 }, { "epoch": 164.88235294117646, "grad_norm": 1.095509648323059, "learning_rate": 1.3268088070744972e-05, "loss": 0.0184, "step": 2803 }, { "epoch": 164.94117647058823, "grad_norm": 0.9766758680343628, "learning_rate": 1.3263586352007294e-05, "loss": 0.0175, "step": 2804 }, { "epoch": 165.0, "grad_norm": 0.8589317202568054, "learning_rate": 1.3259083892925633e-05, "loss": 0.0095, "step": 2805 }, { "epoch": 165.05882352941177, "grad_norm": 0.7260152101516724, "learning_rate": 1.3254580694521369e-05, "loss": 0.0103, "step": 2806 }, { "epoch": 165.11764705882354, "grad_norm": 1.2623571157455444, "learning_rate": 1.3250076757816053e-05, "loss": 0.0189, "step": 2807 }, { "epoch": 165.1764705882353, "grad_norm": 0.876175582408905, "learning_rate": 1.3245572083831402e-05, "loss": 0.0336, "step": 2808 }, { "epoch": 165.23529411764707, "grad_norm": 0.8571335673332214, "learning_rate": 1.3241066673589305e-05, "loss": 0.0117, "step": 2809 }, { "epoch": 165.2941176470588, "grad_norm": 1.0751162767410278, "learning_rate": 1.3236560528111804e-05, "loss": 0.0234, "step": 2810 }, { "epoch": 165.35294117647058, "grad_norm": 1.0334815979003906, "learning_rate": 1.3232053648421126e-05, "loss": 0.0162, "step": 2811 }, { "epoch": 165.41176470588235, "grad_norm": 1.0527253150939941, "learning_rate": 1.3227546035539652e-05, "loss": 0.02, "step": 2812 }, { "epoch": 165.47058823529412, "grad_norm": 0.8005237579345703, "learning_rate": 1.322303769048993e-05, "loss": 0.0291, "step": 2813 }, { "epoch": 165.52941176470588, "grad_norm": 0.5401599407196045, "learning_rate": 1.3218528614294686e-05, "loss": 0.0084, "step": 2814 }, { "epoch": 165.58823529411765, "grad_norm": 0.8221551775932312, "learning_rate": 1.3214018807976793e-05, "loss": 0.0133, "step": 2815 }, { "epoch": 165.64705882352942, "grad_norm": 0.7957102656364441, "learning_rate": 1.320950827255931e-05, "loss": 0.0124, "step": 2816 }, { "epoch": 165.7058823529412, "grad_norm": 1.2490839958190918, "learning_rate": 1.3204997009065443e-05, "loss": 0.0138, "step": 2817 }, { "epoch": 165.76470588235293, "grad_norm": 1.0188826322555542, "learning_rate": 1.3200485018518577e-05, "loss": 0.0114, "step": 2818 }, { "epoch": 165.8235294117647, "grad_norm": 0.714738667011261, "learning_rate": 1.319597230194225e-05, "loss": 0.0088, "step": 2819 }, { "epoch": 165.88235294117646, "grad_norm": 0.4570634961128235, "learning_rate": 1.3191458860360176e-05, "loss": 0.007, "step": 2820 }, { "epoch": 165.94117647058823, "grad_norm": 0.7143329381942749, "learning_rate": 1.3186944694796227e-05, "loss": 0.016, "step": 2821 }, { "epoch": 166.0, "grad_norm": 1.3553636074066162, "learning_rate": 1.3182429806274442e-05, "loss": 0.0155, "step": 2822 }, { "epoch": 166.05882352941177, "grad_norm": 0.6521400809288025, "learning_rate": 1.3177914195819018e-05, "loss": 0.0162, "step": 2823 }, { "epoch": 166.11764705882354, "grad_norm": 1.0539263486862183, "learning_rate": 1.3173397864454325e-05, "loss": 0.0249, "step": 2824 }, { "epoch": 166.1764705882353, "grad_norm": 1.1062151193618774, "learning_rate": 1.3168880813204895e-05, "loss": 0.0137, "step": 2825 }, { "epoch": 166.23529411764707, "grad_norm": 0.731846272945404, "learning_rate": 1.316436304309541e-05, "loss": 0.0155, "step": 2826 }, { "epoch": 166.2941176470588, "grad_norm": 1.1029728651046753, "learning_rate": 1.3159844555150734e-05, "loss": 0.0155, "step": 2827 }, { "epoch": 166.35294117647058, "grad_norm": 0.8767803907394409, "learning_rate": 1.315532535039588e-05, "loss": 0.0165, "step": 2828 }, { "epoch": 166.41176470588235, "grad_norm": 0.6268046498298645, "learning_rate": 1.3150805429856032e-05, "loss": 0.0107, "step": 2829 }, { "epoch": 166.47058823529412, "grad_norm": 0.5701814293861389, "learning_rate": 1.3146284794556535e-05, "loss": 0.0171, "step": 2830 }, { "epoch": 166.52941176470588, "grad_norm": 0.5937273502349854, "learning_rate": 1.314176344552289e-05, "loss": 0.0074, "step": 2831 }, { "epoch": 166.58823529411765, "grad_norm": 0.5928347706794739, "learning_rate": 1.3137241383780766e-05, "loss": 0.0097, "step": 2832 }, { "epoch": 166.64705882352942, "grad_norm": 0.9692617058753967, "learning_rate": 1.313271861035599e-05, "loss": 0.0196, "step": 2833 }, { "epoch": 166.7058823529412, "grad_norm": 0.7730828523635864, "learning_rate": 1.312819512627456e-05, "loss": 0.01, "step": 2834 }, { "epoch": 166.76470588235293, "grad_norm": 0.8268076181411743, "learning_rate": 1.3123670932562615e-05, "loss": 0.0174, "step": 2835 }, { "epoch": 166.8235294117647, "grad_norm": 0.6121103167533875, "learning_rate": 1.3119146030246476e-05, "loss": 0.0092, "step": 2836 }, { "epoch": 166.88235294117646, "grad_norm": 1.2232811450958252, "learning_rate": 1.3114620420352617e-05, "loss": 0.023, "step": 2837 }, { "epoch": 166.94117647058823, "grad_norm": 0.4657258987426758, "learning_rate": 1.3110094103907668e-05, "loss": 0.0077, "step": 2838 }, { "epoch": 167.0, "grad_norm": 1.029444694519043, "learning_rate": 1.3105567081938423e-05, "loss": 0.0231, "step": 2839 }, { "epoch": 167.05882352941177, "grad_norm": 0.8120427131652832, "learning_rate": 1.310103935547184e-05, "loss": 0.0124, "step": 2840 }, { "epoch": 167.11764705882354, "grad_norm": 0.6855847239494324, "learning_rate": 1.3096510925535029e-05, "loss": 0.0172, "step": 2841 }, { "epoch": 167.1764705882353, "grad_norm": 1.0317063331604004, "learning_rate": 1.3091981793155268e-05, "loss": 0.0159, "step": 2842 }, { "epoch": 167.23529411764707, "grad_norm": 0.7170231938362122, "learning_rate": 1.3087451959359983e-05, "loss": 0.0109, "step": 2843 }, { "epoch": 167.2941176470588, "grad_norm": 1.097591519355774, "learning_rate": 1.3082921425176774e-05, "loss": 0.0166, "step": 2844 }, { "epoch": 167.35294117647058, "grad_norm": 1.179107666015625, "learning_rate": 1.3078390191633384e-05, "loss": 0.0151, "step": 2845 }, { "epoch": 167.41176470588235, "grad_norm": 1.6614521741867065, "learning_rate": 1.3073858259757727e-05, "loss": 0.0197, "step": 2846 }, { "epoch": 167.47058823529412, "grad_norm": 0.8867982029914856, "learning_rate": 1.3069325630577874e-05, "loss": 0.0145, "step": 2847 }, { "epoch": 167.52941176470588, "grad_norm": 0.6612418293952942, "learning_rate": 1.3064792305122042e-05, "loss": 0.0118, "step": 2848 }, { "epoch": 167.58823529411765, "grad_norm": 1.0514060258865356, "learning_rate": 1.3060258284418625e-05, "loss": 0.0104, "step": 2849 }, { "epoch": 167.64705882352942, "grad_norm": 0.9425469040870667, "learning_rate": 1.3055723569496157e-05, "loss": 0.0239, "step": 2850 }, { "epoch": 167.7058823529412, "grad_norm": 0.6752912998199463, "learning_rate": 1.3051188161383343e-05, "loss": 0.0074, "step": 2851 }, { "epoch": 167.76470588235293, "grad_norm": 0.7396991848945618, "learning_rate": 1.3046652061109035e-05, "loss": 0.0231, "step": 2852 }, { "epoch": 167.8235294117647, "grad_norm": 1.6033414602279663, "learning_rate": 1.3042115269702249e-05, "loss": 0.0138, "step": 2853 }, { "epoch": 167.88235294117646, "grad_norm": 0.6090513467788696, "learning_rate": 1.3037577788192154e-05, "loss": 0.0075, "step": 2854 }, { "epoch": 167.94117647058823, "grad_norm": 0.6565640568733215, "learning_rate": 1.303303961760808e-05, "loss": 0.0155, "step": 2855 }, { "epoch": 168.0, "grad_norm": 1.1667574644088745, "learning_rate": 1.3028500758979507e-05, "loss": 0.0199, "step": 2856 }, { "epoch": 168.05882352941177, "grad_norm": 0.9738884568214417, "learning_rate": 1.3023961213336076e-05, "loss": 0.0111, "step": 2857 }, { "epoch": 168.11764705882354, "grad_norm": 1.0719033479690552, "learning_rate": 1.3019420981707582e-05, "loss": 0.0114, "step": 2858 }, { "epoch": 168.1764705882353, "grad_norm": 0.8329287171363831, "learning_rate": 1.3014880065123974e-05, "loss": 0.0213, "step": 2859 }, { "epoch": 168.23529411764707, "grad_norm": 0.5648148059844971, "learning_rate": 1.301033846461536e-05, "loss": 0.0122, "step": 2860 }, { "epoch": 168.2941176470588, "grad_norm": 0.919567883014679, "learning_rate": 1.3005796181211998e-05, "loss": 0.0215, "step": 2861 }, { "epoch": 168.35294117647058, "grad_norm": 1.214983344078064, "learning_rate": 1.300125321594431e-05, "loss": 0.0191, "step": 2862 }, { "epoch": 168.41176470588235, "grad_norm": 0.6392197608947754, "learning_rate": 1.299670956984286e-05, "loss": 0.0121, "step": 2863 }, { "epoch": 168.47058823529412, "grad_norm": 0.534308135509491, "learning_rate": 1.2992165243938382e-05, "loss": 0.0064, "step": 2864 }, { "epoch": 168.52941176470588, "grad_norm": 0.9668881297111511, "learning_rate": 1.2987620239261744e-05, "loss": 0.0195, "step": 2865 }, { "epoch": 168.58823529411765, "grad_norm": 0.5979216694831848, "learning_rate": 1.2983074556843991e-05, "loss": 0.0099, "step": 2866 }, { "epoch": 168.64705882352942, "grad_norm": 0.7347297072410583, "learning_rate": 1.2978528197716302e-05, "loss": 0.0128, "step": 2867 }, { "epoch": 168.7058823529412, "grad_norm": 1.7468016147613525, "learning_rate": 1.2973981162910022e-05, "loss": 0.0204, "step": 2868 }, { "epoch": 168.76470588235293, "grad_norm": 0.7479432225227356, "learning_rate": 1.2969433453456644e-05, "loss": 0.0102, "step": 2869 }, { "epoch": 168.8235294117647, "grad_norm": 1.1235438585281372, "learning_rate": 1.2964885070387815e-05, "loss": 0.0216, "step": 2870 }, { "epoch": 168.88235294117646, "grad_norm": 0.5245190858840942, "learning_rate": 1.2960336014735335e-05, "loss": 0.0088, "step": 2871 }, { "epoch": 168.94117647058823, "grad_norm": 0.7112672328948975, "learning_rate": 1.2955786287531159e-05, "loss": 0.0071, "step": 2872 }, { "epoch": 169.0, "grad_norm": 0.6909666657447815, "learning_rate": 1.2951235889807386e-05, "loss": 0.0114, "step": 2873 }, { "epoch": 169.05882352941177, "grad_norm": 0.6205422878265381, "learning_rate": 1.2946684822596276e-05, "loss": 0.0092, "step": 2874 }, { "epoch": 169.11764705882354, "grad_norm": 0.8383591175079346, "learning_rate": 1.2942133086930237e-05, "loss": 0.0087, "step": 2875 }, { "epoch": 169.1764705882353, "grad_norm": 0.8869689702987671, "learning_rate": 1.2937580683841833e-05, "loss": 0.0227, "step": 2876 }, { "epoch": 169.23529411764707, "grad_norm": 1.5726666450500488, "learning_rate": 1.2933027614363771e-05, "loss": 0.0114, "step": 2877 }, { "epoch": 169.2941176470588, "grad_norm": 0.6507890820503235, "learning_rate": 1.2928473879528918e-05, "loss": 0.0137, "step": 2878 }, { "epoch": 169.35294117647058, "grad_norm": 3.3684921264648438, "learning_rate": 1.2923919480370284e-05, "loss": 0.0197, "step": 2879 }, { "epoch": 169.41176470588235, "grad_norm": 0.4231078624725342, "learning_rate": 1.2919364417921037e-05, "loss": 0.0085, "step": 2880 }, { "epoch": 169.47058823529412, "grad_norm": 2.2170867919921875, "learning_rate": 1.291480869321449e-05, "loss": 0.0194, "step": 2881 }, { "epoch": 169.52941176470588, "grad_norm": 0.9705116748809814, "learning_rate": 1.291025230728411e-05, "loss": 0.0179, "step": 2882 }, { "epoch": 169.58823529411765, "grad_norm": 0.6450294852256775, "learning_rate": 1.290569526116351e-05, "loss": 0.0143, "step": 2883 }, { "epoch": 169.64705882352942, "grad_norm": 0.7185750603675842, "learning_rate": 1.2901137555886458e-05, "loss": 0.0119, "step": 2884 }, { "epoch": 169.7058823529412, "grad_norm": 0.5855286121368408, "learning_rate": 1.2896579192486869e-05, "loss": 0.014, "step": 2885 }, { "epoch": 169.76470588235293, "grad_norm": 0.6247528791427612, "learning_rate": 1.2892020171998804e-05, "loss": 0.0163, "step": 2886 }, { "epoch": 169.8235294117647, "grad_norm": 0.8304274082183838, "learning_rate": 1.2887460495456476e-05, "loss": 0.0245, "step": 2887 }, { "epoch": 169.88235294117646, "grad_norm": 0.9933075904846191, "learning_rate": 1.288290016389425e-05, "loss": 0.0117, "step": 2888 }, { "epoch": 169.94117647058823, "grad_norm": 0.8372111916542053, "learning_rate": 1.2878339178346631e-05, "loss": 0.0123, "step": 2889 }, { "epoch": 170.0, "grad_norm": 1.2246582508087158, "learning_rate": 1.2873777539848284e-05, "loss": 0.0149, "step": 2890 }, { "epoch": 170.05882352941177, "grad_norm": 0.6795688271522522, "learning_rate": 1.2869215249434011e-05, "loss": 0.0078, "step": 2891 }, { "epoch": 170.11764705882354, "grad_norm": 0.8883925676345825, "learning_rate": 1.286465230813877e-05, "loss": 0.0248, "step": 2892 }, { "epoch": 170.1764705882353, "grad_norm": 0.8350722789764404, "learning_rate": 1.286008871699766e-05, "loss": 0.0102, "step": 2893 }, { "epoch": 170.23529411764707, "grad_norm": 1.1112326383590698, "learning_rate": 1.2855524477045937e-05, "loss": 0.0177, "step": 2894 }, { "epoch": 170.2941176470588, "grad_norm": 2.0230772495269775, "learning_rate": 1.285095958931899e-05, "loss": 0.0264, "step": 2895 }, { "epoch": 170.35294117647058, "grad_norm": 0.7805535793304443, "learning_rate": 1.2846394054852371e-05, "loss": 0.0138, "step": 2896 }, { "epoch": 170.41176470588235, "grad_norm": 0.8175185322761536, "learning_rate": 1.2841827874681765e-05, "loss": 0.0095, "step": 2897 }, { "epoch": 170.47058823529412, "grad_norm": 0.9026839733123779, "learning_rate": 1.283726104984301e-05, "loss": 0.0183, "step": 2898 }, { "epoch": 170.52941176470588, "grad_norm": 0.9614089727401733, "learning_rate": 1.2832693581372093e-05, "loss": 0.0152, "step": 2899 }, { "epoch": 170.58823529411765, "grad_norm": 0.42265042662620544, "learning_rate": 1.2828125470305137e-05, "loss": 0.0071, "step": 2900 }, { "epoch": 170.64705882352942, "grad_norm": 1.6417752504348755, "learning_rate": 1.2823556717678423e-05, "loss": 0.0236, "step": 2901 }, { "epoch": 170.7058823529412, "grad_norm": 1.1591852903366089, "learning_rate": 1.281898732452837e-05, "loss": 0.0166, "step": 2902 }, { "epoch": 170.76470588235293, "grad_norm": 1.4168070554733276, "learning_rate": 1.2814417291891542e-05, "loss": 0.0158, "step": 2903 }, { "epoch": 170.8235294117647, "grad_norm": 0.7424045205116272, "learning_rate": 1.2809846620804652e-05, "loss": 0.0169, "step": 2904 }, { "epoch": 170.88235294117646, "grad_norm": 0.7353686094284058, "learning_rate": 1.2805275312304557e-05, "loss": 0.0082, "step": 2905 }, { "epoch": 170.94117647058823, "grad_norm": 0.871799111366272, "learning_rate": 1.2800703367428253e-05, "loss": 0.0231, "step": 2906 }, { "epoch": 171.0, "grad_norm": 0.3621479272842407, "learning_rate": 1.279613078721289e-05, "loss": 0.008, "step": 2907 }, { "epoch": 171.05882352941177, "grad_norm": 0.6054264903068542, "learning_rate": 1.2791557572695756e-05, "loss": 0.0175, "step": 2908 }, { "epoch": 171.11764705882354, "grad_norm": 1.1972806453704834, "learning_rate": 1.2786983724914284e-05, "loss": 0.0188, "step": 2909 }, { "epoch": 171.1764705882353, "grad_norm": 0.5340092778205872, "learning_rate": 1.2782409244906044e-05, "loss": 0.0076, "step": 2910 }, { "epoch": 171.23529411764707, "grad_norm": 0.4262945353984833, "learning_rate": 1.2777834133708767e-05, "loss": 0.0092, "step": 2911 }, { "epoch": 171.2941176470588, "grad_norm": 0.9169754981994629, "learning_rate": 1.277325839236031e-05, "loss": 0.0229, "step": 2912 }, { "epoch": 171.35294117647058, "grad_norm": 0.6761354804039001, "learning_rate": 1.2768682021898678e-05, "loss": 0.0124, "step": 2913 }, { "epoch": 171.41176470588235, "grad_norm": 0.8142737150192261, "learning_rate": 1.2764105023362023e-05, "loss": 0.0128, "step": 2914 }, { "epoch": 171.47058823529412, "grad_norm": 0.878391444683075, "learning_rate": 1.2759527397788633e-05, "loss": 0.0193, "step": 2915 }, { "epoch": 171.52941176470588, "grad_norm": 0.5497244596481323, "learning_rate": 1.2754949146216946e-05, "loss": 0.0087, "step": 2916 }, { "epoch": 171.58823529411765, "grad_norm": 1.1375051736831665, "learning_rate": 1.275037026968553e-05, "loss": 0.0119, "step": 2917 }, { "epoch": 171.64705882352942, "grad_norm": 0.598848283290863, "learning_rate": 1.2745790769233114e-05, "loss": 0.0141, "step": 2918 }, { "epoch": 171.7058823529412, "grad_norm": 0.8972129821777344, "learning_rate": 1.2741210645898545e-05, "loss": 0.0099, "step": 2919 }, { "epoch": 171.76470588235293, "grad_norm": 0.7980281710624695, "learning_rate": 1.2736629900720832e-05, "loss": 0.0143, "step": 2920 }, { "epoch": 171.8235294117647, "grad_norm": 0.9987487196922302, "learning_rate": 1.273204853473911e-05, "loss": 0.0187, "step": 2921 }, { "epoch": 171.88235294117646, "grad_norm": 0.9456412196159363, "learning_rate": 1.2727466548992665e-05, "loss": 0.0212, "step": 2922 }, { "epoch": 171.94117647058823, "grad_norm": 0.8552698493003845, "learning_rate": 1.2722883944520917e-05, "loss": 0.0136, "step": 2923 }, { "epoch": 172.0, "grad_norm": 0.7402402758598328, "learning_rate": 1.2718300722363431e-05, "loss": 0.0115, "step": 2924 }, { "epoch": 172.05882352941177, "grad_norm": 0.6869096159934998, "learning_rate": 1.2713716883559909e-05, "loss": 0.0132, "step": 2925 }, { "epoch": 172.11764705882354, "grad_norm": 0.8875576257705688, "learning_rate": 1.2709132429150196e-05, "loss": 0.0251, "step": 2926 }, { "epoch": 172.1764705882353, "grad_norm": 2.96286940574646, "learning_rate": 1.2704547360174271e-05, "loss": 0.0172, "step": 2927 }, { "epoch": 172.23529411764707, "grad_norm": 1.147987961769104, "learning_rate": 1.2699961677672259e-05, "loss": 0.0208, "step": 2928 }, { "epoch": 172.2941176470588, "grad_norm": 0.4250200390815735, "learning_rate": 1.269537538268442e-05, "loss": 0.0061, "step": 2929 }, { "epoch": 172.35294117647058, "grad_norm": 0.49814340472221375, "learning_rate": 1.2690788476251155e-05, "loss": 0.0096, "step": 2930 }, { "epoch": 172.41176470588235, "grad_norm": 0.5387275815010071, "learning_rate": 1.2686200959413003e-05, "loss": 0.0135, "step": 2931 }, { "epoch": 172.47058823529412, "grad_norm": 1.026863932609558, "learning_rate": 1.2681612833210642e-05, "loss": 0.0128, "step": 2932 }, { "epoch": 172.52941176470588, "grad_norm": 0.577397346496582, "learning_rate": 1.2677024098684884e-05, "loss": 0.0092, "step": 2933 }, { "epoch": 172.58823529411765, "grad_norm": 3.977640151977539, "learning_rate": 1.2672434756876685e-05, "loss": 0.0136, "step": 2934 }, { "epoch": 172.64705882352942, "grad_norm": 0.48671218752861023, "learning_rate": 1.266784480882714e-05, "loss": 0.0091, "step": 2935 }, { "epoch": 172.7058823529412, "grad_norm": 1.021776795387268, "learning_rate": 1.266325425557747e-05, "loss": 0.0228, "step": 2936 }, { "epoch": 172.76470588235293, "grad_norm": 0.5261368751525879, "learning_rate": 1.2658663098169049e-05, "loss": 0.0096, "step": 2937 }, { "epoch": 172.8235294117647, "grad_norm": 0.8067064881324768, "learning_rate": 1.2654071337643376e-05, "loss": 0.0098, "step": 2938 }, { "epoch": 172.88235294117646, "grad_norm": 1.1470211744308472, "learning_rate": 1.264947897504209e-05, "loss": 0.0081, "step": 2939 }, { "epoch": 172.94117647058823, "grad_norm": 0.9106146097183228, "learning_rate": 1.2644886011406972e-05, "loss": 0.0233, "step": 2940 }, { "epoch": 173.0, "grad_norm": 0.4713217616081238, "learning_rate": 1.2640292447779932e-05, "loss": 0.0091, "step": 2941 }, { "epoch": 173.05882352941177, "grad_norm": 0.5976211428642273, "learning_rate": 1.263569828520302e-05, "loss": 0.0086, "step": 2942 }, { "epoch": 173.11764705882354, "grad_norm": 0.8981890678405762, "learning_rate": 1.2631103524718418e-05, "loss": 0.0162, "step": 2943 }, { "epoch": 173.1764705882353, "grad_norm": 2.1679699420928955, "learning_rate": 1.2626508167368452e-05, "loss": 0.0123, "step": 2944 }, { "epoch": 173.23529411764707, "grad_norm": 1.289747953414917, "learning_rate": 1.2621912214195576e-05, "loss": 0.0107, "step": 2945 }, { "epoch": 173.2941176470588, "grad_norm": 1.143012285232544, "learning_rate": 1.2617315666242379e-05, "loss": 0.0181, "step": 2946 }, { "epoch": 173.35294117647058, "grad_norm": 0.5344862341880798, "learning_rate": 1.2612718524551588e-05, "loss": 0.0073, "step": 2947 }, { "epoch": 173.41176470588235, "grad_norm": 0.5659047961235046, "learning_rate": 1.2608120790166062e-05, "loss": 0.0071, "step": 2948 }, { "epoch": 173.47058823529412, "grad_norm": 0.8985995054244995, "learning_rate": 1.2603522464128804e-05, "loss": 0.0123, "step": 2949 }, { "epoch": 173.52941176470588, "grad_norm": 0.9848408699035645, "learning_rate": 1.2598923547482932e-05, "loss": 0.0104, "step": 2950 }, { "epoch": 173.58823529411765, "grad_norm": 1.0517181158065796, "learning_rate": 1.2594324041271719e-05, "loss": 0.0135, "step": 2951 }, { "epoch": 173.64705882352942, "grad_norm": 1.1243349313735962, "learning_rate": 1.2589723946538556e-05, "loss": 0.0169, "step": 2952 }, { "epoch": 173.7058823529412, "grad_norm": 0.5939726829528809, "learning_rate": 1.2585123264326979e-05, "loss": 0.0082, "step": 2953 }, { "epoch": 173.76470588235293, "grad_norm": 0.5893354415893555, "learning_rate": 1.2580521995680648e-05, "loss": 0.0148, "step": 2954 }, { "epoch": 173.8235294117647, "grad_norm": 0.8135547041893005, "learning_rate": 1.2575920141643362e-05, "loss": 0.0229, "step": 2955 }, { "epoch": 173.88235294117646, "grad_norm": 1.1389063596725464, "learning_rate": 1.2571317703259048e-05, "loss": 0.0314, "step": 2956 }, { "epoch": 173.94117647058823, "grad_norm": 0.6103852987289429, "learning_rate": 1.256671468157177e-05, "loss": 0.014, "step": 2957 }, { "epoch": 174.0, "grad_norm": 1.8376290798187256, "learning_rate": 1.2562111077625723e-05, "loss": 0.019, "step": 2958 }, { "epoch": 174.05882352941177, "grad_norm": 0.6155897974967957, "learning_rate": 1.2557506892465236e-05, "loss": 0.0141, "step": 2959 }, { "epoch": 174.11764705882354, "grad_norm": 0.722943127155304, "learning_rate": 1.2552902127134765e-05, "loss": 0.0183, "step": 2960 }, { "epoch": 174.1764705882353, "grad_norm": 0.9298325777053833, "learning_rate": 1.2548296782678896e-05, "loss": 0.013, "step": 2961 }, { "epoch": 174.23529411764707, "grad_norm": 0.642699122428894, "learning_rate": 1.2543690860142358e-05, "loss": 0.0126, "step": 2962 }, { "epoch": 174.2941176470588, "grad_norm": 0.6240149736404419, "learning_rate": 1.2539084360569999e-05, "loss": 0.0084, "step": 2963 }, { "epoch": 174.35294117647058, "grad_norm": 0.7987310290336609, "learning_rate": 1.2534477285006807e-05, "loss": 0.021, "step": 2964 }, { "epoch": 174.41176470588235, "grad_norm": 0.6933825612068176, "learning_rate": 1.2529869634497893e-05, "loss": 0.0099, "step": 2965 }, { "epoch": 174.47058823529412, "grad_norm": 0.7791456580162048, "learning_rate": 1.2525261410088501e-05, "loss": 0.0084, "step": 2966 }, { "epoch": 174.52941176470588, "grad_norm": 1.6302894353866577, "learning_rate": 1.2520652612824006e-05, "loss": 0.0302, "step": 2967 }, { "epoch": 174.58823529411765, "grad_norm": 0.5127400755882263, "learning_rate": 1.2516043243749918e-05, "loss": 0.0061, "step": 2968 }, { "epoch": 174.64705882352942, "grad_norm": 0.6203836798667908, "learning_rate": 1.2511433303911861e-05, "loss": 0.0089, "step": 2969 }, { "epoch": 174.7058823529412, "grad_norm": 0.5376498103141785, "learning_rate": 1.2506822794355611e-05, "loss": 0.0094, "step": 2970 }, { "epoch": 174.76470588235293, "grad_norm": 0.39769992232322693, "learning_rate": 1.2502211716127053e-05, "loss": 0.0059, "step": 2971 }, { "epoch": 174.8235294117647, "grad_norm": 1.437874674797058, "learning_rate": 1.2497600070272214e-05, "loss": 0.023, "step": 2972 }, { "epoch": 174.88235294117646, "grad_norm": 0.694344162940979, "learning_rate": 1.2492987857837244e-05, "loss": 0.0135, "step": 2973 }, { "epoch": 174.94117647058823, "grad_norm": 1.427112340927124, "learning_rate": 1.2488375079868418e-05, "loss": 0.0188, "step": 2974 }, { "epoch": 175.0, "grad_norm": 1.583977222442627, "learning_rate": 1.248376173741215e-05, "loss": 0.0182, "step": 2975 }, { "epoch": 175.05882352941177, "grad_norm": 0.6301031708717346, "learning_rate": 1.2479147831514972e-05, "loss": 0.0096, "step": 2976 }, { "epoch": 175.11764705882354, "grad_norm": 0.7199718356132507, "learning_rate": 1.2474533363223551e-05, "loss": 0.0124, "step": 2977 }, { "epoch": 175.1764705882353, "grad_norm": 0.7587603330612183, "learning_rate": 1.2469918333584675e-05, "loss": 0.0209, "step": 2978 }, { "epoch": 175.23529411764707, "grad_norm": 0.7285974621772766, "learning_rate": 1.2465302743645264e-05, "loss": 0.0134, "step": 2979 }, { "epoch": 175.2941176470588, "grad_norm": 1.017099142074585, "learning_rate": 1.2460686594452365e-05, "loss": 0.017, "step": 2980 }, { "epoch": 175.35294117647058, "grad_norm": 0.6247594952583313, "learning_rate": 1.2456069887053151e-05, "loss": 0.0161, "step": 2981 }, { "epoch": 175.41176470588235, "grad_norm": 0.6317576766014099, "learning_rate": 1.2451452622494916e-05, "loss": 0.0159, "step": 2982 }, { "epoch": 175.47058823529412, "grad_norm": 0.8494994640350342, "learning_rate": 1.2446834801825092e-05, "loss": 0.0108, "step": 2983 }, { "epoch": 175.52941176470588, "grad_norm": 0.89380943775177, "learning_rate": 1.2442216426091226e-05, "loss": 0.0083, "step": 2984 }, { "epoch": 175.58823529411765, "grad_norm": 0.5883828401565552, "learning_rate": 1.2437597496341001e-05, "loss": 0.0085, "step": 2985 }, { "epoch": 175.64705882352942, "grad_norm": 0.8209514021873474, "learning_rate": 1.2432978013622217e-05, "loss": 0.024, "step": 2986 }, { "epoch": 175.7058823529412, "grad_norm": 0.7617586255073547, "learning_rate": 1.24283579789828e-05, "loss": 0.0152, "step": 2987 }, { "epoch": 175.76470588235293, "grad_norm": 0.9858369827270508, "learning_rate": 1.242373739347081e-05, "loss": 0.0177, "step": 2988 }, { "epoch": 175.8235294117647, "grad_norm": 0.8219929933547974, "learning_rate": 1.241911625813442e-05, "loss": 0.0108, "step": 2989 }, { "epoch": 175.88235294117646, "grad_norm": 0.8222288489341736, "learning_rate": 1.2414494574021943e-05, "loss": 0.0098, "step": 2990 }, { "epoch": 175.94117647058823, "grad_norm": 0.7099545001983643, "learning_rate": 1.2409872342181795e-05, "loss": 0.0095, "step": 2991 }, { "epoch": 176.0, "grad_norm": 0.6535040736198425, "learning_rate": 1.2405249563662539e-05, "loss": 0.0087, "step": 2992 }, { "epoch": 176.05882352941177, "grad_norm": 1.1098780632019043, "learning_rate": 1.2400626239512842e-05, "loss": 0.0182, "step": 2993 }, { "epoch": 176.11764705882354, "grad_norm": 0.47737911343574524, "learning_rate": 1.2396002370781515e-05, "loss": 0.0085, "step": 2994 }, { "epoch": 176.1764705882353, "grad_norm": 1.4311379194259644, "learning_rate": 1.2391377958517472e-05, "loss": 0.013, "step": 2995 }, { "epoch": 176.23529411764707, "grad_norm": 0.9140229821205139, "learning_rate": 1.2386753003769767e-05, "loss": 0.018, "step": 2996 }, { "epoch": 176.2941176470588, "grad_norm": 0.36319634318351746, "learning_rate": 1.2382127507587565e-05, "loss": 0.007, "step": 2997 }, { "epoch": 176.35294117647058, "grad_norm": 0.6540010571479797, "learning_rate": 1.2377501471020164e-05, "loss": 0.0099, "step": 2998 }, { "epoch": 176.41176470588235, "grad_norm": 0.6913789510726929, "learning_rate": 1.2372874895116975e-05, "loss": 0.014, "step": 2999 }, { "epoch": 176.47058823529412, "grad_norm": 0.6146858334541321, "learning_rate": 1.236824778092754e-05, "loss": 0.0155, "step": 3000 }, { "epoch": 176.52941176470588, "grad_norm": 0.7793124914169312, "learning_rate": 1.2363620129501517e-05, "loss": 0.009, "step": 3001 }, { "epoch": 176.58823529411765, "grad_norm": 0.7235597968101501, "learning_rate": 1.2358991941888686e-05, "loss": 0.0089, "step": 3002 }, { "epoch": 176.64705882352942, "grad_norm": 1.2884905338287354, "learning_rate": 1.2354363219138952e-05, "loss": 0.0071, "step": 3003 }, { "epoch": 176.7058823529412, "grad_norm": 0.4814426898956299, "learning_rate": 1.2349733962302342e-05, "loss": 0.0074, "step": 3004 }, { "epoch": 176.76470588235293, "grad_norm": 0.6091153621673584, "learning_rate": 1.2345104172428999e-05, "loss": 0.0185, "step": 3005 }, { "epoch": 176.8235294117647, "grad_norm": 0.6774498224258423, "learning_rate": 1.2340473850569192e-05, "loss": 0.0134, "step": 3006 }, { "epoch": 176.88235294117646, "grad_norm": 1.3288594484329224, "learning_rate": 1.2335842997773309e-05, "loss": 0.0153, "step": 3007 }, { "epoch": 176.94117647058823, "grad_norm": 1.1508119106292725, "learning_rate": 1.2331211615091857e-05, "loss": 0.0142, "step": 3008 }, { "epoch": 177.0, "grad_norm": 0.3997195363044739, "learning_rate": 1.2326579703575464e-05, "loss": 0.0099, "step": 3009 }, { "epoch": 177.05882352941177, "grad_norm": 0.6882345080375671, "learning_rate": 1.2321947264274876e-05, "loss": 0.0071, "step": 3010 }, { "epoch": 177.11764705882354, "grad_norm": 0.7281436920166016, "learning_rate": 1.2317314298240968e-05, "loss": 0.0134, "step": 3011 }, { "epoch": 177.1764705882353, "grad_norm": 0.5966264605522156, "learning_rate": 1.2312680806524722e-05, "loss": 0.0115, "step": 3012 }, { "epoch": 177.23529411764707, "grad_norm": 2.004533529281616, "learning_rate": 1.2308046790177249e-05, "loss": 0.0068, "step": 3013 }, { "epoch": 177.2941176470588, "grad_norm": 0.48678070306777954, "learning_rate": 1.2303412250249768e-05, "loss": 0.009, "step": 3014 }, { "epoch": 177.35294117647058, "grad_norm": 0.8100017309188843, "learning_rate": 1.2298777187793629e-05, "loss": 0.0087, "step": 3015 }, { "epoch": 177.41176470588235, "grad_norm": 0.5537344813346863, "learning_rate": 1.2294141603860295e-05, "loss": 0.0099, "step": 3016 }, { "epoch": 177.47058823529412, "grad_norm": 2.3706021308898926, "learning_rate": 1.2289505499501341e-05, "loss": 0.0287, "step": 3017 }, { "epoch": 177.52941176470588, "grad_norm": 0.5370182394981384, "learning_rate": 1.2284868875768478e-05, "loss": 0.0144, "step": 3018 }, { "epoch": 177.58823529411765, "grad_norm": 0.5737842917442322, "learning_rate": 1.2280231733713513e-05, "loss": 0.0165, "step": 3019 }, { "epoch": 177.64705882352942, "grad_norm": 0.47870051860809326, "learning_rate": 1.2275594074388387e-05, "loss": 0.0117, "step": 3020 }, { "epoch": 177.7058823529412, "grad_norm": 0.5015705227851868, "learning_rate": 1.2270955898845145e-05, "loss": 0.0063, "step": 3021 }, { "epoch": 177.76470588235293, "grad_norm": 0.542995035648346, "learning_rate": 1.2266317208135968e-05, "loss": 0.0145, "step": 3022 }, { "epoch": 177.8235294117647, "grad_norm": 0.9529106616973877, "learning_rate": 1.2261678003313128e-05, "loss": 0.0232, "step": 3023 }, { "epoch": 177.88235294117646, "grad_norm": 1.1171319484710693, "learning_rate": 1.2257038285429041e-05, "loss": 0.0101, "step": 3024 }, { "epoch": 177.94117647058823, "grad_norm": 0.6033110022544861, "learning_rate": 1.2252398055536216e-05, "loss": 0.0072, "step": 3025 }, { "epoch": 178.0, "grad_norm": 0.6490638852119446, "learning_rate": 1.2247757314687296e-05, "loss": 0.0127, "step": 3026 }, { "epoch": 178.05882352941177, "grad_norm": 1.5392229557037354, "learning_rate": 1.2243116063935028e-05, "loss": 0.0232, "step": 3027 }, { "epoch": 178.11764705882354, "grad_norm": 1.7458406686782837, "learning_rate": 1.2238474304332277e-05, "loss": 0.0255, "step": 3028 }, { "epoch": 178.1764705882353, "grad_norm": 0.8371738791465759, "learning_rate": 1.2233832036932031e-05, "loss": 0.0098, "step": 3029 }, { "epoch": 178.23529411764707, "grad_norm": 0.9486343860626221, "learning_rate": 1.2229189262787384e-05, "loss": 0.016, "step": 3030 }, { "epoch": 178.2941176470588, "grad_norm": 1.0207483768463135, "learning_rate": 1.222454598295155e-05, "loss": 0.0107, "step": 3031 }, { "epoch": 178.35294117647058, "grad_norm": 0.7067667245864868, "learning_rate": 1.2219902198477851e-05, "loss": 0.0122, "step": 3032 }, { "epoch": 178.41176470588235, "grad_norm": 1.0121811628341675, "learning_rate": 1.2215257910419738e-05, "loss": 0.0152, "step": 3033 }, { "epoch": 178.47058823529412, "grad_norm": 1.0465978384017944, "learning_rate": 1.221061311983076e-05, "loss": 0.0141, "step": 3034 }, { "epoch": 178.52941176470588, "grad_norm": 0.6869925260543823, "learning_rate": 1.2205967827764589e-05, "loss": 0.0089, "step": 3035 }, { "epoch": 178.58823529411765, "grad_norm": 1.0714492797851562, "learning_rate": 1.2201322035275005e-05, "loss": 0.0132, "step": 3036 }, { "epoch": 178.64705882352942, "grad_norm": 0.47962623834609985, "learning_rate": 1.2196675743415913e-05, "loss": 0.0059, "step": 3037 }, { "epoch": 178.7058823529412, "grad_norm": 0.7234214544296265, "learning_rate": 1.2192028953241317e-05, "loss": 0.0144, "step": 3038 }, { "epoch": 178.76470588235293, "grad_norm": 0.4531199634075165, "learning_rate": 1.2187381665805343e-05, "loss": 0.0077, "step": 3039 }, { "epoch": 178.8235294117647, "grad_norm": 0.9053736925125122, "learning_rate": 1.2182733882162227e-05, "loss": 0.0136, "step": 3040 }, { "epoch": 178.88235294117646, "grad_norm": 0.8165600299835205, "learning_rate": 1.2178085603366319e-05, "loss": 0.0212, "step": 3041 }, { "epoch": 178.94117647058823, "grad_norm": 0.5091197490692139, "learning_rate": 1.2173436830472079e-05, "loss": 0.0072, "step": 3042 }, { "epoch": 179.0, "grad_norm": 0.9358527064323425, "learning_rate": 1.2168787564534078e-05, "loss": 0.0178, "step": 3043 }, { "epoch": 179.05882352941177, "grad_norm": 0.6302806735038757, "learning_rate": 1.2164137806607006e-05, "loss": 0.0094, "step": 3044 }, { "epoch": 179.11764705882354, "grad_norm": 0.8134034872055054, "learning_rate": 1.2159487557745657e-05, "loss": 0.0222, "step": 3045 }, { "epoch": 179.1764705882353, "grad_norm": 1.1416248083114624, "learning_rate": 1.215483681900494e-05, "loss": 0.0127, "step": 3046 }, { "epoch": 179.23529411764707, "grad_norm": 1.278480052947998, "learning_rate": 1.2150185591439874e-05, "loss": 0.0127, "step": 3047 }, { "epoch": 179.2941176470588, "grad_norm": 0.7540642619132996, "learning_rate": 1.2145533876105586e-05, "loss": 0.019, "step": 3048 }, { "epoch": 179.35294117647058, "grad_norm": 0.7630302906036377, "learning_rate": 1.2140881674057322e-05, "loss": 0.0245, "step": 3049 }, { "epoch": 179.41176470588235, "grad_norm": 0.6463465094566345, "learning_rate": 1.2136228986350428e-05, "loss": 0.0085, "step": 3050 }, { "epoch": 179.47058823529412, "grad_norm": 0.6045681834220886, "learning_rate": 1.2131575814040373e-05, "loss": 0.0069, "step": 3051 }, { "epoch": 179.52941176470588, "grad_norm": 0.7358526587486267, "learning_rate": 1.2126922158182721e-05, "loss": 0.0158, "step": 3052 }, { "epoch": 179.58823529411765, "grad_norm": 0.775658369064331, "learning_rate": 1.2122268019833156e-05, "loss": 0.0096, "step": 3053 }, { "epoch": 179.64705882352942, "grad_norm": 0.5712041854858398, "learning_rate": 1.2117613400047467e-05, "loss": 0.0103, "step": 3054 }, { "epoch": 179.7058823529412, "grad_norm": 0.7177655696868896, "learning_rate": 1.2112958299881557e-05, "loss": 0.0135, "step": 3055 }, { "epoch": 179.76470588235293, "grad_norm": 0.8519097566604614, "learning_rate": 1.210830272039143e-05, "loss": 0.0092, "step": 3056 }, { "epoch": 179.8235294117647, "grad_norm": 0.620312511920929, "learning_rate": 1.2103646662633207e-05, "loss": 0.01, "step": 3057 }, { "epoch": 179.88235294117646, "grad_norm": 1.675844430923462, "learning_rate": 1.209899012766311e-05, "loss": 0.0115, "step": 3058 }, { "epoch": 179.94117647058823, "grad_norm": 0.9293653964996338, "learning_rate": 1.209433311653748e-05, "loss": 0.0103, "step": 3059 }, { "epoch": 180.0, "grad_norm": 1.769283413887024, "learning_rate": 1.2089675630312755e-05, "loss": 0.0114, "step": 3060 }, { "epoch": 180.05882352941177, "grad_norm": 0.6556861996650696, "learning_rate": 1.2085017670045481e-05, "loss": 0.0106, "step": 3061 }, { "epoch": 180.11764705882354, "grad_norm": 0.6365075707435608, "learning_rate": 1.2080359236792321e-05, "loss": 0.0108, "step": 3062 }, { "epoch": 180.1764705882353, "grad_norm": 0.6075327396392822, "learning_rate": 1.2075700331610036e-05, "loss": 0.0071, "step": 3063 }, { "epoch": 180.23529411764707, "grad_norm": 0.9589712619781494, "learning_rate": 1.2071040955555503e-05, "loss": 0.0129, "step": 3064 }, { "epoch": 180.2941176470588, "grad_norm": 0.845645010471344, "learning_rate": 1.2066381109685695e-05, "loss": 0.0103, "step": 3065 }, { "epoch": 180.35294117647058, "grad_norm": 1.603943109512329, "learning_rate": 1.2061720795057701e-05, "loss": 0.0126, "step": 3066 }, { "epoch": 180.41176470588235, "grad_norm": 0.5763875246047974, "learning_rate": 1.205706001272871e-05, "loss": 0.0081, "step": 3067 }, { "epoch": 180.47058823529412, "grad_norm": 0.5411078333854675, "learning_rate": 1.2052398763756022e-05, "loss": 0.0097, "step": 3068 }, { "epoch": 180.52941176470588, "grad_norm": 0.501833975315094, "learning_rate": 1.2047737049197037e-05, "loss": 0.0079, "step": 3069 }, { "epoch": 180.58823529411765, "grad_norm": 1.3332568407058716, "learning_rate": 1.2043074870109271e-05, "loss": 0.0159, "step": 3070 }, { "epoch": 180.64705882352942, "grad_norm": 0.8856135010719299, "learning_rate": 1.2038412227550329e-05, "loss": 0.0165, "step": 3071 }, { "epoch": 180.7058823529412, "grad_norm": 0.983879804611206, "learning_rate": 1.2033749122577938e-05, "loss": 0.0162, "step": 3072 }, { "epoch": 180.76470588235293, "grad_norm": 0.5004110932350159, "learning_rate": 1.202908555624992e-05, "loss": 0.0077, "step": 3073 }, { "epoch": 180.8235294117647, "grad_norm": 0.4829016923904419, "learning_rate": 1.2024421529624203e-05, "loss": 0.008, "step": 3074 }, { "epoch": 180.88235294117646, "grad_norm": 0.8993896245956421, "learning_rate": 1.2019757043758827e-05, "loss": 0.0314, "step": 3075 }, { "epoch": 180.94117647058823, "grad_norm": 0.9166138172149658, "learning_rate": 1.2015092099711918e-05, "loss": 0.0163, "step": 3076 }, { "epoch": 181.0, "grad_norm": 1.1481859683990479, "learning_rate": 1.2010426698541728e-05, "loss": 0.0089, "step": 3077 }, { "epoch": 181.05882352941177, "grad_norm": 0.7117326855659485, "learning_rate": 1.2005760841306598e-05, "loss": 0.0067, "step": 3078 }, { "epoch": 181.11764705882354, "grad_norm": 0.3858020007610321, "learning_rate": 1.2001094529064981e-05, "loss": 0.0062, "step": 3079 }, { "epoch": 181.1764705882353, "grad_norm": 1.0801516771316528, "learning_rate": 1.199642776287542e-05, "loss": 0.0239, "step": 3080 }, { "epoch": 181.23529411764707, "grad_norm": 0.7860425710678101, "learning_rate": 1.199176054379658e-05, "loss": 0.0217, "step": 3081 }, { "epoch": 181.2941176470588, "grad_norm": 0.6817658543586731, "learning_rate": 1.1987092872887214e-05, "loss": 0.0134, "step": 3082 }, { "epoch": 181.35294117647058, "grad_norm": 0.5780507326126099, "learning_rate": 1.1982424751206183e-05, "loss": 0.0129, "step": 3083 }, { "epoch": 181.41176470588235, "grad_norm": 0.4436086118221283, "learning_rate": 1.1977756179812447e-05, "loss": 0.0075, "step": 3084 }, { "epoch": 181.47058823529412, "grad_norm": 0.48537713289260864, "learning_rate": 1.1973087159765078e-05, "loss": 0.0086, "step": 3085 }, { "epoch": 181.52941176470588, "grad_norm": 2.101902961730957, "learning_rate": 1.1968417692123234e-05, "loss": 0.0198, "step": 3086 }, { "epoch": 181.58823529411765, "grad_norm": 0.4908403158187866, "learning_rate": 1.1963747777946186e-05, "loss": 0.008, "step": 3087 }, { "epoch": 181.64705882352942, "grad_norm": 0.919439435005188, "learning_rate": 1.1959077418293308e-05, "loss": 0.0199, "step": 3088 }, { "epoch": 181.7058823529412, "grad_norm": 0.6402555704116821, "learning_rate": 1.1954406614224061e-05, "loss": 0.0074, "step": 3089 }, { "epoch": 181.76470588235293, "grad_norm": 1.024216890335083, "learning_rate": 1.1949735366798025e-05, "loss": 0.014, "step": 3090 }, { "epoch": 181.8235294117647, "grad_norm": 0.6576192378997803, "learning_rate": 1.1945063677074864e-05, "loss": 0.0074, "step": 3091 }, { "epoch": 181.88235294117646, "grad_norm": 0.8404018878936768, "learning_rate": 1.1940391546114358e-05, "loss": 0.017, "step": 3092 }, { "epoch": 181.94117647058823, "grad_norm": 0.880074143409729, "learning_rate": 1.1935718974976372e-05, "loss": 0.0101, "step": 3093 }, { "epoch": 182.0, "grad_norm": 0.6990481615066528, "learning_rate": 1.1931045964720882e-05, "loss": 0.01, "step": 3094 }, { "epoch": 182.05882352941177, "grad_norm": 0.5788038969039917, "learning_rate": 1.1926372516407956e-05, "loss": 0.0108, "step": 3095 }, { "epoch": 182.11764705882354, "grad_norm": 2.6515839099884033, "learning_rate": 1.1921698631097768e-05, "loss": 0.0207, "step": 3096 }, { "epoch": 182.1764705882353, "grad_norm": 0.4956376254558563, "learning_rate": 1.1917024309850587e-05, "loss": 0.0082, "step": 3097 }, { "epoch": 182.23529411764707, "grad_norm": 0.5913253426551819, "learning_rate": 1.1912349553726785e-05, "loss": 0.0103, "step": 3098 }, { "epoch": 182.2941176470588, "grad_norm": 0.6019169688224792, "learning_rate": 1.1907674363786822e-05, "loss": 0.0141, "step": 3099 }, { "epoch": 182.35294117647058, "grad_norm": 0.8322303295135498, "learning_rate": 1.1902998741091271e-05, "loss": 0.0104, "step": 3100 }, { "epoch": 182.41176470588235, "grad_norm": 0.7998039126396179, "learning_rate": 1.1898322686700796e-05, "loss": 0.018, "step": 3101 }, { "epoch": 182.47058823529412, "grad_norm": 1.200136661529541, "learning_rate": 1.1893646201676154e-05, "loss": 0.0219, "step": 3102 }, { "epoch": 182.52941176470588, "grad_norm": 0.3586142063140869, "learning_rate": 1.1888969287078212e-05, "loss": 0.0054, "step": 3103 }, { "epoch": 182.58823529411765, "grad_norm": 0.7955685257911682, "learning_rate": 1.1884291943967919e-05, "loss": 0.0111, "step": 3104 }, { "epoch": 182.64705882352942, "grad_norm": 0.6574029922485352, "learning_rate": 1.1879614173406335e-05, "loss": 0.0197, "step": 3105 }, { "epoch": 182.7058823529412, "grad_norm": 0.7235502004623413, "learning_rate": 1.1874935976454607e-05, "loss": 0.0136, "step": 3106 }, { "epoch": 182.76470588235293, "grad_norm": 0.5916742086410522, "learning_rate": 1.187025735417399e-05, "loss": 0.0082, "step": 3107 }, { "epoch": 182.8235294117647, "grad_norm": 0.669167697429657, "learning_rate": 1.1865578307625822e-05, "loss": 0.0107, "step": 3108 }, { "epoch": 182.88235294117646, "grad_norm": 0.48454010486602783, "learning_rate": 1.186089883787155e-05, "loss": 0.0062, "step": 3109 }, { "epoch": 182.94117647058823, "grad_norm": 0.7564815282821655, "learning_rate": 1.1856218945972704e-05, "loss": 0.0093, "step": 3110 }, { "epoch": 183.0, "grad_norm": 1.0683848857879639, "learning_rate": 1.1851538632990922e-05, "loss": 0.0151, "step": 3111 }, { "epoch": 183.05882352941177, "grad_norm": 0.6909693479537964, "learning_rate": 1.1846857899987928e-05, "loss": 0.0109, "step": 3112 }, { "epoch": 183.11764705882354, "grad_norm": 0.8998768329620361, "learning_rate": 1.184217674802555e-05, "loss": 0.0096, "step": 3113 }, { "epoch": 183.1764705882353, "grad_norm": 2.402540922164917, "learning_rate": 1.1837495178165706e-05, "loss": 0.0086, "step": 3114 }, { "epoch": 183.23529411764707, "grad_norm": 0.7668830752372742, "learning_rate": 1.1832813191470405e-05, "loss": 0.0097, "step": 3115 }, { "epoch": 183.2941176470588, "grad_norm": 0.5495284199714661, "learning_rate": 1.1828130789001758e-05, "loss": 0.01, "step": 3116 }, { "epoch": 183.35294117647058, "grad_norm": 0.8045113682746887, "learning_rate": 1.1823447971821964e-05, "loss": 0.0179, "step": 3117 }, { "epoch": 183.41176470588235, "grad_norm": 0.38628676533699036, "learning_rate": 1.1818764740993327e-05, "loss": 0.0062, "step": 3118 }, { "epoch": 183.47058823529412, "grad_norm": 1.3248833417892456, "learning_rate": 1.1814081097578227e-05, "loss": 0.0117, "step": 3119 }, { "epoch": 183.52941176470588, "grad_norm": 0.2566997706890106, "learning_rate": 1.1809397042639155e-05, "loss": 0.0054, "step": 3120 }, { "epoch": 183.58823529411765, "grad_norm": 0.42495062947273254, "learning_rate": 1.1804712577238685e-05, "loss": 0.0077, "step": 3121 }, { "epoch": 183.64705882352942, "grad_norm": 0.4038427174091339, "learning_rate": 1.1800027702439488e-05, "loss": 0.0095, "step": 3122 }, { "epoch": 183.7058823529412, "grad_norm": 0.5095872282981873, "learning_rate": 1.1795342419304325e-05, "loss": 0.0103, "step": 3123 }, { "epoch": 183.76470588235293, "grad_norm": 1.750715970993042, "learning_rate": 1.1790656728896054e-05, "loss": 0.0156, "step": 3124 }, { "epoch": 183.8235294117647, "grad_norm": 0.9572781324386597, "learning_rate": 1.1785970632277623e-05, "loss": 0.0178, "step": 3125 }, { "epoch": 183.88235294117646, "grad_norm": 0.6386551856994629, "learning_rate": 1.1781284130512075e-05, "loss": 0.0187, "step": 3126 }, { "epoch": 183.94117647058823, "grad_norm": 0.3554636836051941, "learning_rate": 1.1776597224662533e-05, "loss": 0.0066, "step": 3127 }, { "epoch": 184.0, "grad_norm": 0.6381910443305969, "learning_rate": 1.177190991579223e-05, "loss": 0.0126, "step": 3128 }, { "epoch": 184.05882352941177, "grad_norm": 0.8333715200424194, "learning_rate": 1.1767222204964477e-05, "loss": 0.027, "step": 3129 }, { "epoch": 184.11764705882354, "grad_norm": 0.8239140510559082, "learning_rate": 1.1762534093242679e-05, "loss": 0.0117, "step": 3130 }, { "epoch": 184.1764705882353, "grad_norm": 0.4997989535331726, "learning_rate": 1.175784558169034e-05, "loss": 0.0085, "step": 3131 }, { "epoch": 184.23529411764707, "grad_norm": 0.9295970797538757, "learning_rate": 1.1753156671371037e-05, "loss": 0.0142, "step": 3132 }, { "epoch": 184.2941176470588, "grad_norm": 0.5399088263511658, "learning_rate": 1.174846736334846e-05, "loss": 0.012, "step": 3133 }, { "epoch": 184.35294117647058, "grad_norm": 0.2833041548728943, "learning_rate": 1.1743777658686369e-05, "loss": 0.0046, "step": 3134 }, { "epoch": 184.41176470588235, "grad_norm": 0.5501187443733215, "learning_rate": 1.1739087558448628e-05, "loss": 0.0079, "step": 3135 }, { "epoch": 184.47058823529412, "grad_norm": 1.1290204524993896, "learning_rate": 1.173439706369918e-05, "loss": 0.0101, "step": 3136 }, { "epoch": 184.52941176470588, "grad_norm": 0.36857256293296814, "learning_rate": 1.1729706175502071e-05, "loss": 0.0047, "step": 3137 }, { "epoch": 184.58823529411765, "grad_norm": 0.7248129844665527, "learning_rate": 1.1725014894921422e-05, "loss": 0.0101, "step": 3138 }, { "epoch": 184.64705882352942, "grad_norm": 0.48343682289123535, "learning_rate": 1.1720323223021452e-05, "loss": 0.0056, "step": 3139 }, { "epoch": 184.7058823529412, "grad_norm": 0.9177520871162415, "learning_rate": 1.1715631160866466e-05, "loss": 0.0081, "step": 3140 }, { "epoch": 184.76470588235293, "grad_norm": 0.47362449765205383, "learning_rate": 1.1710938709520852e-05, "loss": 0.0087, "step": 3141 }, { "epoch": 184.8235294117647, "grad_norm": 0.39673930406570435, "learning_rate": 1.1706245870049095e-05, "loss": 0.007, "step": 3142 }, { "epoch": 184.88235294117646, "grad_norm": 0.6668503284454346, "learning_rate": 1.1701552643515767e-05, "loss": 0.0085, "step": 3143 }, { "epoch": 184.94117647058823, "grad_norm": 0.7453538775444031, "learning_rate": 1.1696859030985528e-05, "loss": 0.0218, "step": 3144 }, { "epoch": 185.0, "grad_norm": 0.8061491847038269, "learning_rate": 1.1692165033523117e-05, "loss": 0.0289, "step": 3145 }, { "epoch": 185.05882352941177, "grad_norm": 0.6696867942810059, "learning_rate": 1.168747065219337e-05, "loss": 0.0086, "step": 3146 }, { "epoch": 185.11764705882354, "grad_norm": 0.5114485025405884, "learning_rate": 1.1682775888061205e-05, "loss": 0.0119, "step": 3147 }, { "epoch": 185.1764705882353, "grad_norm": 0.7772492170333862, "learning_rate": 1.1678080742191628e-05, "loss": 0.0187, "step": 3148 }, { "epoch": 185.23529411764707, "grad_norm": 0.8176195025444031, "learning_rate": 1.1673385215649735e-05, "loss": 0.0133, "step": 3149 }, { "epoch": 185.2941176470588, "grad_norm": 0.4677741229534149, "learning_rate": 1.16686893095007e-05, "loss": 0.0069, "step": 3150 }, { "epoch": 185.35294117647058, "grad_norm": 0.777787446975708, "learning_rate": 1.1663993024809796e-05, "loss": 0.0118, "step": 3151 }, { "epoch": 185.41176470588235, "grad_norm": 1.018214225769043, "learning_rate": 1.1659296362642367e-05, "loss": 0.0141, "step": 3152 }, { "epoch": 185.47058823529412, "grad_norm": 0.5889327526092529, "learning_rate": 1.1654599324063856e-05, "loss": 0.0072, "step": 3153 }, { "epoch": 185.52941176470588, "grad_norm": 0.38262149691581726, "learning_rate": 1.1649901910139781e-05, "loss": 0.0054, "step": 3154 }, { "epoch": 185.58823529411765, "grad_norm": 0.7576113343238831, "learning_rate": 1.1645204121935749e-05, "loss": 0.0101, "step": 3155 }, { "epoch": 185.64705882352942, "grad_norm": 0.5026267170906067, "learning_rate": 1.1640505960517456e-05, "loss": 0.0061, "step": 3156 }, { "epoch": 185.7058823529412, "grad_norm": 0.9908201098442078, "learning_rate": 1.1635807426950678e-05, "loss": 0.0123, "step": 3157 }, { "epoch": 185.76470588235293, "grad_norm": 0.4950030744075775, "learning_rate": 1.1631108522301276e-05, "loss": 0.0072, "step": 3158 }, { "epoch": 185.8235294117647, "grad_norm": 0.7063285112380981, "learning_rate": 1.1626409247635195e-05, "loss": 0.0097, "step": 3159 }, { "epoch": 185.88235294117646, "grad_norm": 0.5911723971366882, "learning_rate": 1.1621709604018465e-05, "loss": 0.0174, "step": 3160 }, { "epoch": 185.94117647058823, "grad_norm": 0.703351616859436, "learning_rate": 1.16170095925172e-05, "loss": 0.0087, "step": 3161 }, { "epoch": 186.0, "grad_norm": 0.9544652104377747, "learning_rate": 1.1612309214197599e-05, "loss": 0.0159, "step": 3162 }, { "epoch": 186.05882352941177, "grad_norm": 0.6002181172370911, "learning_rate": 1.1607608470125938e-05, "loss": 0.0155, "step": 3163 }, { "epoch": 186.11764705882354, "grad_norm": 0.31403258442878723, "learning_rate": 1.160290736136858e-05, "loss": 0.0072, "step": 3164 }, { "epoch": 186.1764705882353, "grad_norm": 0.3857726752758026, "learning_rate": 1.1598205888991973e-05, "loss": 0.0068, "step": 3165 }, { "epoch": 186.23529411764707, "grad_norm": 0.8194264769554138, "learning_rate": 1.1593504054062645e-05, "loss": 0.0186, "step": 3166 }, { "epoch": 186.2941176470588, "grad_norm": 1.0563417673110962, "learning_rate": 1.1588801857647203e-05, "loss": 0.007, "step": 3167 }, { "epoch": 186.35294117647058, "grad_norm": 0.5380100011825562, "learning_rate": 1.1584099300812345e-05, "loss": 0.0118, "step": 3168 }, { "epoch": 186.41176470588235, "grad_norm": 0.5024369359016418, "learning_rate": 1.157939638462484e-05, "loss": 0.0129, "step": 3169 }, { "epoch": 186.47058823529412, "grad_norm": 0.26399973034858704, "learning_rate": 1.1574693110151546e-05, "loss": 0.0043, "step": 3170 }, { "epoch": 186.52941176470588, "grad_norm": 0.6783190965652466, "learning_rate": 1.15699894784594e-05, "loss": 0.0096, "step": 3171 }, { "epoch": 186.58823529411765, "grad_norm": 0.5118505358695984, "learning_rate": 1.1565285490615423e-05, "loss": 0.0109, "step": 3172 }, { "epoch": 186.64705882352942, "grad_norm": 0.7806510329246521, "learning_rate": 1.1560581147686707e-05, "loss": 0.0103, "step": 3173 }, { "epoch": 186.7058823529412, "grad_norm": 0.420890748500824, "learning_rate": 1.1555876450740438e-05, "loss": 0.0056, "step": 3174 }, { "epoch": 186.76470588235293, "grad_norm": 0.8203549385070801, "learning_rate": 1.1551171400843873e-05, "loss": 0.0094, "step": 3175 }, { "epoch": 186.8235294117647, "grad_norm": 0.3969504237174988, "learning_rate": 1.1546465999064346e-05, "loss": 0.0067, "step": 3176 }, { "epoch": 186.88235294117646, "grad_norm": 0.7028231024742126, "learning_rate": 1.1541760246469289e-05, "loss": 0.0127, "step": 3177 }, { "epoch": 186.94117647058823, "grad_norm": 0.8069395422935486, "learning_rate": 1.1537054144126189e-05, "loss": 0.0128, "step": 3178 }, { "epoch": 187.0, "grad_norm": 1.1906875371932983, "learning_rate": 1.1532347693102632e-05, "loss": 0.0193, "step": 3179 }, { "epoch": 187.05882352941177, "grad_norm": 0.681277871131897, "learning_rate": 1.1527640894466273e-05, "loss": 0.0134, "step": 3180 }, { "epoch": 187.11764705882354, "grad_norm": 0.5335513949394226, "learning_rate": 1.152293374928485e-05, "loss": 0.009, "step": 3181 }, { "epoch": 187.1764705882353, "grad_norm": 0.3998095393180847, "learning_rate": 1.1518226258626174e-05, "loss": 0.0075, "step": 3182 }, { "epoch": 187.23529411764707, "grad_norm": 0.6933133602142334, "learning_rate": 1.1513518423558143e-05, "loss": 0.0076, "step": 3183 }, { "epoch": 187.2941176470588, "grad_norm": 0.32907772064208984, "learning_rate": 1.1508810245148724e-05, "loss": 0.006, "step": 3184 }, { "epoch": 187.35294117647058, "grad_norm": 0.5590981841087341, "learning_rate": 1.1504101724465975e-05, "loss": 0.0115, "step": 3185 }, { "epoch": 187.41176470588235, "grad_norm": 0.5690488815307617, "learning_rate": 1.1499392862578011e-05, "loss": 0.0092, "step": 3186 }, { "epoch": 187.47058823529412, "grad_norm": 0.5366075038909912, "learning_rate": 1.1494683660553047e-05, "loss": 0.0072, "step": 3187 }, { "epoch": 187.52941176470588, "grad_norm": 0.7944682240486145, "learning_rate": 1.148997411945936e-05, "loss": 0.0185, "step": 3188 }, { "epoch": 187.58823529411765, "grad_norm": 0.739120602607727, "learning_rate": 1.1485264240365306e-05, "loss": 0.0105, "step": 3189 }, { "epoch": 187.64705882352942, "grad_norm": 0.5236390829086304, "learning_rate": 1.1480554024339325e-05, "loss": 0.0072, "step": 3190 }, { "epoch": 187.7058823529412, "grad_norm": 0.6807261109352112, "learning_rate": 1.1475843472449925e-05, "loss": 0.0137, "step": 3191 }, { "epoch": 187.76470588235293, "grad_norm": 0.49922308325767517, "learning_rate": 1.14711325857657e-05, "loss": 0.0087, "step": 3192 }, { "epoch": 187.8235294117647, "grad_norm": 0.4014601707458496, "learning_rate": 1.1466421365355305e-05, "loss": 0.0078, "step": 3193 }, { "epoch": 187.88235294117646, "grad_norm": 0.9380149841308594, "learning_rate": 1.1461709812287487e-05, "loss": 0.0235, "step": 3194 }, { "epoch": 187.94117647058823, "grad_norm": 0.5750796794891357, "learning_rate": 1.1456997927631055e-05, "loss": 0.0105, "step": 3195 }, { "epoch": 188.0, "grad_norm": 0.5588123202323914, "learning_rate": 1.1452285712454905e-05, "loss": 0.0098, "step": 3196 }, { "epoch": 188.05882352941177, "grad_norm": 0.8750551342964172, "learning_rate": 1.1447573167827996e-05, "loss": 0.012, "step": 3197 }, { "epoch": 188.11764705882354, "grad_norm": 0.6650190949440002, "learning_rate": 1.1442860294819376e-05, "loss": 0.0135, "step": 3198 }, { "epoch": 188.1764705882353, "grad_norm": 0.5946412682533264, "learning_rate": 1.1438147094498148e-05, "loss": 0.0073, "step": 3199 }, { "epoch": 188.23529411764707, "grad_norm": 0.7699983716011047, "learning_rate": 1.1433433567933514e-05, "loss": 0.0058, "step": 3200 }, { "epoch": 188.2941176470588, "grad_norm": 0.5290194153785706, "learning_rate": 1.1428719716194729e-05, "loss": 0.0125, "step": 3201 }, { "epoch": 188.35294117647058, "grad_norm": 0.620716392993927, "learning_rate": 1.1424005540351126e-05, "loss": 0.0106, "step": 3202 }, { "epoch": 188.41176470588235, "grad_norm": 0.5466468334197998, "learning_rate": 1.1419291041472125e-05, "loss": 0.008, "step": 3203 }, { "epoch": 188.47058823529412, "grad_norm": 0.5374093651771545, "learning_rate": 1.1414576220627199e-05, "loss": 0.0062, "step": 3204 }, { "epoch": 188.52941176470588, "grad_norm": 0.42346128821372986, "learning_rate": 1.140986107888591e-05, "loss": 0.0058, "step": 3205 }, { "epoch": 188.58823529411765, "grad_norm": 0.6488127708435059, "learning_rate": 1.1405145617317885e-05, "loss": 0.0153, "step": 3206 }, { "epoch": 188.64705882352942, "grad_norm": 0.5881394147872925, "learning_rate": 1.140042983699283e-05, "loss": 0.0092, "step": 3207 }, { "epoch": 188.7058823529412, "grad_norm": 2.5769784450531006, "learning_rate": 1.1395713738980512e-05, "loss": 0.015, "step": 3208 }, { "epoch": 188.76470588235293, "grad_norm": 1.0765271186828613, "learning_rate": 1.1390997324350783e-05, "loss": 0.0172, "step": 3209 }, { "epoch": 188.8235294117647, "grad_norm": 0.34962213039398193, "learning_rate": 1.1386280594173555e-05, "loss": 0.0051, "step": 3210 }, { "epoch": 188.88235294117646, "grad_norm": 0.6877102851867676, "learning_rate": 1.1381563549518823e-05, "loss": 0.0107, "step": 3211 }, { "epoch": 188.94117647058823, "grad_norm": 0.7188571691513062, "learning_rate": 1.1376846191456646e-05, "loss": 0.0135, "step": 3212 }, { "epoch": 189.0, "grad_norm": 0.9256550669670105, "learning_rate": 1.1372128521057155e-05, "loss": 0.0119, "step": 3213 }, { "epoch": 189.05882352941177, "grad_norm": 0.39693793654441833, "learning_rate": 1.1367410539390553e-05, "loss": 0.0089, "step": 3214 }, { "epoch": 189.11764705882354, "grad_norm": 0.33351653814315796, "learning_rate": 1.1362692247527112e-05, "loss": 0.0062, "step": 3215 }, { "epoch": 189.1764705882353, "grad_norm": 0.6067271828651428, "learning_rate": 1.1357973646537177e-05, "loss": 0.0102, "step": 3216 }, { "epoch": 189.23529411764707, "grad_norm": 0.4484587609767914, "learning_rate": 1.1353254737491161e-05, "loss": 0.0077, "step": 3217 }, { "epoch": 189.2941176470588, "grad_norm": 0.3381579518318176, "learning_rate": 1.1348535521459551e-05, "loss": 0.0047, "step": 3218 }, { "epoch": 189.35294117647058, "grad_norm": 0.6768056154251099, "learning_rate": 1.1343815999512895e-05, "loss": 0.0216, "step": 3219 }, { "epoch": 189.41176470588235, "grad_norm": 0.8428821563720703, "learning_rate": 1.1339096172721823e-05, "loss": 0.01, "step": 3220 }, { "epoch": 189.47058823529412, "grad_norm": 7.2963714599609375, "learning_rate": 1.1334376042157017e-05, "loss": 0.0138, "step": 3221 }, { "epoch": 189.52941176470588, "grad_norm": 0.4710461497306824, "learning_rate": 1.1329655608889246e-05, "loss": 0.0076, "step": 3222 }, { "epoch": 189.58823529411765, "grad_norm": 0.8477650880813599, "learning_rate": 1.1324934873989332e-05, "loss": 0.0164, "step": 3223 }, { "epoch": 189.64705882352942, "grad_norm": 0.6418139934539795, "learning_rate": 1.1320213838528185e-05, "loss": 0.0086, "step": 3224 }, { "epoch": 189.7058823529412, "grad_norm": 0.6126993298530579, "learning_rate": 1.1315492503576757e-05, "loss": 0.0086, "step": 3225 }, { "epoch": 189.76470588235293, "grad_norm": 0.7284581661224365, "learning_rate": 1.1310770870206091e-05, "loss": 0.0108, "step": 3226 }, { "epoch": 189.8235294117647, "grad_norm": 0.3054935932159424, "learning_rate": 1.130604893948729e-05, "loss": 0.0045, "step": 3227 }, { "epoch": 189.88235294117646, "grad_norm": 0.8641675114631653, "learning_rate": 1.1301326712491512e-05, "loss": 0.0186, "step": 3228 }, { "epoch": 189.94117647058823, "grad_norm": 0.8824946284294128, "learning_rate": 1.1296604190290004e-05, "loss": 0.0088, "step": 3229 }, { "epoch": 190.0, "grad_norm": 2.436894416809082, "learning_rate": 1.1291881373954066e-05, "loss": 0.0198, "step": 3230 }, { "epoch": 190.05882352941177, "grad_norm": 0.3932388722896576, "learning_rate": 1.1287158264555069e-05, "loss": 0.0058, "step": 3231 }, { "epoch": 190.11764705882354, "grad_norm": 0.7630366683006287, "learning_rate": 1.1282434863164446e-05, "loss": 0.007, "step": 3232 }, { "epoch": 190.1764705882353, "grad_norm": 1.6610591411590576, "learning_rate": 1.1277711170853705e-05, "loss": 0.0163, "step": 3233 }, { "epoch": 190.23529411764707, "grad_norm": 1.4642736911773682, "learning_rate": 1.1272987188694409e-05, "loss": 0.0154, "step": 3234 }, { "epoch": 190.2941176470588, "grad_norm": 1.0890969038009644, "learning_rate": 1.1268262917758199e-05, "loss": 0.009, "step": 3235 }, { "epoch": 190.35294117647058, "grad_norm": 0.8631327152252197, "learning_rate": 1.1263538359116767e-05, "loss": 0.0129, "step": 3236 }, { "epoch": 190.41176470588235, "grad_norm": 0.8123766183853149, "learning_rate": 1.1258813513841889e-05, "loss": 0.0114, "step": 3237 }, { "epoch": 190.47058823529412, "grad_norm": 0.8716486096382141, "learning_rate": 1.1254088383005383e-05, "loss": 0.0139, "step": 3238 }, { "epoch": 190.52941176470588, "grad_norm": 1.1834709644317627, "learning_rate": 1.1249362967679156e-05, "loss": 0.0125, "step": 3239 }, { "epoch": 190.58823529411765, "grad_norm": 0.7146326303482056, "learning_rate": 1.1244637268935157e-05, "loss": 0.0152, "step": 3240 }, { "epoch": 190.64705882352942, "grad_norm": 0.8744481205940247, "learning_rate": 1.1239911287845418e-05, "loss": 0.0131, "step": 3241 }, { "epoch": 190.7058823529412, "grad_norm": 0.7602493762969971, "learning_rate": 1.1235185025482025e-05, "loss": 0.0115, "step": 3242 }, { "epoch": 190.76470588235293, "grad_norm": 1.2374101877212524, "learning_rate": 1.1230458482917124e-05, "loss": 0.0082, "step": 3243 }, { "epoch": 190.8235294117647, "grad_norm": 0.3195934295654297, "learning_rate": 1.1225731661222938e-05, "loss": 0.0048, "step": 3244 }, { "epoch": 190.88235294117646, "grad_norm": 1.2733758687973022, "learning_rate": 1.122100456147174e-05, "loss": 0.0212, "step": 3245 }, { "epoch": 190.94117647058823, "grad_norm": 0.8548177480697632, "learning_rate": 1.1216277184735875e-05, "loss": 0.0146, "step": 3246 }, { "epoch": 191.0, "grad_norm": 1.069840669631958, "learning_rate": 1.1211549532087749e-05, "loss": 0.0108, "step": 3247 }, { "epoch": 191.05882352941177, "grad_norm": 0.8075317740440369, "learning_rate": 1.1206821604599827e-05, "loss": 0.0099, "step": 3248 }, { "epoch": 191.11764705882354, "grad_norm": 0.7963787317276001, "learning_rate": 1.1202093403344638e-05, "loss": 0.0143, "step": 3249 }, { "epoch": 191.1764705882353, "grad_norm": 0.767851710319519, "learning_rate": 1.1197364929394772e-05, "loss": 0.0151, "step": 3250 }, { "epoch": 191.23529411764707, "grad_norm": 0.804637610912323, "learning_rate": 1.1192636183822887e-05, "loss": 0.0196, "step": 3251 }, { "epoch": 191.2941176470588, "grad_norm": 1.0264089107513428, "learning_rate": 1.1187907167701694e-05, "loss": 0.01, "step": 3252 }, { "epoch": 191.35294117647058, "grad_norm": 0.4684655964374542, "learning_rate": 1.1183177882103974e-05, "loss": 0.0094, "step": 3253 }, { "epoch": 191.41176470588235, "grad_norm": 0.6694850921630859, "learning_rate": 1.117844832810256e-05, "loss": 0.0068, "step": 3254 }, { "epoch": 191.47058823529412, "grad_norm": 2.0198493003845215, "learning_rate": 1.1173718506770353e-05, "loss": 0.0175, "step": 3255 }, { "epoch": 191.52941176470588, "grad_norm": 0.6808951497077942, "learning_rate": 1.1168988419180308e-05, "loss": 0.0075, "step": 3256 }, { "epoch": 191.58823529411765, "grad_norm": 0.718745768070221, "learning_rate": 1.1164258066405453e-05, "loss": 0.009, "step": 3257 }, { "epoch": 191.64705882352942, "grad_norm": 1.948945164680481, "learning_rate": 1.1159527449518861e-05, "loss": 0.0155, "step": 3258 }, { "epoch": 191.7058823529412, "grad_norm": 0.4979529082775116, "learning_rate": 1.1154796569593673e-05, "loss": 0.0055, "step": 3259 }, { "epoch": 191.76470588235293, "grad_norm": 0.8279953002929688, "learning_rate": 1.1150065427703088e-05, "loss": 0.0209, "step": 3260 }, { "epoch": 191.8235294117647, "grad_norm": 1.0644099712371826, "learning_rate": 1.1145334024920364e-05, "loss": 0.0105, "step": 3261 }, { "epoch": 191.88235294117646, "grad_norm": 1.4526000022888184, "learning_rate": 1.1140602362318824e-05, "loss": 0.008, "step": 3262 }, { "epoch": 191.94117647058823, "grad_norm": 0.6447782516479492, "learning_rate": 1.1135870440971836e-05, "loss": 0.0083, "step": 3263 }, { "epoch": 192.0, "grad_norm": 1.9511919021606445, "learning_rate": 1.1131138261952845e-05, "loss": 0.0161, "step": 3264 }, { "epoch": 192.05882352941177, "grad_norm": 0.49022072553634644, "learning_rate": 1.1126405826335336e-05, "loss": 0.0112, "step": 3265 }, { "epoch": 192.11764705882354, "grad_norm": 0.48885732889175415, "learning_rate": 1.112167313519287e-05, "loss": 0.0049, "step": 3266 }, { "epoch": 192.1764705882353, "grad_norm": 1.5390304327011108, "learning_rate": 1.1116940189599048e-05, "loss": 0.012, "step": 3267 }, { "epoch": 192.23529411764707, "grad_norm": 0.800063967704773, "learning_rate": 1.1112206990627547e-05, "loss": 0.0156, "step": 3268 }, { "epoch": 192.2941176470588, "grad_norm": 0.6743875741958618, "learning_rate": 1.1107473539352084e-05, "loss": 0.0073, "step": 3269 }, { "epoch": 192.35294117647058, "grad_norm": 0.5225236415863037, "learning_rate": 1.1102739836846447e-05, "loss": 0.0061, "step": 3270 }, { "epoch": 192.41176470588235, "grad_norm": 0.5735645294189453, "learning_rate": 1.1098005884184475e-05, "loss": 0.0175, "step": 3271 }, { "epoch": 192.47058823529412, "grad_norm": 1.1004399061203003, "learning_rate": 1.1093271682440063e-05, "loss": 0.0122, "step": 3272 }, { "epoch": 192.52941176470588, "grad_norm": 1.1626893281936646, "learning_rate": 1.1088537232687164e-05, "loss": 0.0114, "step": 3273 }, { "epoch": 192.58823529411765, "grad_norm": 0.7332838177680969, "learning_rate": 1.1083802535999789e-05, "loss": 0.0103, "step": 3274 }, { "epoch": 192.64705882352942, "grad_norm": 1.8718154430389404, "learning_rate": 1.1079067593452e-05, "loss": 0.0172, "step": 3275 }, { "epoch": 192.7058823529412, "grad_norm": 0.44756245613098145, "learning_rate": 1.1074332406117918e-05, "loss": 0.0058, "step": 3276 }, { "epoch": 192.76470588235293, "grad_norm": 0.6385247111320496, "learning_rate": 1.1069596975071722e-05, "loss": 0.0108, "step": 3277 }, { "epoch": 192.8235294117647, "grad_norm": 0.5962647795677185, "learning_rate": 1.1064861301387641e-05, "loss": 0.0118, "step": 3278 }, { "epoch": 192.88235294117646, "grad_norm": 0.6403566598892212, "learning_rate": 1.1060125386139965e-05, "loss": 0.0054, "step": 3279 }, { "epoch": 192.94117647058823, "grad_norm": 0.8743329644203186, "learning_rate": 1.105538923040303e-05, "loss": 0.0084, "step": 3280 }, { "epoch": 193.0, "grad_norm": 1.3458137512207031, "learning_rate": 1.105065283525124e-05, "loss": 0.019, "step": 3281 }, { "epoch": 193.05882352941177, "grad_norm": 0.6348336338996887, "learning_rate": 1.1045916201759039e-05, "loss": 0.0133, "step": 3282 }, { "epoch": 193.11764705882354, "grad_norm": 0.44266948103904724, "learning_rate": 1.1041179331000936e-05, "loss": 0.0073, "step": 3283 }, { "epoch": 193.1764705882353, "grad_norm": 0.2964407503604889, "learning_rate": 1.1036442224051485e-05, "loss": 0.0053, "step": 3284 }, { "epoch": 193.23529411764707, "grad_norm": 0.39041921496391296, "learning_rate": 1.1031704881985298e-05, "loss": 0.0101, "step": 3285 }, { "epoch": 193.2941176470588, "grad_norm": 0.5685369372367859, "learning_rate": 1.1026967305877046e-05, "loss": 0.0118, "step": 3286 }, { "epoch": 193.35294117647058, "grad_norm": 0.6530228853225708, "learning_rate": 1.102222949680144e-05, "loss": 0.0114, "step": 3287 }, { "epoch": 193.41176470588235, "grad_norm": 0.9715292453765869, "learning_rate": 1.1017491455833258e-05, "loss": 0.0158, "step": 3288 }, { "epoch": 193.47058823529412, "grad_norm": 0.40086865425109863, "learning_rate": 1.1012753184047316e-05, "loss": 0.0062, "step": 3289 }, { "epoch": 193.52941176470588, "grad_norm": 1.6728899478912354, "learning_rate": 1.1008014682518495e-05, "loss": 0.0108, "step": 3290 }, { "epoch": 193.58823529411765, "grad_norm": 1.067245602607727, "learning_rate": 1.1003275952321724e-05, "loss": 0.0112, "step": 3291 }, { "epoch": 193.64705882352942, "grad_norm": 0.5383948683738708, "learning_rate": 1.0998536994531982e-05, "loss": 0.0065, "step": 3292 }, { "epoch": 193.7058823529412, "grad_norm": 0.7143935561180115, "learning_rate": 1.0993797810224299e-05, "loss": 0.0076, "step": 3293 }, { "epoch": 193.76470588235293, "grad_norm": 0.5741183161735535, "learning_rate": 1.098905840047376e-05, "loss": 0.0153, "step": 3294 }, { "epoch": 193.8235294117647, "grad_norm": 0.597521960735321, "learning_rate": 1.0984318766355495e-05, "loss": 0.0117, "step": 3295 }, { "epoch": 193.88235294117646, "grad_norm": 1.1099756956100464, "learning_rate": 1.0979578908944698e-05, "loss": 0.011, "step": 3296 }, { "epoch": 193.94117647058823, "grad_norm": 0.48150625824928284, "learning_rate": 1.0974838829316597e-05, "loss": 0.0077, "step": 3297 }, { "epoch": 194.0, "grad_norm": 0.6554444432258606, "learning_rate": 1.0970098528546482e-05, "loss": 0.0144, "step": 3298 }, { "epoch": 194.05882352941177, "grad_norm": 0.9587862491607666, "learning_rate": 1.0965358007709687e-05, "loss": 0.0131, "step": 3299 }, { "epoch": 194.11764705882354, "grad_norm": 0.4443546533584595, "learning_rate": 1.0960617267881601e-05, "loss": 0.0057, "step": 3300 }, { "epoch": 194.1764705882353, "grad_norm": 0.45704346895217896, "learning_rate": 1.095587631013766e-05, "loss": 0.0061, "step": 3301 }, { "epoch": 194.23529411764707, "grad_norm": 0.6759663820266724, "learning_rate": 1.0951135135553346e-05, "loss": 0.022, "step": 3302 }, { "epoch": 194.2941176470588, "grad_norm": 0.7912783622741699, "learning_rate": 1.0946393745204196e-05, "loss": 0.0063, "step": 3303 }, { "epoch": 194.35294117647058, "grad_norm": 0.6114739179611206, "learning_rate": 1.094165214016579e-05, "loss": 0.0111, "step": 3304 }, { "epoch": 194.41176470588235, "grad_norm": 0.6887639164924622, "learning_rate": 1.093691032151377e-05, "loss": 0.0101, "step": 3305 }, { "epoch": 194.47058823529412, "grad_norm": 0.5875768065452576, "learning_rate": 1.0932168290323803e-05, "loss": 0.0051, "step": 3306 }, { "epoch": 194.52941176470588, "grad_norm": 0.5792519450187683, "learning_rate": 1.092742604767163e-05, "loss": 0.0184, "step": 3307 }, { "epoch": 194.58823529411765, "grad_norm": 0.9030596613883972, "learning_rate": 1.092268359463302e-05, "loss": 0.0124, "step": 3308 }, { "epoch": 194.64705882352942, "grad_norm": 0.4022775888442993, "learning_rate": 1.0917940932283803e-05, "loss": 0.0066, "step": 3309 }, { "epoch": 194.7058823529412, "grad_norm": 0.6346903443336487, "learning_rate": 1.0913198061699847e-05, "loss": 0.0085, "step": 3310 }, { "epoch": 194.76470588235293, "grad_norm": 0.6959624886512756, "learning_rate": 1.0908454983957075e-05, "loss": 0.009, "step": 3311 }, { "epoch": 194.8235294117647, "grad_norm": 0.6008943319320679, "learning_rate": 1.090371170013145e-05, "loss": 0.0121, "step": 3312 }, { "epoch": 194.88235294117646, "grad_norm": 0.7145216464996338, "learning_rate": 1.0898968211298988e-05, "loss": 0.0085, "step": 3313 }, { "epoch": 194.94117647058823, "grad_norm": 0.4418826997280121, "learning_rate": 1.0894224518535742e-05, "loss": 0.006, "step": 3314 }, { "epoch": 195.0, "grad_norm": 0.7191243767738342, "learning_rate": 1.088948062291783e-05, "loss": 0.0081, "step": 3315 }, { "epoch": 195.05882352941177, "grad_norm": 0.8716846108436584, "learning_rate": 1.0884736525521395e-05, "loss": 0.0079, "step": 3316 }, { "epoch": 195.11764705882354, "grad_norm": 0.6322591304779053, "learning_rate": 1.0879992227422633e-05, "loss": 0.0055, "step": 3317 }, { "epoch": 195.1764705882353, "grad_norm": 0.3721468448638916, "learning_rate": 1.0875247729697795e-05, "loss": 0.0051, "step": 3318 }, { "epoch": 195.23529411764707, "grad_norm": 0.5733743906021118, "learning_rate": 1.0870503033423161e-05, "loss": 0.0147, "step": 3319 }, { "epoch": 195.2941176470588, "grad_norm": 0.9203158020973206, "learning_rate": 1.0865758139675071e-05, "loss": 0.0115, "step": 3320 }, { "epoch": 195.35294117647058, "grad_norm": 0.47411975264549255, "learning_rate": 1.08610130495299e-05, "loss": 0.0057, "step": 3321 }, { "epoch": 195.41176470588235, "grad_norm": 0.650707483291626, "learning_rate": 1.0856267764064072e-05, "loss": 0.0105, "step": 3322 }, { "epoch": 195.47058823529412, "grad_norm": 0.5678887367248535, "learning_rate": 1.0851522284354054e-05, "loss": 0.0131, "step": 3323 }, { "epoch": 195.52941176470588, "grad_norm": 0.8815030455589294, "learning_rate": 1.084677661147636e-05, "loss": 0.0084, "step": 3324 }, { "epoch": 195.58823529411765, "grad_norm": 0.4874792993068695, "learning_rate": 1.0842030746507537e-05, "loss": 0.0074, "step": 3325 }, { "epoch": 195.64705882352942, "grad_norm": 0.6122292876243591, "learning_rate": 1.0837284690524193e-05, "loss": 0.0082, "step": 3326 }, { "epoch": 195.7058823529412, "grad_norm": 0.5511192083358765, "learning_rate": 1.0832538444602963e-05, "loss": 0.014, "step": 3327 }, { "epoch": 195.76470588235293, "grad_norm": 0.6026151776313782, "learning_rate": 1.0827792009820537e-05, "loss": 0.0088, "step": 3328 }, { "epoch": 195.8235294117647, "grad_norm": 1.6833758354187012, "learning_rate": 1.082304538725364e-05, "loss": 0.0182, "step": 3329 }, { "epoch": 195.88235294117646, "grad_norm": 0.5726675987243652, "learning_rate": 1.0818298577979043e-05, "loss": 0.0148, "step": 3330 }, { "epoch": 195.94117647058823, "grad_norm": 0.46061205863952637, "learning_rate": 1.0813551583073561e-05, "loss": 0.0097, "step": 3331 }, { "epoch": 196.0, "grad_norm": 0.421731561422348, "learning_rate": 1.0808804403614044e-05, "loss": 0.0061, "step": 3332 }, { "epoch": 196.05882352941177, "grad_norm": 0.5646241307258606, "learning_rate": 1.0804057040677393e-05, "loss": 0.0077, "step": 3333 }, { "epoch": 196.11764705882354, "grad_norm": 0.37047556042671204, "learning_rate": 1.0799309495340544e-05, "loss": 0.0071, "step": 3334 }, { "epoch": 196.1764705882353, "grad_norm": 0.3495754599571228, "learning_rate": 1.079456176868048e-05, "loss": 0.0039, "step": 3335 }, { "epoch": 196.23529411764707, "grad_norm": 0.3757985532283783, "learning_rate": 1.078981386177422e-05, "loss": 0.0071, "step": 3336 }, { "epoch": 196.2941176470588, "grad_norm": 0.5946148037910461, "learning_rate": 1.0785065775698826e-05, "loss": 0.012, "step": 3337 }, { "epoch": 196.35294117647058, "grad_norm": 0.6606171727180481, "learning_rate": 1.0780317511531397e-05, "loss": 0.0158, "step": 3338 }, { "epoch": 196.41176470588235, "grad_norm": 0.48215633630752563, "learning_rate": 1.0775569070349084e-05, "loss": 0.0123, "step": 3339 }, { "epoch": 196.47058823529412, "grad_norm": 0.5550562143325806, "learning_rate": 1.0770820453229063e-05, "loss": 0.0072, "step": 3340 }, { "epoch": 196.52941176470588, "grad_norm": 0.3200398087501526, "learning_rate": 1.076607166124856e-05, "loss": 0.005, "step": 3341 }, { "epoch": 196.58823529411765, "grad_norm": 0.8271969556808472, "learning_rate": 1.0761322695484838e-05, "loss": 0.0154, "step": 3342 }, { "epoch": 196.64705882352942, "grad_norm": 0.9972195625305176, "learning_rate": 1.0756573557015193e-05, "loss": 0.0133, "step": 3343 }, { "epoch": 196.7058823529412, "grad_norm": 0.5622900724411011, "learning_rate": 1.0751824246916975e-05, "loss": 0.0156, "step": 3344 }, { "epoch": 196.76470588235293, "grad_norm": 0.5998724699020386, "learning_rate": 1.0747074766267561e-05, "loss": 0.0074, "step": 3345 }, { "epoch": 196.8235294117647, "grad_norm": 0.6753220558166504, "learning_rate": 1.0742325116144367e-05, "loss": 0.0102, "step": 3346 }, { "epoch": 196.88235294117646, "grad_norm": 0.6193666458129883, "learning_rate": 1.0737575297624852e-05, "loss": 0.0064, "step": 3347 }, { "epoch": 196.94117647058823, "grad_norm": 0.5436004400253296, "learning_rate": 1.0732825311786512e-05, "loss": 0.0079, "step": 3348 }, { "epoch": 197.0, "grad_norm": 0.7212775349617004, "learning_rate": 1.0728075159706881e-05, "loss": 0.0066, "step": 3349 }, { "epoch": 197.05882352941177, "grad_norm": 0.5709425210952759, "learning_rate": 1.072332484246353e-05, "loss": 0.0098, "step": 3350 }, { "epoch": 197.11764705882354, "grad_norm": 0.35751649737358093, "learning_rate": 1.0718574361134066e-05, "loss": 0.0043, "step": 3351 }, { "epoch": 197.1764705882353, "grad_norm": 0.4989495873451233, "learning_rate": 1.0713823716796136e-05, "loss": 0.0059, "step": 3352 }, { "epoch": 197.23529411764707, "grad_norm": 0.5936537384986877, "learning_rate": 1.0709072910527425e-05, "loss": 0.0136, "step": 3353 }, { "epoch": 197.2941176470588, "grad_norm": 0.603768527507782, "learning_rate": 1.0704321943405648e-05, "loss": 0.0121, "step": 3354 }, { "epoch": 197.35294117647058, "grad_norm": 0.5736927390098572, "learning_rate": 1.0699570816508565e-05, "loss": 0.0141, "step": 3355 }, { "epoch": 197.41176470588235, "grad_norm": 0.21487264335155487, "learning_rate": 1.0694819530913965e-05, "loss": 0.0042, "step": 3356 }, { "epoch": 197.47058823529412, "grad_norm": 0.3870476484298706, "learning_rate": 1.069006808769968e-05, "loss": 0.0069, "step": 3357 }, { "epoch": 197.52941176470588, "grad_norm": 1.1069742441177368, "learning_rate": 1.068531648794357e-05, "loss": 0.0081, "step": 3358 }, { "epoch": 197.58823529411765, "grad_norm": 0.6872608661651611, "learning_rate": 1.0680564732723537e-05, "loss": 0.0118, "step": 3359 }, { "epoch": 197.64705882352942, "grad_norm": 0.5206134915351868, "learning_rate": 1.0675812823117516e-05, "loss": 0.0073, "step": 3360 }, { "epoch": 197.7058823529412, "grad_norm": 0.5748189091682434, "learning_rate": 1.0671060760203475e-05, "loss": 0.0118, "step": 3361 }, { "epoch": 197.76470588235293, "grad_norm": 0.8579354882240295, "learning_rate": 1.0666308545059423e-05, "loss": 0.0087, "step": 3362 }, { "epoch": 197.8235294117647, "grad_norm": 0.48447051644325256, "learning_rate": 1.0661556178763392e-05, "loss": 0.0063, "step": 3363 }, { "epoch": 197.88235294117646, "grad_norm": 0.6755900382995605, "learning_rate": 1.0656803662393462e-05, "loss": 0.0158, "step": 3364 }, { "epoch": 197.94117647058823, "grad_norm": 0.4638415277004242, "learning_rate": 1.0652050997027736e-05, "loss": 0.0109, "step": 3365 }, { "epoch": 198.0, "grad_norm": 0.2703321874141693, "learning_rate": 1.0647298183744359e-05, "loss": 0.0049, "step": 3366 }, { "epoch": 198.05882352941177, "grad_norm": 0.6394265294075012, "learning_rate": 1.0642545223621502e-05, "loss": 0.0099, "step": 3367 }, { "epoch": 198.11764705882354, "grad_norm": 0.5080444812774658, "learning_rate": 1.0637792117737378e-05, "loss": 0.0136, "step": 3368 }, { "epoch": 198.1764705882353, "grad_norm": 0.3847365379333496, "learning_rate": 1.0633038867170222e-05, "loss": 0.0061, "step": 3369 }, { "epoch": 198.23529411764707, "grad_norm": 0.5004255175590515, "learning_rate": 1.0628285472998316e-05, "loss": 0.0098, "step": 3370 }, { "epoch": 198.2941176470588, "grad_norm": 0.5412321090698242, "learning_rate": 1.0623531936299958e-05, "loss": 0.0123, "step": 3371 }, { "epoch": 198.35294117647058, "grad_norm": 0.4309237003326416, "learning_rate": 1.0618778258153495e-05, "loss": 0.0096, "step": 3372 }, { "epoch": 198.41176470588235, "grad_norm": 0.6255188584327698, "learning_rate": 1.0614024439637292e-05, "loss": 0.0163, "step": 3373 }, { "epoch": 198.47058823529412, "grad_norm": 0.5641874670982361, "learning_rate": 1.0609270481829754e-05, "loss": 0.0062, "step": 3374 }, { "epoch": 198.52941176470588, "grad_norm": 0.4309609830379486, "learning_rate": 1.0604516385809317e-05, "loss": 0.0074, "step": 3375 }, { "epoch": 198.58823529411765, "grad_norm": 0.21914562582969666, "learning_rate": 1.0599762152654442e-05, "loss": 0.0041, "step": 3376 }, { "epoch": 198.64705882352942, "grad_norm": 0.35446369647979736, "learning_rate": 1.0595007783443635e-05, "loss": 0.0051, "step": 3377 }, { "epoch": 198.7058823529412, "grad_norm": 0.3455387353897095, "learning_rate": 1.0590253279255411e-05, "loss": 0.0066, "step": 3378 }, { "epoch": 198.76470588235293, "grad_norm": 0.43019899725914, "learning_rate": 1.0585498641168342e-05, "loss": 0.0084, "step": 3379 }, { "epoch": 198.8235294117647, "grad_norm": 0.26074251532554626, "learning_rate": 1.0580743870261006e-05, "loss": 0.0037, "step": 3380 }, { "epoch": 198.88235294117646, "grad_norm": 0.6372624039649963, "learning_rate": 1.0575988967612028e-05, "loss": 0.0126, "step": 3381 }, { "epoch": 198.94117647058823, "grad_norm": 0.563014030456543, "learning_rate": 1.0571233934300053e-05, "loss": 0.0111, "step": 3382 }, { "epoch": 199.0, "grad_norm": 0.3991870880126953, "learning_rate": 1.0566478771403763e-05, "loss": 0.0089, "step": 3383 }, { "epoch": 199.05882352941177, "grad_norm": 0.530103325843811, "learning_rate": 1.0561723480001864e-05, "loss": 0.0071, "step": 3384 }, { "epoch": 199.11764705882354, "grad_norm": 0.7871471047401428, "learning_rate": 1.0556968061173097e-05, "loss": 0.0152, "step": 3385 }, { "epoch": 199.1764705882353, "grad_norm": 0.5750459432601929, "learning_rate": 1.0552212515996218e-05, "loss": 0.0048, "step": 3386 }, { "epoch": 199.23529411764707, "grad_norm": 0.6583125591278076, "learning_rate": 1.0547456845550033e-05, "loss": 0.0101, "step": 3387 }, { "epoch": 199.2941176470588, "grad_norm": 0.703303873538971, "learning_rate": 1.0542701050913357e-05, "loss": 0.0143, "step": 3388 }, { "epoch": 199.35294117647058, "grad_norm": 1.052396535873413, "learning_rate": 1.0537945133165049e-05, "loss": 0.0183, "step": 3389 }, { "epoch": 199.41176470588235, "grad_norm": 0.33063453435897827, "learning_rate": 1.0533189093383979e-05, "loss": 0.0067, "step": 3390 }, { "epoch": 199.47058823529412, "grad_norm": 0.30169084668159485, "learning_rate": 1.0528432932649057e-05, "loss": 0.0048, "step": 3391 }, { "epoch": 199.52941176470588, "grad_norm": 0.6786894798278809, "learning_rate": 1.052367665203922e-05, "loss": 0.0155, "step": 3392 }, { "epoch": 199.58823529411765, "grad_norm": 0.5916212201118469, "learning_rate": 1.0518920252633426e-05, "loss": 0.0126, "step": 3393 }, { "epoch": 199.64705882352942, "grad_norm": 0.46488049626350403, "learning_rate": 1.0514163735510669e-05, "loss": 0.0091, "step": 3394 }, { "epoch": 199.7058823529412, "grad_norm": 0.3523831069469452, "learning_rate": 1.0509407101749955e-05, "loss": 0.0045, "step": 3395 }, { "epoch": 199.76470588235293, "grad_norm": 0.6381136775016785, "learning_rate": 1.0504650352430334e-05, "loss": 0.0063, "step": 3396 }, { "epoch": 199.8235294117647, "grad_norm": 0.3825772702693939, "learning_rate": 1.0499893488630868e-05, "loss": 0.0048, "step": 3397 }, { "epoch": 199.88235294117646, "grad_norm": 0.8061888813972473, "learning_rate": 1.0495136511430654e-05, "loss": 0.0071, "step": 3398 }, { "epoch": 199.94117647058823, "grad_norm": 0.6754483580589294, "learning_rate": 1.0490379421908811e-05, "loss": 0.0069, "step": 3399 }, { "epoch": 200.0, "grad_norm": 0.4437805712223053, "learning_rate": 1.0485622221144485e-05, "loss": 0.0088, "step": 3400 }, { "epoch": 200.05882352941177, "grad_norm": 0.8205817937850952, "learning_rate": 1.048086491021684e-05, "loss": 0.0108, "step": 3401 }, { "epoch": 200.11764705882354, "grad_norm": 0.33706730604171753, "learning_rate": 1.0476107490205079e-05, "loss": 0.0072, "step": 3402 }, { "epoch": 200.1764705882353, "grad_norm": 0.5950855612754822, "learning_rate": 1.0471349962188418e-05, "loss": 0.0121, "step": 3403 }, { "epoch": 200.23529411764707, "grad_norm": 0.37278032302856445, "learning_rate": 1.0466592327246101e-05, "loss": 0.0067, "step": 3404 }, { "epoch": 200.2941176470588, "grad_norm": 3.7016637325286865, "learning_rate": 1.0461834586457398e-05, "loss": 0.014, "step": 3405 }, { "epoch": 200.35294117647058, "grad_norm": 0.6629802584648132, "learning_rate": 1.0457076740901599e-05, "loss": 0.0067, "step": 3406 }, { "epoch": 200.41176470588235, "grad_norm": 0.6877986788749695, "learning_rate": 1.0452318791658027e-05, "loss": 0.0084, "step": 3407 }, { "epoch": 200.47058823529412, "grad_norm": 0.6161410212516785, "learning_rate": 1.0447560739806013e-05, "loss": 0.0133, "step": 3408 }, { "epoch": 200.52941176470588, "grad_norm": 0.6633144021034241, "learning_rate": 1.044280258642493e-05, "loss": 0.0047, "step": 3409 }, { "epoch": 200.58823529411765, "grad_norm": 0.27792593836784363, "learning_rate": 1.0438044332594152e-05, "loss": 0.0051, "step": 3410 }, { "epoch": 200.64705882352942, "grad_norm": 0.2987692952156067, "learning_rate": 1.0433285979393102e-05, "loss": 0.0043, "step": 3411 }, { "epoch": 200.7058823529412, "grad_norm": 0.5147924423217773, "learning_rate": 1.04285275279012e-05, "loss": 0.0063, "step": 3412 }, { "epoch": 200.76470588235293, "grad_norm": 1.0551624298095703, "learning_rate": 1.0423768979197906e-05, "loss": 0.0091, "step": 3413 }, { "epoch": 200.8235294117647, "grad_norm": 0.697706937789917, "learning_rate": 1.0419010334362694e-05, "loss": 0.0147, "step": 3414 }, { "epoch": 200.88235294117646, "grad_norm": 0.6694627404212952, "learning_rate": 1.0414251594475062e-05, "loss": 0.0175, "step": 3415 }, { "epoch": 200.94117647058823, "grad_norm": 0.6742429733276367, "learning_rate": 1.0409492760614532e-05, "loss": 0.009, "step": 3416 }, { "epoch": 201.0, "grad_norm": 0.4949852228164673, "learning_rate": 1.0404733833860639e-05, "loss": 0.0067, "step": 3417 }, { "epoch": 201.05882352941177, "grad_norm": 0.5726327300071716, "learning_rate": 1.039997481529295e-05, "loss": 0.0061, "step": 3418 }, { "epoch": 201.11764705882354, "grad_norm": 0.2758271098136902, "learning_rate": 1.0395215705991043e-05, "loss": 0.0045, "step": 3419 }, { "epoch": 201.1764705882353, "grad_norm": 0.7214770317077637, "learning_rate": 1.0390456507034525e-05, "loss": 0.0059, "step": 3420 }, { "epoch": 201.23529411764707, "grad_norm": 0.5486095547676086, "learning_rate": 1.0385697219503014e-05, "loss": 0.0107, "step": 3421 }, { "epoch": 201.2941176470588, "grad_norm": 0.29414239525794983, "learning_rate": 1.0380937844476165e-05, "loss": 0.0038, "step": 3422 }, { "epoch": 201.35294117647058, "grad_norm": 0.48533859848976135, "learning_rate": 1.0376178383033625e-05, "loss": 0.0088, "step": 3423 }, { "epoch": 201.41176470588235, "grad_norm": 0.8743560910224915, "learning_rate": 1.0371418836255091e-05, "loss": 0.0085, "step": 3424 }, { "epoch": 201.47058823529412, "grad_norm": 0.7046306133270264, "learning_rate": 1.036665920522026e-05, "loss": 0.0098, "step": 3425 }, { "epoch": 201.52941176470588, "grad_norm": 0.5896130800247192, "learning_rate": 1.0361899491008854e-05, "loss": 0.0104, "step": 3426 }, { "epoch": 201.58823529411765, "grad_norm": 0.5950531959533691, "learning_rate": 1.0357139694700609e-05, "loss": 0.0117, "step": 3427 }, { "epoch": 201.64705882352942, "grad_norm": 0.4830342233181, "learning_rate": 1.0352379817375292e-05, "loss": 0.0083, "step": 3428 }, { "epoch": 201.7058823529412, "grad_norm": 0.7207565903663635, "learning_rate": 1.0347619860112673e-05, "loss": 0.019, "step": 3429 }, { "epoch": 201.76470588235293, "grad_norm": 0.8983915448188782, "learning_rate": 1.034285982399255e-05, "loss": 0.0111, "step": 3430 }, { "epoch": 201.8235294117647, "grad_norm": 0.3921505808830261, "learning_rate": 1.033809971009474e-05, "loss": 0.0043, "step": 3431 }, { "epoch": 201.88235294117646, "grad_norm": 0.447058767080307, "learning_rate": 1.0333339519499069e-05, "loss": 0.0051, "step": 3432 }, { "epoch": 201.94117647058823, "grad_norm": 0.7316491603851318, "learning_rate": 1.032857925328539e-05, "loss": 0.0181, "step": 3433 }, { "epoch": 202.0, "grad_norm": 0.28741729259490967, "learning_rate": 1.0323818912533561e-05, "loss": 0.0046, "step": 3434 }, { "epoch": 202.05882352941177, "grad_norm": 0.4935266971588135, "learning_rate": 1.0319058498323473e-05, "loss": 0.0058, "step": 3435 }, { "epoch": 202.11764705882354, "grad_norm": 0.6482890844345093, "learning_rate": 1.0314298011735018e-05, "loss": 0.0119, "step": 3436 }, { "epoch": 202.1764705882353, "grad_norm": 0.6289078593254089, "learning_rate": 1.0309537453848117e-05, "loss": 0.0107, "step": 3437 }, { "epoch": 202.23529411764707, "grad_norm": 0.3940635323524475, "learning_rate": 1.0304776825742699e-05, "loss": 0.004, "step": 3438 }, { "epoch": 202.2941176470588, "grad_norm": 0.6484424471855164, "learning_rate": 1.0300016128498715e-05, "loss": 0.0083, "step": 3439 }, { "epoch": 202.35294117647058, "grad_norm": 0.8304846286773682, "learning_rate": 1.0295255363196121e-05, "loss": 0.0075, "step": 3440 }, { "epoch": 202.41176470588235, "grad_norm": 0.6356516480445862, "learning_rate": 1.0290494530914906e-05, "loss": 0.0102, "step": 3441 }, { "epoch": 202.47058823529412, "grad_norm": 0.5393570065498352, "learning_rate": 1.0285733632735056e-05, "loss": 0.0089, "step": 3442 }, { "epoch": 202.52941176470588, "grad_norm": 0.7670809626579285, "learning_rate": 1.028097266973658e-05, "loss": 0.0109, "step": 3443 }, { "epoch": 202.58823529411765, "grad_norm": 0.5762925744056702, "learning_rate": 1.0276211642999505e-05, "loss": 0.0041, "step": 3444 }, { "epoch": 202.64705882352942, "grad_norm": 0.8074893951416016, "learning_rate": 1.0271450553603868e-05, "loss": 0.0203, "step": 3445 }, { "epoch": 202.7058823529412, "grad_norm": 0.5593661665916443, "learning_rate": 1.0266689402629721e-05, "loss": 0.0077, "step": 3446 }, { "epoch": 202.76470588235293, "grad_norm": 0.41847580671310425, "learning_rate": 1.0261928191157129e-05, "loss": 0.0103, "step": 3447 }, { "epoch": 202.8235294117647, "grad_norm": 0.24880154430866241, "learning_rate": 1.0257166920266177e-05, "loss": 0.0048, "step": 3448 }, { "epoch": 202.88235294117646, "grad_norm": 0.8014354109764099, "learning_rate": 1.0252405591036952e-05, "loss": 0.0154, "step": 3449 }, { "epoch": 202.94117647058823, "grad_norm": 0.6225651502609253, "learning_rate": 1.0247644204549565e-05, "loss": 0.0079, "step": 3450 }, { "epoch": 203.0, "grad_norm": 0.5882680416107178, "learning_rate": 1.0242882761884132e-05, "loss": 0.0054, "step": 3451 }, { "epoch": 203.05882352941177, "grad_norm": 0.421850323677063, "learning_rate": 1.0238121264120789e-05, "loss": 0.0051, "step": 3452 }, { "epoch": 203.11764705882354, "grad_norm": 0.3533374071121216, "learning_rate": 1.0233359712339681e-05, "loss": 0.0056, "step": 3453 }, { "epoch": 203.1764705882353, "grad_norm": 0.4691959619522095, "learning_rate": 1.0228598107620962e-05, "loss": 0.0094, "step": 3454 }, { "epoch": 203.23529411764707, "grad_norm": 0.7191398739814758, "learning_rate": 1.0223836451044804e-05, "loss": 0.006, "step": 3455 }, { "epoch": 203.2941176470588, "grad_norm": 0.9584029912948608, "learning_rate": 1.0219074743691384e-05, "loss": 0.0115, "step": 3456 }, { "epoch": 203.35294117647058, "grad_norm": 0.5664317607879639, "learning_rate": 1.0214312986640902e-05, "loss": 0.0068, "step": 3457 }, { "epoch": 203.41176470588235, "grad_norm": 0.6876184344291687, "learning_rate": 1.0209551180973553e-05, "loss": 0.0122, "step": 3458 }, { "epoch": 203.47058823529412, "grad_norm": 0.716497004032135, "learning_rate": 1.0204789327769559e-05, "loss": 0.0051, "step": 3459 }, { "epoch": 203.52941176470588, "grad_norm": 1.3285398483276367, "learning_rate": 1.020002742810914e-05, "loss": 0.0125, "step": 3460 }, { "epoch": 203.58823529411765, "grad_norm": 0.6341179609298706, "learning_rate": 1.0195265483072536e-05, "loss": 0.0066, "step": 3461 }, { "epoch": 203.64705882352942, "grad_norm": 0.5297707915306091, "learning_rate": 1.019050349373999e-05, "loss": 0.0079, "step": 3462 }, { "epoch": 203.7058823529412, "grad_norm": 0.5228769779205322, "learning_rate": 1.0185741461191764e-05, "loss": 0.0057, "step": 3463 }, { "epoch": 203.76470588235293, "grad_norm": 0.38561615347862244, "learning_rate": 1.0180979386508119e-05, "loss": 0.0057, "step": 3464 }, { "epoch": 203.8235294117647, "grad_norm": 0.4922335743904114, "learning_rate": 1.017621727076933e-05, "loss": 0.0102, "step": 3465 }, { "epoch": 203.88235294117646, "grad_norm": 0.6263774037361145, "learning_rate": 1.0171455115055688e-05, "loss": 0.0141, "step": 3466 }, { "epoch": 203.94117647058823, "grad_norm": 0.6894011497497559, "learning_rate": 1.016669292044748e-05, "loss": 0.0123, "step": 3467 }, { "epoch": 204.0, "grad_norm": 0.7548149824142456, "learning_rate": 1.0161930688025018e-05, "loss": 0.0153, "step": 3468 }, { "epoch": 204.05882352941177, "grad_norm": 0.5237783193588257, "learning_rate": 1.0157168418868601e-05, "loss": 0.0108, "step": 3469 }, { "epoch": 204.11764705882354, "grad_norm": 0.7679816484451294, "learning_rate": 1.0152406114058563e-05, "loss": 0.0062, "step": 3470 }, { "epoch": 204.1764705882353, "grad_norm": 1.050896167755127, "learning_rate": 1.014764377467522e-05, "loss": 0.0145, "step": 3471 }, { "epoch": 204.23529411764707, "grad_norm": 0.43016666173934937, "learning_rate": 1.0142881401798917e-05, "loss": 0.0124, "step": 3472 }, { "epoch": 204.2941176470588, "grad_norm": 0.46605777740478516, "learning_rate": 1.013811899650999e-05, "loss": 0.0056, "step": 3473 }, { "epoch": 204.35294117647058, "grad_norm": 0.38385623693466187, "learning_rate": 1.0133356559888793e-05, "loss": 0.0042, "step": 3474 }, { "epoch": 204.41176470588235, "grad_norm": 0.42726752161979675, "learning_rate": 1.0128594093015685e-05, "loss": 0.0087, "step": 3475 }, { "epoch": 204.47058823529412, "grad_norm": 0.3524323105812073, "learning_rate": 1.0123831596971033e-05, "loss": 0.008, "step": 3476 }, { "epoch": 204.52941176470588, "grad_norm": 0.3485679030418396, "learning_rate": 1.0119069072835204e-05, "loss": 0.0065, "step": 3477 }, { "epoch": 204.58823529411765, "grad_norm": 0.5992925763130188, "learning_rate": 1.0114306521688574e-05, "loss": 0.0129, "step": 3478 }, { "epoch": 204.64705882352942, "grad_norm": 0.7294977307319641, "learning_rate": 1.010954394461153e-05, "loss": 0.0074, "step": 3479 }, { "epoch": 204.7058823529412, "grad_norm": 0.3994596600532532, "learning_rate": 1.0104781342684465e-05, "loss": 0.0053, "step": 3480 }, { "epoch": 204.76470588235293, "grad_norm": 0.3672890067100525, "learning_rate": 1.010001871698777e-05, "loss": 0.0045, "step": 3481 }, { "epoch": 204.8235294117647, "grad_norm": 0.411395788192749, "learning_rate": 1.0095256068601845e-05, "loss": 0.0073, "step": 3482 }, { "epoch": 204.88235294117646, "grad_norm": 0.6902328729629517, "learning_rate": 1.00904933986071e-05, "loss": 0.0087, "step": 3483 }, { "epoch": 204.94117647058823, "grad_norm": 0.8038604855537415, "learning_rate": 1.008573070808394e-05, "loss": 0.012, "step": 3484 }, { "epoch": 205.0, "grad_norm": 0.7812236547470093, "learning_rate": 1.0080967998112787e-05, "loss": 0.0111, "step": 3485 }, { "epoch": 205.05882352941177, "grad_norm": 0.38746488094329834, "learning_rate": 1.0076205269774058e-05, "loss": 0.0055, "step": 3486 }, { "epoch": 205.11764705882354, "grad_norm": 0.6825348138809204, "learning_rate": 1.0071442524148177e-05, "loss": 0.0087, "step": 3487 }, { "epoch": 205.1764705882353, "grad_norm": 0.5162530541419983, "learning_rate": 1.006667976231557e-05, "loss": 0.0108, "step": 3488 }, { "epoch": 205.23529411764707, "grad_norm": 0.5111679434776306, "learning_rate": 1.0061916985356675e-05, "loss": 0.0113, "step": 3489 }, { "epoch": 205.2941176470588, "grad_norm": 0.44166165590286255, "learning_rate": 1.0057154194351922e-05, "loss": 0.0075, "step": 3490 }, { "epoch": 205.35294117647058, "grad_norm": 0.43521878123283386, "learning_rate": 1.0052391390381747e-05, "loss": 0.0107, "step": 3491 }, { "epoch": 205.41176470588235, "grad_norm": 0.5790126919746399, "learning_rate": 1.0047628574526595e-05, "loss": 0.0069, "step": 3492 }, { "epoch": 205.47058823529412, "grad_norm": 0.5223351716995239, "learning_rate": 1.004286574786691e-05, "loss": 0.0067, "step": 3493 }, { "epoch": 205.52941176470588, "grad_norm": 0.5079670548439026, "learning_rate": 1.0038102911483139e-05, "loss": 0.0116, "step": 3494 }, { "epoch": 205.58823529411765, "grad_norm": 0.4736677408218384, "learning_rate": 1.0033340066455729e-05, "loss": 0.0049, "step": 3495 }, { "epoch": 205.64705882352942, "grad_norm": 0.5309202671051025, "learning_rate": 1.002857721386513e-05, "loss": 0.0111, "step": 3496 }, { "epoch": 205.7058823529412, "grad_norm": 0.3337430953979492, "learning_rate": 1.0023814354791794e-05, "loss": 0.0069, "step": 3497 }, { "epoch": 205.76470588235293, "grad_norm": 0.5075371861457825, "learning_rate": 1.0019051490316178e-05, "loss": 0.0092, "step": 3498 }, { "epoch": 205.8235294117647, "grad_norm": 0.7633586525917053, "learning_rate": 1.0014288621518733e-05, "loss": 0.0061, "step": 3499 }, { "epoch": 205.88235294117646, "grad_norm": 2.006873369216919, "learning_rate": 1.0009525749479918e-05, "loss": 0.0098, "step": 3500 }, { "epoch": 205.94117647058823, "grad_norm": 0.7887892127037048, "learning_rate": 1.0004762875280187e-05, "loss": 0.0096, "step": 3501 }, { "epoch": 206.0, "grad_norm": 0.5501015782356262, "learning_rate": 1e-05, "loss": 0.0094, "step": 3502 }, { "epoch": 206.05882352941177, "grad_norm": 0.6687799096107483, "learning_rate": 9.995237124719816e-06, "loss": 0.0119, "step": 3503 }, { "epoch": 206.11764705882354, "grad_norm": 0.38236314058303833, "learning_rate": 9.990474250520085e-06, "loss": 0.0076, "step": 3504 }, { "epoch": 206.1764705882353, "grad_norm": 0.46291279792785645, "learning_rate": 9.98571137848127e-06, "loss": 0.0076, "step": 3505 }, { "epoch": 206.23529411764707, "grad_norm": 0.5846573710441589, "learning_rate": 9.980948509683825e-06, "loss": 0.0119, "step": 3506 }, { "epoch": 206.2941176470588, "grad_norm": 0.5129162669181824, "learning_rate": 9.97618564520821e-06, "loss": 0.0169, "step": 3507 }, { "epoch": 206.35294117647058, "grad_norm": 3.1662909984588623, "learning_rate": 9.971422786134874e-06, "loss": 0.0217, "step": 3508 }, { "epoch": 206.41176470588235, "grad_norm": 0.35352444648742676, "learning_rate": 9.966659933544276e-06, "loss": 0.0041, "step": 3509 }, { "epoch": 206.47058823529412, "grad_norm": 0.6336624622344971, "learning_rate": 9.961897088516864e-06, "loss": 0.0052, "step": 3510 }, { "epoch": 206.52941176470588, "grad_norm": 0.5341898202896118, "learning_rate": 9.957134252133094e-06, "loss": 0.0052, "step": 3511 }, { "epoch": 206.58823529411765, "grad_norm": 1.1906570196151733, "learning_rate": 9.952371425473406e-06, "loss": 0.0083, "step": 3512 }, { "epoch": 206.64705882352942, "grad_norm": 0.42609813809394836, "learning_rate": 9.947608609618257e-06, "loss": 0.006, "step": 3513 }, { "epoch": 206.7058823529412, "grad_norm": 0.45863547921180725, "learning_rate": 9.942845805648082e-06, "loss": 0.0048, "step": 3514 }, { "epoch": 206.76470588235293, "grad_norm": 0.6257054805755615, "learning_rate": 9.93808301464333e-06, "loss": 0.0085, "step": 3515 }, { "epoch": 206.8235294117647, "grad_norm": 0.5355355739593506, "learning_rate": 9.933320237684433e-06, "loss": 0.0087, "step": 3516 }, { "epoch": 206.88235294117646, "grad_norm": 0.902189314365387, "learning_rate": 9.928557475851826e-06, "loss": 0.0112, "step": 3517 }, { "epoch": 206.94117647058823, "grad_norm": 0.5867468118667603, "learning_rate": 9.923794730225947e-06, "loss": 0.0063, "step": 3518 }, { "epoch": 207.0, "grad_norm": 0.2959466576576233, "learning_rate": 9.919032001887215e-06, "loss": 0.0063, "step": 3519 }, { "epoch": 207.05882352941177, "grad_norm": 0.3136676251888275, "learning_rate": 9.914269291916063e-06, "loss": 0.0033, "step": 3520 }, { "epoch": 207.11764705882354, "grad_norm": 0.7396752834320068, "learning_rate": 9.909506601392905e-06, "loss": 0.0085, "step": 3521 }, { "epoch": 207.1764705882353, "grad_norm": 0.9058459997177124, "learning_rate": 9.904743931398158e-06, "loss": 0.0098, "step": 3522 }, { "epoch": 207.23529411764707, "grad_norm": 1.20993971824646, "learning_rate": 9.899981283012234e-06, "loss": 0.014, "step": 3523 }, { "epoch": 207.2941176470588, "grad_norm": 0.6732929348945618, "learning_rate": 9.89521865731554e-06, "loss": 0.0165, "step": 3524 }, { "epoch": 207.35294117647058, "grad_norm": 1.7431646585464478, "learning_rate": 9.890456055388471e-06, "loss": 0.013, "step": 3525 }, { "epoch": 207.41176470588235, "grad_norm": 0.5298877954483032, "learning_rate": 9.88569347831143e-06, "loss": 0.0049, "step": 3526 }, { "epoch": 207.47058823529412, "grad_norm": 0.39570504426956177, "learning_rate": 9.8809309271648e-06, "loss": 0.0062, "step": 3527 }, { "epoch": 207.52941176470588, "grad_norm": 1.4189156293869019, "learning_rate": 9.87616840302897e-06, "loss": 0.0075, "step": 3528 }, { "epoch": 207.58823529411765, "grad_norm": 0.573283851146698, "learning_rate": 9.871405906984317e-06, "loss": 0.0086, "step": 3529 }, { "epoch": 207.64705882352942, "grad_norm": 0.3923742473125458, "learning_rate": 9.866643440111208e-06, "loss": 0.0061, "step": 3530 }, { "epoch": 207.7058823529412, "grad_norm": 0.5227124691009521, "learning_rate": 9.861881003490016e-06, "loss": 0.0108, "step": 3531 }, { "epoch": 207.76470588235293, "grad_norm": 0.42857229709625244, "learning_rate": 9.857118598201088e-06, "loss": 0.0056, "step": 3532 }, { "epoch": 207.8235294117647, "grad_norm": 0.5640078186988831, "learning_rate": 9.852356225324783e-06, "loss": 0.0097, "step": 3533 }, { "epoch": 207.88235294117646, "grad_norm": 0.43843379616737366, "learning_rate": 9.84759388594144e-06, "loss": 0.0064, "step": 3534 }, { "epoch": 207.94117647058823, "grad_norm": 0.3896380662918091, "learning_rate": 9.842831581131402e-06, "loss": 0.0078, "step": 3535 }, { "epoch": 208.0, "grad_norm": 0.38202187418937683, "learning_rate": 9.838069311974986e-06, "loss": 0.0044, "step": 3536 }, { "epoch": 208.05882352941177, "grad_norm": 0.521273672580719, "learning_rate": 9.833307079552522e-06, "loss": 0.0071, "step": 3537 }, { "epoch": 208.11764705882354, "grad_norm": 0.47406548261642456, "learning_rate": 9.828544884944314e-06, "loss": 0.0056, "step": 3538 }, { "epoch": 208.1764705882353, "grad_norm": 0.6531280279159546, "learning_rate": 9.823782729230673e-06, "loss": 0.0068, "step": 3539 }, { "epoch": 208.23529411764707, "grad_norm": 0.7169029116630554, "learning_rate": 9.819020613491883e-06, "loss": 0.0069, "step": 3540 }, { "epoch": 208.2941176470588, "grad_norm": 0.7050901651382446, "learning_rate": 9.81425853880824e-06, "loss": 0.0143, "step": 3541 }, { "epoch": 208.35294117647058, "grad_norm": 0.5533369779586792, "learning_rate": 9.80949650626001e-06, "loss": 0.0121, "step": 3542 }, { "epoch": 208.41176470588235, "grad_norm": 0.5218326449394226, "learning_rate": 9.804734516927466e-06, "loss": 0.0045, "step": 3543 }, { "epoch": 208.47058823529412, "grad_norm": 0.41361841559410095, "learning_rate": 9.799972571890862e-06, "loss": 0.0073, "step": 3544 }, { "epoch": 208.52941176470588, "grad_norm": 0.5999132394790649, "learning_rate": 9.795210672230444e-06, "loss": 0.0139, "step": 3545 }, { "epoch": 208.58823529411765, "grad_norm": 0.7342587113380432, "learning_rate": 9.790448819026448e-06, "loss": 0.0117, "step": 3546 }, { "epoch": 208.64705882352942, "grad_norm": 0.4833593964576721, "learning_rate": 9.785687013359102e-06, "loss": 0.01, "step": 3547 }, { "epoch": 208.7058823529412, "grad_norm": 0.5045291781425476, "learning_rate": 9.780925256308617e-06, "loss": 0.005, "step": 3548 }, { "epoch": 208.76470588235293, "grad_norm": 0.6320129036903381, "learning_rate": 9.776163548955199e-06, "loss": 0.0114, "step": 3549 }, { "epoch": 208.8235294117647, "grad_norm": 0.46570274233818054, "learning_rate": 9.771401892379043e-06, "loss": 0.0049, "step": 3550 }, { "epoch": 208.88235294117646, "grad_norm": 1.357369065284729, "learning_rate": 9.766640287660322e-06, "loss": 0.012, "step": 3551 }, { "epoch": 208.94117647058823, "grad_norm": 0.5415012240409851, "learning_rate": 9.761878735879214e-06, "loss": 0.0062, "step": 3552 }, { "epoch": 209.0, "grad_norm": 1.5594338178634644, "learning_rate": 9.757117238115871e-06, "loss": 0.01, "step": 3553 }, { "epoch": 209.05882352941177, "grad_norm": 0.651517391204834, "learning_rate": 9.75235579545044e-06, "loss": 0.0128, "step": 3554 }, { "epoch": 209.11764705882354, "grad_norm": 0.5685669183731079, "learning_rate": 9.747594408963052e-06, "loss": 0.0112, "step": 3555 }, { "epoch": 209.1764705882353, "grad_norm": 0.6170831918716431, "learning_rate": 9.742833079733828e-06, "loss": 0.0096, "step": 3556 }, { "epoch": 209.23529411764707, "grad_norm": 0.8340187668800354, "learning_rate": 9.738071808842874e-06, "loss": 0.0077, "step": 3557 }, { "epoch": 209.2941176470588, "grad_norm": 0.4306275248527527, "learning_rate": 9.733310597370284e-06, "loss": 0.0063, "step": 3558 }, { "epoch": 209.35294117647058, "grad_norm": 0.7505781650543213, "learning_rate": 9.728549446396139e-06, "loss": 0.0114, "step": 3559 }, { "epoch": 209.41176470588235, "grad_norm": 0.6068425178527832, "learning_rate": 9.7237883570005e-06, "loss": 0.0128, "step": 3560 }, { "epoch": 209.47058823529412, "grad_norm": 0.372850239276886, "learning_rate": 9.719027330263427e-06, "loss": 0.0051, "step": 3561 }, { "epoch": 209.52941176470588, "grad_norm": 0.44104430079460144, "learning_rate": 9.714266367264951e-06, "loss": 0.0065, "step": 3562 }, { "epoch": 209.58823529411765, "grad_norm": 0.3919260501861572, "learning_rate": 9.7095054690851e-06, "loss": 0.0041, "step": 3563 }, { "epoch": 209.64705882352942, "grad_norm": 2.0335912704467773, "learning_rate": 9.704744636803882e-06, "loss": 0.0133, "step": 3564 }, { "epoch": 209.7058823529412, "grad_norm": 0.4651314318180084, "learning_rate": 9.69998387150129e-06, "loss": 0.0097, "step": 3565 }, { "epoch": 209.76470588235293, "grad_norm": 0.5777485966682434, "learning_rate": 9.695223174257303e-06, "loss": 0.0097, "step": 3566 }, { "epoch": 209.8235294117647, "grad_norm": 0.5761001706123352, "learning_rate": 9.690462546151887e-06, "loss": 0.007, "step": 3567 }, { "epoch": 209.88235294117646, "grad_norm": 0.6995345950126648, "learning_rate": 9.685701988264985e-06, "loss": 0.0087, "step": 3568 }, { "epoch": 209.94117647058823, "grad_norm": 0.45156624913215637, "learning_rate": 9.680941501676533e-06, "loss": 0.005, "step": 3569 }, { "epoch": 210.0, "grad_norm": 0.5910322666168213, "learning_rate": 9.676181087466444e-06, "loss": 0.0123, "step": 3570 }, { "epoch": 210.05882352941177, "grad_norm": 0.34413161873817444, "learning_rate": 9.671420746714617e-06, "loss": 0.0043, "step": 3571 }, { "epoch": 210.11764705882354, "grad_norm": 1.289819359779358, "learning_rate": 9.666660480500936e-06, "loss": 0.0142, "step": 3572 }, { "epoch": 210.1764705882353, "grad_norm": 0.6117986440658569, "learning_rate": 9.661900289905264e-06, "loss": 0.0082, "step": 3573 }, { "epoch": 210.23529411764707, "grad_norm": 0.6166220307350159, "learning_rate": 9.657140176007455e-06, "loss": 0.0055, "step": 3574 }, { "epoch": 210.2941176470588, "grad_norm": 0.6363047361373901, "learning_rate": 9.652380139887332e-06, "loss": 0.0094, "step": 3575 }, { "epoch": 210.35294117647058, "grad_norm": 0.7812364101409912, "learning_rate": 9.647620182624715e-06, "loss": 0.0152, "step": 3576 }, { "epoch": 210.41176470588235, "grad_norm": 0.46824511885643005, "learning_rate": 9.642860305299395e-06, "loss": 0.0071, "step": 3577 }, { "epoch": 210.47058823529412, "grad_norm": 0.6071686744689941, "learning_rate": 9.638100508991153e-06, "loss": 0.0147, "step": 3578 }, { "epoch": 210.52941176470588, "grad_norm": 0.507701575756073, "learning_rate": 9.633340794779745e-06, "loss": 0.0094, "step": 3579 }, { "epoch": 210.58823529411765, "grad_norm": 0.48157960176467896, "learning_rate": 9.628581163744912e-06, "loss": 0.0108, "step": 3580 }, { "epoch": 210.64705882352942, "grad_norm": 0.41959109902381897, "learning_rate": 9.623821616966376e-06, "loss": 0.0056, "step": 3581 }, { "epoch": 210.7058823529412, "grad_norm": 0.5809347629547119, "learning_rate": 9.619062155523842e-06, "loss": 0.0074, "step": 3582 }, { "epoch": 210.76470588235293, "grad_norm": 0.5090842843055725, "learning_rate": 9.614302780496989e-06, "loss": 0.0078, "step": 3583 }, { "epoch": 210.8235294117647, "grad_norm": 0.4300100803375244, "learning_rate": 9.60954349296548e-06, "loss": 0.0087, "step": 3584 }, { "epoch": 210.88235294117646, "grad_norm": 0.8209686875343323, "learning_rate": 9.604784294008962e-06, "loss": 0.0092, "step": 3585 }, { "epoch": 210.94117647058823, "grad_norm": 0.43164941668510437, "learning_rate": 9.600025184707055e-06, "loss": 0.0054, "step": 3586 }, { "epoch": 211.0, "grad_norm": 0.3201170861721039, "learning_rate": 9.595266166139366e-06, "loss": 0.0047, "step": 3587 }, { "epoch": 211.05882352941177, "grad_norm": 0.670695424079895, "learning_rate": 9.590507239385473e-06, "loss": 0.0061, "step": 3588 }, { "epoch": 211.11764705882354, "grad_norm": 0.5105672478675842, "learning_rate": 9.585748405524942e-06, "loss": 0.0079, "step": 3589 }, { "epoch": 211.1764705882353, "grad_norm": 0.5217043161392212, "learning_rate": 9.58098966563731e-06, "loss": 0.0058, "step": 3590 }, { "epoch": 211.23529411764707, "grad_norm": 0.19551360607147217, "learning_rate": 9.576231020802099e-06, "loss": 0.0038, "step": 3591 }, { "epoch": 211.2941176470588, "grad_norm": 0.6877105236053467, "learning_rate": 9.571472472098804e-06, "loss": 0.0124, "step": 3592 }, { "epoch": 211.35294117647058, "grad_norm": 0.5195525288581848, "learning_rate": 9.566714020606905e-06, "loss": 0.0059, "step": 3593 }, { "epoch": 211.41176470588235, "grad_norm": 0.194919154047966, "learning_rate": 9.56195566740585e-06, "loss": 0.0032, "step": 3594 }, { "epoch": 211.47058823529412, "grad_norm": 0.5506737232208252, "learning_rate": 9.557197413575077e-06, "loss": 0.0076, "step": 3595 }, { "epoch": 211.52941176470588, "grad_norm": 0.41101884841918945, "learning_rate": 9.552439260193992e-06, "loss": 0.0113, "step": 3596 }, { "epoch": 211.58823529411765, "grad_norm": 1.0812875032424927, "learning_rate": 9.547681208341978e-06, "loss": 0.0078, "step": 3597 }, { "epoch": 211.64705882352942, "grad_norm": 0.6069225668907166, "learning_rate": 9.542923259098404e-06, "loss": 0.0107, "step": 3598 }, { "epoch": 211.7058823529412, "grad_norm": 0.44649383425712585, "learning_rate": 9.538165413542607e-06, "loss": 0.0085, "step": 3599 }, { "epoch": 211.76470588235293, "grad_norm": 0.7270436882972717, "learning_rate": 9.533407672753904e-06, "loss": 0.0099, "step": 3600 }, { "epoch": 211.8235294117647, "grad_norm": 0.5898965001106262, "learning_rate": 9.528650037811587e-06, "loss": 0.0148, "step": 3601 }, { "epoch": 211.88235294117646, "grad_norm": 0.8377848267555237, "learning_rate": 9.523892509794926e-06, "loss": 0.0068, "step": 3602 }, { "epoch": 211.94117647058823, "grad_norm": 0.5274443626403809, "learning_rate": 9.519135089783162e-06, "loss": 0.0079, "step": 3603 }, { "epoch": 212.0, "grad_norm": 0.3180359899997711, "learning_rate": 9.514377778855521e-06, "loss": 0.0046, "step": 3604 }, { "epoch": 212.05882352941177, "grad_norm": 0.44702035188674927, "learning_rate": 9.509620578091192e-06, "loss": 0.0113, "step": 3605 }, { "epoch": 212.11764705882354, "grad_norm": 2.245037794113159, "learning_rate": 9.504863488569344e-06, "loss": 0.0045, "step": 3606 }, { "epoch": 212.1764705882353, "grad_norm": 0.37032151222229004, "learning_rate": 9.500106511369132e-06, "loss": 0.0096, "step": 3607 }, { "epoch": 212.23529411764707, "grad_norm": 0.49438023567199707, "learning_rate": 9.495349647569666e-06, "loss": 0.0098, "step": 3608 }, { "epoch": 212.2941176470588, "grad_norm": 0.8840548396110535, "learning_rate": 9.490592898250045e-06, "loss": 0.0073, "step": 3609 }, { "epoch": 212.35294117647058, "grad_norm": 0.5578643679618835, "learning_rate": 9.485836264489333e-06, "loss": 0.0079, "step": 3610 }, { "epoch": 212.41176470588235, "grad_norm": 0.4757553040981293, "learning_rate": 9.481079747366572e-06, "loss": 0.0068, "step": 3611 }, { "epoch": 212.47058823529412, "grad_norm": 0.4393394887447357, "learning_rate": 9.476323347960781e-06, "loss": 0.0096, "step": 3612 }, { "epoch": 212.52941176470588, "grad_norm": 0.3535398244857788, "learning_rate": 9.471567067350943e-06, "loss": 0.006, "step": 3613 }, { "epoch": 212.58823529411765, "grad_norm": 0.723148763179779, "learning_rate": 9.466810906616024e-06, "loss": 0.008, "step": 3614 }, { "epoch": 212.64705882352942, "grad_norm": 1.6461063623428345, "learning_rate": 9.462054866834953e-06, "loss": 0.0087, "step": 3615 }, { "epoch": 212.7058823529412, "grad_norm": 0.7828130125999451, "learning_rate": 9.457298949086643e-06, "loss": 0.0157, "step": 3616 }, { "epoch": 212.76470588235293, "grad_norm": 0.2808553874492645, "learning_rate": 9.452543154449967e-06, "loss": 0.006, "step": 3617 }, { "epoch": 212.8235294117647, "grad_norm": 0.27236753702163696, "learning_rate": 9.447787484003781e-06, "loss": 0.0048, "step": 3618 }, { "epoch": 212.88235294117646, "grad_norm": 1.2037855386734009, "learning_rate": 9.443031938826905e-06, "loss": 0.0066, "step": 3619 }, { "epoch": 212.94117647058823, "grad_norm": 0.28537946939468384, "learning_rate": 9.438276519998136e-06, "loss": 0.0048, "step": 3620 }, { "epoch": 213.0, "grad_norm": 0.4715133607387543, "learning_rate": 9.433521228596237e-06, "loss": 0.0093, "step": 3621 }, { "epoch": 213.05882352941177, "grad_norm": 0.42878398299217224, "learning_rate": 9.428766065699949e-06, "loss": 0.0057, "step": 3622 }, { "epoch": 213.11764705882354, "grad_norm": 1.9800128936767578, "learning_rate": 9.424011032387973e-06, "loss": 0.028, "step": 3623 }, { "epoch": 213.1764705882353, "grad_norm": 0.47418320178985596, "learning_rate": 9.419256129738996e-06, "loss": 0.005, "step": 3624 }, { "epoch": 213.23529411764707, "grad_norm": 0.6176667213439941, "learning_rate": 9.414501358831663e-06, "loss": 0.0073, "step": 3625 }, { "epoch": 213.2941176470588, "grad_norm": 0.997819721698761, "learning_rate": 9.409746720744588e-06, "loss": 0.0056, "step": 3626 }, { "epoch": 213.35294117647058, "grad_norm": 0.47863221168518066, "learning_rate": 9.40499221655637e-06, "loss": 0.0077, "step": 3627 }, { "epoch": 213.41176470588235, "grad_norm": 1.0540419816970825, "learning_rate": 9.400237847345558e-06, "loss": 0.0097, "step": 3628 }, { "epoch": 213.47058823529412, "grad_norm": 0.4904393255710602, "learning_rate": 9.395483614190685e-06, "loss": 0.0061, "step": 3629 }, { "epoch": 213.52941176470588, "grad_norm": 0.595282793045044, "learning_rate": 9.390729518170246e-06, "loss": 0.0115, "step": 3630 }, { "epoch": 213.58823529411765, "grad_norm": 0.49039730429649353, "learning_rate": 9.38597556036271e-06, "loss": 0.0066, "step": 3631 }, { "epoch": 213.64705882352942, "grad_norm": 2.538259983062744, "learning_rate": 9.381221741846505e-06, "loss": 0.0087, "step": 3632 }, { "epoch": 213.7058823529412, "grad_norm": 0.42396873235702515, "learning_rate": 9.376468063700042e-06, "loss": 0.0073, "step": 3633 }, { "epoch": 213.76470588235293, "grad_norm": 0.5029367208480835, "learning_rate": 9.371714527001686e-06, "loss": 0.011, "step": 3634 }, { "epoch": 213.8235294117647, "grad_norm": 0.33892813324928284, "learning_rate": 9.366961132829778e-06, "loss": 0.0048, "step": 3635 }, { "epoch": 213.88235294117646, "grad_norm": 3.0572409629821777, "learning_rate": 9.362207882262622e-06, "loss": 0.0113, "step": 3636 }, { "epoch": 213.94117647058823, "grad_norm": 0.8606774806976318, "learning_rate": 9.3574547763785e-06, "loss": 0.009, "step": 3637 }, { "epoch": 214.0, "grad_norm": 0.5526056289672852, "learning_rate": 9.352701816255643e-06, "loss": 0.0105, "step": 3638 }, { "epoch": 214.05882352941177, "grad_norm": 0.5380684733390808, "learning_rate": 9.347949002972265e-06, "loss": 0.0075, "step": 3639 }, { "epoch": 214.11764705882354, "grad_norm": 0.7757055163383484, "learning_rate": 9.343196337606541e-06, "loss": 0.0067, "step": 3640 }, { "epoch": 214.1764705882353, "grad_norm": 0.5229249596595764, "learning_rate": 9.33844382123661e-06, "loss": 0.0068, "step": 3641 }, { "epoch": 214.23529411764707, "grad_norm": 0.34470587968826294, "learning_rate": 9.333691454940582e-06, "loss": 0.0082, "step": 3642 }, { "epoch": 214.2941176470588, "grad_norm": 1.605133056640625, "learning_rate": 9.328939239796525e-06, "loss": 0.0096, "step": 3643 }, { "epoch": 214.35294117647058, "grad_norm": 3.2599892616271973, "learning_rate": 9.324187176882486e-06, "loss": 0.0184, "step": 3644 }, { "epoch": 214.41176470588235, "grad_norm": 0.9596477746963501, "learning_rate": 9.319435267276462e-06, "loss": 0.0102, "step": 3645 }, { "epoch": 214.47058823529412, "grad_norm": 0.6091804504394531, "learning_rate": 9.314683512056432e-06, "loss": 0.0083, "step": 3646 }, { "epoch": 214.52941176470588, "grad_norm": 0.9157789349555969, "learning_rate": 9.309931912300322e-06, "loss": 0.0105, "step": 3647 }, { "epoch": 214.58823529411765, "grad_norm": 2.040769577026367, "learning_rate": 9.305180469086035e-06, "loss": 0.0154, "step": 3648 }, { "epoch": 214.64705882352942, "grad_norm": 1.1065703630447388, "learning_rate": 9.300429183491435e-06, "loss": 0.0166, "step": 3649 }, { "epoch": 214.7058823529412, "grad_norm": 0.3025611340999603, "learning_rate": 9.295678056594354e-06, "loss": 0.004, "step": 3650 }, { "epoch": 214.76470588235293, "grad_norm": 0.8256703615188599, "learning_rate": 9.290927089472578e-06, "loss": 0.0107, "step": 3651 }, { "epoch": 214.8235294117647, "grad_norm": 1.1335759162902832, "learning_rate": 9.286176283203864e-06, "loss": 0.0106, "step": 3652 }, { "epoch": 214.88235294117646, "grad_norm": 0.5361834764480591, "learning_rate": 9.281425638865936e-06, "loss": 0.0091, "step": 3653 }, { "epoch": 214.94117647058823, "grad_norm": 0.540104866027832, "learning_rate": 9.276675157536472e-06, "loss": 0.0094, "step": 3654 }, { "epoch": 215.0, "grad_norm": 0.6611533761024475, "learning_rate": 9.27192484029312e-06, "loss": 0.0068, "step": 3655 }, { "epoch": 215.05882352941177, "grad_norm": 0.6451895833015442, "learning_rate": 9.267174688213488e-06, "loss": 0.0068, "step": 3656 }, { "epoch": 215.11764705882354, "grad_norm": 0.5445311665534973, "learning_rate": 9.26242470237515e-06, "loss": 0.0043, "step": 3657 }, { "epoch": 215.1764705882353, "grad_norm": 0.6780261993408203, "learning_rate": 9.257674883855635e-06, "loss": 0.0108, "step": 3658 }, { "epoch": 215.23529411764707, "grad_norm": 1.1202576160430908, "learning_rate": 9.252925233732444e-06, "loss": 0.0168, "step": 3659 }, { "epoch": 215.2941176470588, "grad_norm": 0.6718978881835938, "learning_rate": 9.248175753083026e-06, "loss": 0.0046, "step": 3660 }, { "epoch": 215.35294117647058, "grad_norm": 1.1033532619476318, "learning_rate": 9.243426442984809e-06, "loss": 0.0106, "step": 3661 }, { "epoch": 215.41176470588235, "grad_norm": 0.672132670879364, "learning_rate": 9.238677304515165e-06, "loss": 0.0078, "step": 3662 }, { "epoch": 215.47058823529412, "grad_norm": 1.4003657102584839, "learning_rate": 9.233928338751443e-06, "loss": 0.0142, "step": 3663 }, { "epoch": 215.52941176470588, "grad_norm": 0.4877281188964844, "learning_rate": 9.22917954677094e-06, "loss": 0.0069, "step": 3664 }, { "epoch": 215.58823529411765, "grad_norm": 0.3864426016807556, "learning_rate": 9.224430929650918e-06, "loss": 0.0056, "step": 3665 }, { "epoch": 215.64705882352942, "grad_norm": 1.1421433687210083, "learning_rate": 9.219682488468605e-06, "loss": 0.0143, "step": 3666 }, { "epoch": 215.7058823529412, "grad_norm": 0.5724522471427917, "learning_rate": 9.214934224301177e-06, "loss": 0.0067, "step": 3667 }, { "epoch": 215.76470588235293, "grad_norm": 0.41496583819389343, "learning_rate": 9.210186138225784e-06, "loss": 0.0057, "step": 3668 }, { "epoch": 215.8235294117647, "grad_norm": 0.7260374426841736, "learning_rate": 9.205438231319523e-06, "loss": 0.0099, "step": 3669 }, { "epoch": 215.88235294117646, "grad_norm": 0.6201553344726562, "learning_rate": 9.20069050465946e-06, "loss": 0.0164, "step": 3670 }, { "epoch": 215.94117647058823, "grad_norm": 0.6920731067657471, "learning_rate": 9.195942959322609e-06, "loss": 0.0105, "step": 3671 }, { "epoch": 216.0, "grad_norm": 1.0746402740478516, "learning_rate": 9.19119559638596e-06, "loss": 0.0082, "step": 3672 }, { "epoch": 216.05882352941177, "grad_norm": 0.24610617756843567, "learning_rate": 9.186448416926442e-06, "loss": 0.0039, "step": 3673 }, { "epoch": 216.11764705882354, "grad_norm": 0.46229612827301025, "learning_rate": 9.18170142202096e-06, "loss": 0.0093, "step": 3674 }, { "epoch": 216.1764705882353, "grad_norm": 0.42822250723838806, "learning_rate": 9.176954612746361e-06, "loss": 0.005, "step": 3675 }, { "epoch": 216.23529411764707, "grad_norm": 0.3092705011367798, "learning_rate": 9.172207990179466e-06, "loss": 0.0053, "step": 3676 }, { "epoch": 216.2941176470588, "grad_norm": 1.1199593544006348, "learning_rate": 9.16746155539704e-06, "loss": 0.0099, "step": 3677 }, { "epoch": 216.35294117647058, "grad_norm": 0.6092677116394043, "learning_rate": 9.16271530947581e-06, "loss": 0.0093, "step": 3678 }, { "epoch": 216.41176470588235, "grad_norm": 0.7856438159942627, "learning_rate": 9.157969253492466e-06, "loss": 0.0056, "step": 3679 }, { "epoch": 216.47058823529412, "grad_norm": 3.3190433979034424, "learning_rate": 9.153223388523644e-06, "loss": 0.0089, "step": 3680 }, { "epoch": 216.52941176470588, "grad_norm": 0.4092002511024475, "learning_rate": 9.148477715645948e-06, "loss": 0.0087, "step": 3681 }, { "epoch": 216.58823529411765, "grad_norm": 0.8120889663696289, "learning_rate": 9.14373223593593e-06, "loss": 0.015, "step": 3682 }, { "epoch": 216.64705882352942, "grad_norm": 0.7531610131263733, "learning_rate": 9.138986950470104e-06, "loss": 0.0099, "step": 3683 }, { "epoch": 216.7058823529412, "grad_norm": 1.1476078033447266, "learning_rate": 9.134241860324932e-06, "loss": 0.0128, "step": 3684 }, { "epoch": 216.76470588235293, "grad_norm": 0.7796741127967834, "learning_rate": 9.129496966576842e-06, "loss": 0.0151, "step": 3685 }, { "epoch": 216.8235294117647, "grad_norm": 0.5102100968360901, "learning_rate": 9.124752270302208e-06, "loss": 0.0077, "step": 3686 }, { "epoch": 216.88235294117646, "grad_norm": 0.3234547972679138, "learning_rate": 9.120007772577369e-06, "loss": 0.0053, "step": 3687 }, { "epoch": 216.94117647058823, "grad_norm": 0.5076788663864136, "learning_rate": 9.115263474478608e-06, "loss": 0.0075, "step": 3688 }, { "epoch": 217.0, "grad_norm": 0.3291344940662384, "learning_rate": 9.110519377082174e-06, "loss": 0.0041, "step": 3689 }, { "epoch": 217.05882352941177, "grad_norm": 0.7282571196556091, "learning_rate": 9.10577548146426e-06, "loss": 0.0111, "step": 3690 }, { "epoch": 217.11764705882354, "grad_norm": 0.5266460180282593, "learning_rate": 9.101031788701015e-06, "loss": 0.0058, "step": 3691 }, { "epoch": 217.1764705882353, "grad_norm": 1.0153146982192993, "learning_rate": 9.096288299868552e-06, "loss": 0.0064, "step": 3692 }, { "epoch": 217.23529411764707, "grad_norm": 0.5586175918579102, "learning_rate": 9.091545016042927e-06, "loss": 0.009, "step": 3693 }, { "epoch": 217.2941176470588, "grad_norm": 0.6655679941177368, "learning_rate": 9.086801938300155e-06, "loss": 0.0079, "step": 3694 }, { "epoch": 217.35294117647058, "grad_norm": 0.7130745053291321, "learning_rate": 9.082059067716198e-06, "loss": 0.0087, "step": 3695 }, { "epoch": 217.41176470588235, "grad_norm": 2.652920961380005, "learning_rate": 9.07731640536698e-06, "loss": 0.013, "step": 3696 }, { "epoch": 217.47058823529412, "grad_norm": 0.5974937677383423, "learning_rate": 9.072573952328372e-06, "loss": 0.0095, "step": 3697 }, { "epoch": 217.52941176470588, "grad_norm": 0.933212399482727, "learning_rate": 9.067831709676199e-06, "loss": 0.0109, "step": 3698 }, { "epoch": 217.58823529411765, "grad_norm": 0.7574440240859985, "learning_rate": 9.063089678486235e-06, "loss": 0.0071, "step": 3699 }, { "epoch": 217.64705882352942, "grad_norm": 0.37310266494750977, "learning_rate": 9.058347859834212e-06, "loss": 0.0042, "step": 3700 }, { "epoch": 217.7058823529412, "grad_norm": 0.7731842398643494, "learning_rate": 9.053606254795807e-06, "loss": 0.0086, "step": 3701 }, { "epoch": 217.76470588235293, "grad_norm": 0.47079259157180786, "learning_rate": 9.048864864446658e-06, "loss": 0.0083, "step": 3702 }, { "epoch": 217.8235294117647, "grad_norm": 0.6576310396194458, "learning_rate": 9.044123689862343e-06, "loss": 0.0062, "step": 3703 }, { "epoch": 217.88235294117646, "grad_norm": 0.8155561089515686, "learning_rate": 9.0393827321184e-06, "loss": 0.012, "step": 3704 }, { "epoch": 217.94117647058823, "grad_norm": 0.6164320707321167, "learning_rate": 9.034641992290314e-06, "loss": 0.0118, "step": 3705 }, { "epoch": 218.0, "grad_norm": 0.610304057598114, "learning_rate": 9.02990147145352e-06, "loss": 0.0075, "step": 3706 }, { "epoch": 218.05882352941177, "grad_norm": 0.39975452423095703, "learning_rate": 9.025161170683406e-06, "loss": 0.0064, "step": 3707 }, { "epoch": 218.11764705882354, "grad_norm": 0.7454547882080078, "learning_rate": 9.020421091055305e-06, "loss": 0.0156, "step": 3708 }, { "epoch": 218.1764705882353, "grad_norm": 0.7669509649276733, "learning_rate": 9.015681233644506e-06, "loss": 0.0082, "step": 3709 }, { "epoch": 218.23529411764707, "grad_norm": 0.6704811453819275, "learning_rate": 9.010941599526243e-06, "loss": 0.0097, "step": 3710 }, { "epoch": 218.2941176470588, "grad_norm": 0.6298221349716187, "learning_rate": 9.006202189775706e-06, "loss": 0.0098, "step": 3711 }, { "epoch": 218.35294117647058, "grad_norm": 0.5401356220245361, "learning_rate": 9.001463005468022e-06, "loss": 0.0111, "step": 3712 }, { "epoch": 218.41176470588235, "grad_norm": 0.5124880075454712, "learning_rate": 8.99672404767828e-06, "loss": 0.0054, "step": 3713 }, { "epoch": 218.47058823529412, "grad_norm": 0.5425264835357666, "learning_rate": 8.991985317481507e-06, "loss": 0.0119, "step": 3714 }, { "epoch": 218.52941176470588, "grad_norm": 0.24000342190265656, "learning_rate": 8.987246815952688e-06, "loss": 0.0036, "step": 3715 }, { "epoch": 218.58823529411765, "grad_norm": 0.4758017063140869, "learning_rate": 8.982508544166747e-06, "loss": 0.0059, "step": 3716 }, { "epoch": 218.64705882352942, "grad_norm": 0.2757665514945984, "learning_rate": 8.977770503198562e-06, "loss": 0.0031, "step": 3717 }, { "epoch": 218.7058823529412, "grad_norm": 0.4153424799442291, "learning_rate": 8.97303269412296e-06, "loss": 0.0099, "step": 3718 }, { "epoch": 218.76470588235293, "grad_norm": 0.6476728320121765, "learning_rate": 8.968295118014703e-06, "loss": 0.0058, "step": 3719 }, { "epoch": 218.8235294117647, "grad_norm": 0.9608826637268066, "learning_rate": 8.96355777594852e-06, "loss": 0.0101, "step": 3720 }, { "epoch": 218.88235294117646, "grad_norm": 0.5716763138771057, "learning_rate": 8.958820668999066e-06, "loss": 0.0053, "step": 3721 }, { "epoch": 218.94117647058823, "grad_norm": 0.47232937812805176, "learning_rate": 8.954083798240964e-06, "loss": 0.0073, "step": 3722 }, { "epoch": 219.0, "grad_norm": 0.8510122895240784, "learning_rate": 8.949347164748761e-06, "loss": 0.0081, "step": 3723 }, { "epoch": 219.05882352941177, "grad_norm": 0.7598204016685486, "learning_rate": 8.944610769596971e-06, "loss": 0.0134, "step": 3724 }, { "epoch": 219.11764705882354, "grad_norm": 0.6115275025367737, "learning_rate": 8.939874613860038e-06, "loss": 0.0093, "step": 3725 }, { "epoch": 219.1764705882353, "grad_norm": 0.8773203492164612, "learning_rate": 8.935138698612362e-06, "loss": 0.0108, "step": 3726 }, { "epoch": 219.23529411764707, "grad_norm": 0.6003968119621277, "learning_rate": 8.93040302492828e-06, "loss": 0.0124, "step": 3727 }, { "epoch": 219.2941176470588, "grad_norm": 0.519257664680481, "learning_rate": 8.925667593882085e-06, "loss": 0.0059, "step": 3728 }, { "epoch": 219.35294117647058, "grad_norm": 0.2791740298271179, "learning_rate": 8.920932406548004e-06, "loss": 0.0047, "step": 3729 }, { "epoch": 219.41176470588235, "grad_norm": 0.2034168392419815, "learning_rate": 8.916197464000215e-06, "loss": 0.0039, "step": 3730 }, { "epoch": 219.47058823529412, "grad_norm": 0.28545671701431274, "learning_rate": 8.91146276731284e-06, "loss": 0.0045, "step": 3731 }, { "epoch": 219.52941176470588, "grad_norm": 0.5624377131462097, "learning_rate": 8.906728317559939e-06, "loss": 0.007, "step": 3732 }, { "epoch": 219.58823529411765, "grad_norm": 0.2238883376121521, "learning_rate": 8.901994115815527e-06, "loss": 0.0031, "step": 3733 }, { "epoch": 219.64705882352942, "grad_norm": 0.7751594185829163, "learning_rate": 8.897260163153554e-06, "loss": 0.0122, "step": 3734 }, { "epoch": 219.7058823529412, "grad_norm": 0.6111215353012085, "learning_rate": 8.892526460647919e-06, "loss": 0.0109, "step": 3735 }, { "epoch": 219.76470588235293, "grad_norm": 0.5052698850631714, "learning_rate": 8.887793009372456e-06, "loss": 0.0084, "step": 3736 }, { "epoch": 219.8235294117647, "grad_norm": 0.4054105281829834, "learning_rate": 8.883059810400955e-06, "loss": 0.0092, "step": 3737 }, { "epoch": 219.88235294117646, "grad_norm": 0.39825063943862915, "learning_rate": 8.878326864807135e-06, "loss": 0.0069, "step": 3738 }, { "epoch": 219.94117647058823, "grad_norm": 0.33057838678359985, "learning_rate": 8.873594173664666e-06, "loss": 0.0049, "step": 3739 }, { "epoch": 220.0, "grad_norm": 0.471405953168869, "learning_rate": 8.868861738047158e-06, "loss": 0.0078, "step": 3740 }, { "epoch": 220.05882352941177, "grad_norm": 0.49493882060050964, "learning_rate": 8.864129559028167e-06, "loss": 0.0084, "step": 3741 }, { "epoch": 220.11764705882354, "grad_norm": 0.5402039289474487, "learning_rate": 8.859397637681179e-06, "loss": 0.0088, "step": 3742 }, { "epoch": 220.1764705882353, "grad_norm": 0.3671208322048187, "learning_rate": 8.854665975079639e-06, "loss": 0.0057, "step": 3743 }, { "epoch": 220.23529411764707, "grad_norm": 0.33038410544395447, "learning_rate": 8.849934572296916e-06, "loss": 0.0043, "step": 3744 }, { "epoch": 220.2941176470588, "grad_norm": 0.33165398240089417, "learning_rate": 8.84520343040633e-06, "loss": 0.005, "step": 3745 }, { "epoch": 220.35294117647058, "grad_norm": 0.8988210558891296, "learning_rate": 8.840472550481144e-06, "loss": 0.0119, "step": 3746 }, { "epoch": 220.41176470588235, "grad_norm": 0.8461763262748718, "learning_rate": 8.835741933594549e-06, "loss": 0.0083, "step": 3747 }, { "epoch": 220.47058823529412, "grad_norm": 1.0007860660552979, "learning_rate": 8.831011580819693e-06, "loss": 0.0061, "step": 3748 }, { "epoch": 220.52941176470588, "grad_norm": 0.6597498655319214, "learning_rate": 8.826281493229649e-06, "loss": 0.0129, "step": 3749 }, { "epoch": 220.58823529411765, "grad_norm": 0.6247397065162659, "learning_rate": 8.821551671897443e-06, "loss": 0.009, "step": 3750 }, { "epoch": 220.64705882352942, "grad_norm": 0.3971775472164154, "learning_rate": 8.81682211789603e-06, "loss": 0.0058, "step": 3751 }, { "epoch": 220.7058823529412, "grad_norm": 0.41647443175315857, "learning_rate": 8.812092832298307e-06, "loss": 0.0051, "step": 3752 }, { "epoch": 220.76470588235293, "grad_norm": 0.5306386351585388, "learning_rate": 8.807363816177114e-06, "loss": 0.0055, "step": 3753 }, { "epoch": 220.8235294117647, "grad_norm": 1.07358717918396, "learning_rate": 8.80263507060523e-06, "loss": 0.0094, "step": 3754 }, { "epoch": 220.88235294117646, "grad_norm": 0.40306368470191956, "learning_rate": 8.797906596655365e-06, "loss": 0.0069, "step": 3755 }, { "epoch": 220.94117647058823, "grad_norm": 0.9598408937454224, "learning_rate": 8.793178395400178e-06, "loss": 0.0064, "step": 3756 }, { "epoch": 221.0, "grad_norm": 0.5883159637451172, "learning_rate": 8.788450467912254e-06, "loss": 0.0098, "step": 3757 }, { "epoch": 221.05882352941177, "grad_norm": 0.4935269057750702, "learning_rate": 8.783722815264127e-06, "loss": 0.0125, "step": 3758 }, { "epoch": 221.11764705882354, "grad_norm": 0.4206066429615021, "learning_rate": 8.778995438528264e-06, "loss": 0.0094, "step": 3759 }, { "epoch": 221.1764705882353, "grad_norm": 0.9588775634765625, "learning_rate": 8.774268338777067e-06, "loss": 0.0057, "step": 3760 }, { "epoch": 221.23529411764707, "grad_norm": 0.4774058163166046, "learning_rate": 8.76954151708288e-06, "loss": 0.0087, "step": 3761 }, { "epoch": 221.2941176470588, "grad_norm": 0.3848017752170563, "learning_rate": 8.764814974517982e-06, "loss": 0.0073, "step": 3762 }, { "epoch": 221.35294117647058, "grad_norm": 0.3484910726547241, "learning_rate": 8.760088712154587e-06, "loss": 0.0044, "step": 3763 }, { "epoch": 221.41176470588235, "grad_norm": 0.48483195900917053, "learning_rate": 8.755362731064845e-06, "loss": 0.011, "step": 3764 }, { "epoch": 221.47058823529412, "grad_norm": 0.7278291583061218, "learning_rate": 8.75063703232085e-06, "loss": 0.007, "step": 3765 }, { "epoch": 221.52941176470588, "grad_norm": 0.5828948020935059, "learning_rate": 8.745911616994618e-06, "loss": 0.0099, "step": 3766 }, { "epoch": 221.58823529411765, "grad_norm": 0.43428200483322144, "learning_rate": 8.741186486158118e-06, "loss": 0.0072, "step": 3767 }, { "epoch": 221.64705882352942, "grad_norm": 0.33534523844718933, "learning_rate": 8.736461640883234e-06, "loss": 0.0069, "step": 3768 }, { "epoch": 221.7058823529412, "grad_norm": 0.5098663568496704, "learning_rate": 8.731737082241806e-06, "loss": 0.0061, "step": 3769 }, { "epoch": 221.76470588235293, "grad_norm": 0.2691812515258789, "learning_rate": 8.727012811305596e-06, "loss": 0.0062, "step": 3770 }, { "epoch": 221.8235294117647, "grad_norm": 0.46133190393447876, "learning_rate": 8.7222888291463e-06, "loss": 0.0079, "step": 3771 }, { "epoch": 221.88235294117646, "grad_norm": 0.3925269544124603, "learning_rate": 8.717565136835559e-06, "loss": 0.0043, "step": 3772 }, { "epoch": 221.94117647058823, "grad_norm": 0.299628883600235, "learning_rate": 8.712841735444936e-06, "loss": 0.0039, "step": 3773 }, { "epoch": 222.0, "grad_norm": 0.35647445917129517, "learning_rate": 8.708118626045939e-06, "loss": 0.0063, "step": 3774 }, { "epoch": 222.05882352941177, "grad_norm": 0.42691275477409363, "learning_rate": 8.70339580971e-06, "loss": 0.005, "step": 3775 }, { "epoch": 222.11764705882354, "grad_norm": 0.3536698520183563, "learning_rate": 8.698673287508493e-06, "loss": 0.0073, "step": 3776 }, { "epoch": 222.1764705882353, "grad_norm": 0.464478462934494, "learning_rate": 8.693951060512718e-06, "loss": 0.0097, "step": 3777 }, { "epoch": 222.23529411764707, "grad_norm": 0.4560844600200653, "learning_rate": 8.689229129793914e-06, "loss": 0.0061, "step": 3778 }, { "epoch": 222.2941176470588, "grad_norm": 0.48446956276893616, "learning_rate": 8.684507496423246e-06, "loss": 0.0114, "step": 3779 }, { "epoch": 222.35294117647058, "grad_norm": 0.6555633544921875, "learning_rate": 8.679786161471821e-06, "loss": 0.0138, "step": 3780 }, { "epoch": 222.41176470588235, "grad_norm": 0.31225091218948364, "learning_rate": 8.67506512601067e-06, "loss": 0.0067, "step": 3781 }, { "epoch": 222.47058823529412, "grad_norm": 0.23314917087554932, "learning_rate": 8.67034439111076e-06, "loss": 0.0038, "step": 3782 }, { "epoch": 222.52941176470588, "grad_norm": 0.582762598991394, "learning_rate": 8.665623957842988e-06, "loss": 0.0097, "step": 3783 }, { "epoch": 222.58823529411765, "grad_norm": 0.3478120267391205, "learning_rate": 8.660903827278184e-06, "loss": 0.0047, "step": 3784 }, { "epoch": 222.64705882352942, "grad_norm": 0.7796268463134766, "learning_rate": 8.65618400048711e-06, "loss": 0.006, "step": 3785 }, { "epoch": 222.7058823529412, "grad_norm": 0.7971413731575012, "learning_rate": 8.651464478540454e-06, "loss": 0.0064, "step": 3786 }, { "epoch": 222.76470588235293, "grad_norm": 0.6930517554283142, "learning_rate": 8.646745262508844e-06, "loss": 0.0069, "step": 3787 }, { "epoch": 222.8235294117647, "grad_norm": 0.439034640789032, "learning_rate": 8.642026353462828e-06, "loss": 0.011, "step": 3788 }, { "epoch": 222.88235294117646, "grad_norm": 0.3164282441139221, "learning_rate": 8.637307752472895e-06, "loss": 0.0041, "step": 3789 }, { "epoch": 222.94117647058823, "grad_norm": 0.6952687501907349, "learning_rate": 8.632589460609453e-06, "loss": 0.0055, "step": 3790 }, { "epoch": 223.0, "grad_norm": 0.4409971833229065, "learning_rate": 8.62787147894285e-06, "loss": 0.0057, "step": 3791 }, { "epoch": 223.05882352941177, "grad_norm": 3.954468011856079, "learning_rate": 8.623153808543359e-06, "loss": 0.0104, "step": 3792 }, { "epoch": 223.11764705882354, "grad_norm": 0.4339049160480499, "learning_rate": 8.618436450481182e-06, "loss": 0.0075, "step": 3793 }, { "epoch": 223.1764705882353, "grad_norm": 0.8380426168441772, "learning_rate": 8.613719405826447e-06, "loss": 0.0063, "step": 3794 }, { "epoch": 223.23529411764707, "grad_norm": 0.3270447552204132, "learning_rate": 8.609002675649223e-06, "loss": 0.0048, "step": 3795 }, { "epoch": 223.2941176470588, "grad_norm": 0.43155109882354736, "learning_rate": 8.604286261019491e-06, "loss": 0.0087, "step": 3796 }, { "epoch": 223.35294117647058, "grad_norm": 0.5294931530952454, "learning_rate": 8.599570163007175e-06, "loss": 0.0068, "step": 3797 }, { "epoch": 223.41176470588235, "grad_norm": 0.5053173899650574, "learning_rate": 8.59485438268212e-06, "loss": 0.0141, "step": 3798 }, { "epoch": 223.47058823529412, "grad_norm": 0.4914010465145111, "learning_rate": 8.590138921114095e-06, "loss": 0.008, "step": 3799 }, { "epoch": 223.52941176470588, "grad_norm": 0.2799346148967743, "learning_rate": 8.585423779372806e-06, "loss": 0.0046, "step": 3800 }, { "epoch": 223.58823529411765, "grad_norm": 0.29831168055534363, "learning_rate": 8.580708958527881e-06, "loss": 0.0043, "step": 3801 }, { "epoch": 223.64705882352942, "grad_norm": 0.3538876175880432, "learning_rate": 8.575994459648877e-06, "loss": 0.0079, "step": 3802 }, { "epoch": 223.7058823529412, "grad_norm": 1.0335819721221924, "learning_rate": 8.571280283805276e-06, "loss": 0.0181, "step": 3803 }, { "epoch": 223.76470588235293, "grad_norm": 0.731913149356842, "learning_rate": 8.566566432066491e-06, "loss": 0.0112, "step": 3804 }, { "epoch": 223.8235294117647, "grad_norm": 0.4541603624820709, "learning_rate": 8.561852905501853e-06, "loss": 0.0055, "step": 3805 }, { "epoch": 223.88235294117646, "grad_norm": 0.24044480919837952, "learning_rate": 8.55713970518063e-06, "loss": 0.0038, "step": 3806 }, { "epoch": 223.94117647058823, "grad_norm": 1.0406794548034668, "learning_rate": 8.552426832172006e-06, "loss": 0.0064, "step": 3807 }, { "epoch": 224.0, "grad_norm": 7.344024181365967, "learning_rate": 8.5477142875451e-06, "loss": 0.0152, "step": 3808 }, { "epoch": 224.05882352941177, "grad_norm": 0.824496328830719, "learning_rate": 8.543002072368948e-06, "loss": 0.0078, "step": 3809 }, { "epoch": 224.11764705882354, "grad_norm": 0.6503702402114868, "learning_rate": 8.538290187712518e-06, "loss": 0.0097, "step": 3810 }, { "epoch": 224.1764705882353, "grad_norm": 0.5702837705612183, "learning_rate": 8.5335786346447e-06, "loss": 0.0085, "step": 3811 }, { "epoch": 224.23529411764707, "grad_norm": 4.4777607917785645, "learning_rate": 8.528867414234302e-06, "loss": 0.0313, "step": 3812 }, { "epoch": 224.2941176470588, "grad_norm": 2.1127898693084717, "learning_rate": 8.524156527550075e-06, "loss": 0.0079, "step": 3813 }, { "epoch": 224.35294117647058, "grad_norm": 0.7909692525863647, "learning_rate": 8.519445975660677e-06, "loss": 0.0107, "step": 3814 }, { "epoch": 224.41176470588235, "grad_norm": 0.25657379627227783, "learning_rate": 8.514735759634694e-06, "loss": 0.0037, "step": 3815 }, { "epoch": 224.47058823529412, "grad_norm": 0.25533294677734375, "learning_rate": 8.510025880540644e-06, "loss": 0.0043, "step": 3816 }, { "epoch": 224.52941176470588, "grad_norm": 0.8342107534408569, "learning_rate": 8.505316339446955e-06, "loss": 0.0071, "step": 3817 }, { "epoch": 224.58823529411765, "grad_norm": 0.755322277545929, "learning_rate": 8.500607137421989e-06, "loss": 0.0103, "step": 3818 }, { "epoch": 224.64705882352942, "grad_norm": 0.39461851119995117, "learning_rate": 8.495898275534026e-06, "loss": 0.0044, "step": 3819 }, { "epoch": 224.7058823529412, "grad_norm": 0.5714526176452637, "learning_rate": 8.491189754851274e-06, "loss": 0.0051, "step": 3820 }, { "epoch": 224.76470588235293, "grad_norm": 1.337615728378296, "learning_rate": 8.486481576441857e-06, "loss": 0.012, "step": 3821 }, { "epoch": 224.8235294117647, "grad_norm": 0.6374750137329102, "learning_rate": 8.481773741373827e-06, "loss": 0.012, "step": 3822 }, { "epoch": 224.88235294117646, "grad_norm": 0.7392492294311523, "learning_rate": 8.477066250715151e-06, "loss": 0.0095, "step": 3823 }, { "epoch": 224.94117647058823, "grad_norm": 0.6298739910125732, "learning_rate": 8.472359105533727e-06, "loss": 0.0091, "step": 3824 }, { "epoch": 225.0, "grad_norm": 0.5613191723823547, "learning_rate": 8.46765230689737e-06, "loss": 0.0069, "step": 3825 }, { "epoch": 225.05882352941177, "grad_norm": 0.4837597608566284, "learning_rate": 8.462945855873811e-06, "loss": 0.0081, "step": 3826 }, { "epoch": 225.11764705882354, "grad_norm": 0.6188231706619263, "learning_rate": 8.458239753530715e-06, "loss": 0.007, "step": 3827 }, { "epoch": 225.1764705882353, "grad_norm": 0.6569743752479553, "learning_rate": 8.453534000935652e-06, "loss": 0.0073, "step": 3828 }, { "epoch": 225.23529411764707, "grad_norm": 0.6702036261558533, "learning_rate": 8.44882859915613e-06, "loss": 0.006, "step": 3829 }, { "epoch": 225.2941176470588, "grad_norm": 0.7594362497329712, "learning_rate": 8.444123549259562e-06, "loss": 0.0081, "step": 3830 }, { "epoch": 225.35294117647058, "grad_norm": 0.7209574580192566, "learning_rate": 8.439418852313293e-06, "loss": 0.0076, "step": 3831 }, { "epoch": 225.41176470588235, "grad_norm": 0.5463789701461792, "learning_rate": 8.434714509384578e-06, "loss": 0.0077, "step": 3832 }, { "epoch": 225.47058823529412, "grad_norm": 0.48054030537605286, "learning_rate": 8.4300105215406e-06, "loss": 0.0102, "step": 3833 }, { "epoch": 225.52941176470588, "grad_norm": 0.5884886384010315, "learning_rate": 8.425306889848454e-06, "loss": 0.0082, "step": 3834 }, { "epoch": 225.58823529411765, "grad_norm": 0.49518540501594543, "learning_rate": 8.42060361537516e-06, "loss": 0.01, "step": 3835 }, { "epoch": 225.64705882352942, "grad_norm": 0.44663578271865845, "learning_rate": 8.415900699187656e-06, "loss": 0.0067, "step": 3836 }, { "epoch": 225.7058823529412, "grad_norm": 0.579125702381134, "learning_rate": 8.411198142352797e-06, "loss": 0.01, "step": 3837 }, { "epoch": 225.76470588235293, "grad_norm": 0.6207137107849121, "learning_rate": 8.406495945937359e-06, "loss": 0.0073, "step": 3838 }, { "epoch": 225.8235294117647, "grad_norm": 0.7957891821861267, "learning_rate": 8.401794111008028e-06, "loss": 0.0081, "step": 3839 }, { "epoch": 225.88235294117646, "grad_norm": 2.118884325027466, "learning_rate": 8.397092638631422e-06, "loss": 0.0134, "step": 3840 }, { "epoch": 225.94117647058823, "grad_norm": 0.5258674621582031, "learning_rate": 8.392391529874064e-06, "loss": 0.0129, "step": 3841 }, { "epoch": 226.0, "grad_norm": 0.5664809346199036, "learning_rate": 8.387690785802403e-06, "loss": 0.0128, "step": 3842 }, { "epoch": 226.05882352941177, "grad_norm": 0.6833524107933044, "learning_rate": 8.382990407482799e-06, "loss": 0.0055, "step": 3843 }, { "epoch": 226.11764705882354, "grad_norm": 0.4303269684314728, "learning_rate": 8.378290395981535e-06, "loss": 0.0077, "step": 3844 }, { "epoch": 226.1764705882353, "grad_norm": 0.9381702542304993, "learning_rate": 8.373590752364804e-06, "loss": 0.0083, "step": 3845 }, { "epoch": 226.23529411764707, "grad_norm": 1.0337282419204712, "learning_rate": 8.368891477698726e-06, "loss": 0.0096, "step": 3846 }, { "epoch": 226.2941176470588, "grad_norm": 0.39054548740386963, "learning_rate": 8.36419257304932e-06, "loss": 0.0071, "step": 3847 }, { "epoch": 226.35294117647058, "grad_norm": 0.36655059456825256, "learning_rate": 8.359494039482544e-06, "loss": 0.0047, "step": 3848 }, { "epoch": 226.41176470588235, "grad_norm": 0.8034558296203613, "learning_rate": 8.35479587806425e-06, "loss": 0.0121, "step": 3849 }, { "epoch": 226.47058823529412, "grad_norm": 0.493578165769577, "learning_rate": 8.35009808986022e-06, "loss": 0.0066, "step": 3850 }, { "epoch": 226.52941176470588, "grad_norm": 0.5218874216079712, "learning_rate": 8.345400675936146e-06, "loss": 0.0096, "step": 3851 }, { "epoch": 226.58823529411765, "grad_norm": 0.5078453421592712, "learning_rate": 8.340703637357633e-06, "loss": 0.009, "step": 3852 }, { "epoch": 226.64705882352942, "grad_norm": 0.35236144065856934, "learning_rate": 8.336006975190205e-06, "loss": 0.0053, "step": 3853 }, { "epoch": 226.7058823529412, "grad_norm": 1.115964651107788, "learning_rate": 8.331310690499299e-06, "loss": 0.0057, "step": 3854 }, { "epoch": 226.76470588235293, "grad_norm": 0.7628450393676758, "learning_rate": 8.326614784350267e-06, "loss": 0.0071, "step": 3855 }, { "epoch": 226.8235294117647, "grad_norm": 0.540336549282074, "learning_rate": 8.321919257808372e-06, "loss": 0.0093, "step": 3856 }, { "epoch": 226.88235294117646, "grad_norm": 0.3753976821899414, "learning_rate": 8.317224111938797e-06, "loss": 0.007, "step": 3857 }, { "epoch": 226.94117647058823, "grad_norm": 0.4288780093193054, "learning_rate": 8.31252934780663e-06, "loss": 0.0062, "step": 3858 }, { "epoch": 227.0, "grad_norm": 0.5786919593811035, "learning_rate": 8.307834966476885e-06, "loss": 0.0108, "step": 3859 }, { "epoch": 227.05882352941177, "grad_norm": 0.49204590916633606, "learning_rate": 8.303140969014473e-06, "loss": 0.0042, "step": 3860 }, { "epoch": 227.11764705882354, "grad_norm": 0.7011065483093262, "learning_rate": 8.298447356484231e-06, "loss": 0.0074, "step": 3861 }, { "epoch": 227.1764705882353, "grad_norm": 0.39451128244400024, "learning_rate": 8.293754129950905e-06, "loss": 0.0033, "step": 3862 }, { "epoch": 227.23529411764707, "grad_norm": 0.2349790334701538, "learning_rate": 8.289061290479151e-06, "loss": 0.0039, "step": 3863 }, { "epoch": 227.2941176470588, "grad_norm": 0.9679757952690125, "learning_rate": 8.284368839133539e-06, "loss": 0.0051, "step": 3864 }, { "epoch": 227.35294117647058, "grad_norm": 0.38130131363868713, "learning_rate": 8.279676776978551e-06, "loss": 0.0071, "step": 3865 }, { "epoch": 227.41176470588235, "grad_norm": 0.41210415959358215, "learning_rate": 8.274985105078581e-06, "loss": 0.0046, "step": 3866 }, { "epoch": 227.47058823529412, "grad_norm": 0.5373477935791016, "learning_rate": 8.27029382449793e-06, "loss": 0.0142, "step": 3867 }, { "epoch": 227.52941176470588, "grad_norm": 0.4420635998249054, "learning_rate": 8.265602936300821e-06, "loss": 0.0077, "step": 3868 }, { "epoch": 227.58823529411765, "grad_norm": 1.015934944152832, "learning_rate": 8.260912441551376e-06, "loss": 0.0083, "step": 3869 }, { "epoch": 227.64705882352942, "grad_norm": 0.4483306109905243, "learning_rate": 8.256222341313635e-06, "loss": 0.0073, "step": 3870 }, { "epoch": 227.7058823529412, "grad_norm": 0.33711424469947815, "learning_rate": 8.251532636651544e-06, "loss": 0.0052, "step": 3871 }, { "epoch": 227.76470588235293, "grad_norm": 0.5023605227470398, "learning_rate": 8.246843328628966e-06, "loss": 0.0093, "step": 3872 }, { "epoch": 227.8235294117647, "grad_norm": 0.5362950563430786, "learning_rate": 8.242154418309665e-06, "loss": 0.0099, "step": 3873 }, { "epoch": 227.88235294117646, "grad_norm": 0.7037230134010315, "learning_rate": 8.237465906757323e-06, "loss": 0.0097, "step": 3874 }, { "epoch": 227.94117647058823, "grad_norm": 0.5699377655982971, "learning_rate": 8.232777795035526e-06, "loss": 0.0108, "step": 3875 }, { "epoch": 228.0, "grad_norm": 0.3360869586467743, "learning_rate": 8.228090084207773e-06, "loss": 0.0062, "step": 3876 }, { "epoch": 228.05882352941177, "grad_norm": 0.9226741790771484, "learning_rate": 8.223402775337468e-06, "loss": 0.0058, "step": 3877 }, { "epoch": 228.11764705882354, "grad_norm": 0.7655670046806335, "learning_rate": 8.21871586948793e-06, "loss": 0.0081, "step": 3878 }, { "epoch": 228.1764705882353, "grad_norm": 0.4117063879966736, "learning_rate": 8.214029367722378e-06, "loss": 0.0076, "step": 3879 }, { "epoch": 228.23529411764707, "grad_norm": 0.6770129203796387, "learning_rate": 8.209343271103947e-06, "loss": 0.0079, "step": 3880 }, { "epoch": 228.2941176470588, "grad_norm": 0.801058828830719, "learning_rate": 8.204657580695679e-06, "loss": 0.0086, "step": 3881 }, { "epoch": 228.35294117647058, "grad_norm": 0.4482906758785248, "learning_rate": 8.199972297560516e-06, "loss": 0.0071, "step": 3882 }, { "epoch": 228.41176470588235, "grad_norm": 0.4410945773124695, "learning_rate": 8.195287422761319e-06, "loss": 0.0088, "step": 3883 }, { "epoch": 228.47058823529412, "grad_norm": 0.6792762875556946, "learning_rate": 8.190602957360848e-06, "loss": 0.0097, "step": 3884 }, { "epoch": 228.52941176470588, "grad_norm": 0.8171510100364685, "learning_rate": 8.185918902421777e-06, "loss": 0.0108, "step": 3885 }, { "epoch": 228.58823529411765, "grad_norm": 0.6338450908660889, "learning_rate": 8.181235259006676e-06, "loss": 0.0041, "step": 3886 }, { "epoch": 228.64705882352942, "grad_norm": 0.596650242805481, "learning_rate": 8.176552028178038e-06, "loss": 0.0061, "step": 3887 }, { "epoch": 228.7058823529412, "grad_norm": 0.5713253021240234, "learning_rate": 8.171869210998244e-06, "loss": 0.011, "step": 3888 }, { "epoch": 228.76470588235293, "grad_norm": 0.4714401364326477, "learning_rate": 8.1671868085296e-06, "loss": 0.006, "step": 3889 }, { "epoch": 228.8235294117647, "grad_norm": 0.48883405327796936, "learning_rate": 8.162504821834296e-06, "loss": 0.0072, "step": 3890 }, { "epoch": 228.88235294117646, "grad_norm": 0.3425431549549103, "learning_rate": 8.157823251974451e-06, "loss": 0.0071, "step": 3891 }, { "epoch": 228.94117647058823, "grad_norm": 0.33643361926078796, "learning_rate": 8.153142100012073e-06, "loss": 0.0054, "step": 3892 }, { "epoch": 229.0, "grad_norm": 0.2827605903148651, "learning_rate": 8.148461367009081e-06, "loss": 0.0051, "step": 3893 }, { "epoch": 229.05882352941177, "grad_norm": 0.8216330409049988, "learning_rate": 8.143781054027298e-06, "loss": 0.0148, "step": 3894 }, { "epoch": 229.11764705882354, "grad_norm": 0.3967388868331909, "learning_rate": 8.139101162128453e-06, "loss": 0.0086, "step": 3895 }, { "epoch": 229.1764705882353, "grad_norm": 0.20511308312416077, "learning_rate": 8.134421692374181e-06, "loss": 0.0036, "step": 3896 }, { "epoch": 229.23529411764707, "grad_norm": 0.5773098468780518, "learning_rate": 8.129742645826013e-06, "loss": 0.0122, "step": 3897 }, { "epoch": 229.2941176470588, "grad_norm": 0.5655599236488342, "learning_rate": 8.125064023545394e-06, "loss": 0.0083, "step": 3898 }, { "epoch": 229.35294117647058, "grad_norm": 0.398944616317749, "learning_rate": 8.120385826593668e-06, "loss": 0.0037, "step": 3899 }, { "epoch": 229.41176470588235, "grad_norm": 0.7326630353927612, "learning_rate": 8.115708056032086e-06, "loss": 0.0121, "step": 3900 }, { "epoch": 229.47058823529412, "grad_norm": 0.6311984062194824, "learning_rate": 8.111030712921792e-06, "loss": 0.0085, "step": 3901 }, { "epoch": 229.52941176470588, "grad_norm": 0.6214360594749451, "learning_rate": 8.106353798323849e-06, "loss": 0.0047, "step": 3902 }, { "epoch": 229.58823529411765, "grad_norm": 0.5248537659645081, "learning_rate": 8.101677313299206e-06, "loss": 0.0078, "step": 3903 }, { "epoch": 229.64705882352942, "grad_norm": 0.4119224548339844, "learning_rate": 8.09700125890873e-06, "loss": 0.0035, "step": 3904 }, { "epoch": 229.7058823529412, "grad_norm": 0.36556026339530945, "learning_rate": 8.092325636213181e-06, "loss": 0.0059, "step": 3905 }, { "epoch": 229.76470588235293, "grad_norm": 0.20849117636680603, "learning_rate": 8.087650446273219e-06, "loss": 0.0036, "step": 3906 }, { "epoch": 229.8235294117647, "grad_norm": 0.6259618997573853, "learning_rate": 8.082975690149415e-06, "loss": 0.0058, "step": 3907 }, { "epoch": 229.88235294117646, "grad_norm": 0.563197135925293, "learning_rate": 8.078301368902233e-06, "loss": 0.0082, "step": 3908 }, { "epoch": 229.94117647058823, "grad_norm": 0.5682503581047058, "learning_rate": 8.073627483592049e-06, "loss": 0.007, "step": 3909 }, { "epoch": 230.0, "grad_norm": 0.3051157295703888, "learning_rate": 8.068954035279121e-06, "loss": 0.0034, "step": 3910 }, { "epoch": 230.05882352941177, "grad_norm": 0.4732005298137665, "learning_rate": 8.064281025023631e-06, "loss": 0.0084, "step": 3911 }, { "epoch": 230.11764705882354, "grad_norm": 0.4195215106010437, "learning_rate": 8.059608453885646e-06, "loss": 0.005, "step": 3912 }, { "epoch": 230.1764705882353, "grad_norm": 0.709376335144043, "learning_rate": 8.054936322925138e-06, "loss": 0.0076, "step": 3913 }, { "epoch": 230.23529411764707, "grad_norm": 0.5570607781410217, "learning_rate": 8.050264633201978e-06, "loss": 0.0107, "step": 3914 }, { "epoch": 230.2941176470588, "grad_norm": 0.34454214572906494, "learning_rate": 8.04559338577594e-06, "loss": 0.0046, "step": 3915 }, { "epoch": 230.35294117647058, "grad_norm": 0.6146670579910278, "learning_rate": 8.040922581706695e-06, "loss": 0.0169, "step": 3916 }, { "epoch": 230.41176470588235, "grad_norm": 0.8074133396148682, "learning_rate": 8.036252222053816e-06, "loss": 0.0133, "step": 3917 }, { "epoch": 230.47058823529412, "grad_norm": 0.7548741102218628, "learning_rate": 8.03158230787677e-06, "loss": 0.0092, "step": 3918 }, { "epoch": 230.52941176470588, "grad_norm": 0.42326006293296814, "learning_rate": 8.026912840234926e-06, "loss": 0.004, "step": 3919 }, { "epoch": 230.58823529411765, "grad_norm": 0.449770987033844, "learning_rate": 8.022243820187554e-06, "loss": 0.0052, "step": 3920 }, { "epoch": 230.64705882352942, "grad_norm": 0.4602759778499603, "learning_rate": 8.017575248793822e-06, "loss": 0.008, "step": 3921 }, { "epoch": 230.7058823529412, "grad_norm": 0.4303876757621765, "learning_rate": 8.01290712711279e-06, "loss": 0.003, "step": 3922 }, { "epoch": 230.76470588235293, "grad_norm": 0.7941396832466125, "learning_rate": 8.008239456203423e-06, "loss": 0.0048, "step": 3923 }, { "epoch": 230.8235294117647, "grad_norm": 0.42196452617645264, "learning_rate": 8.003572237124582e-06, "loss": 0.0063, "step": 3924 }, { "epoch": 230.88235294117646, "grad_norm": 0.40053790807724, "learning_rate": 7.998905470935024e-06, "loss": 0.0053, "step": 3925 }, { "epoch": 230.94117647058823, "grad_norm": 0.5431106686592102, "learning_rate": 7.994239158693404e-06, "loss": 0.0046, "step": 3926 }, { "epoch": 231.0, "grad_norm": 0.6576851606369019, "learning_rate": 7.989573301458274e-06, "loss": 0.006, "step": 3927 }, { "epoch": 231.05882352941177, "grad_norm": 0.8811235427856445, "learning_rate": 7.984907900288083e-06, "loss": 0.0137, "step": 3928 }, { "epoch": 231.11764705882354, "grad_norm": 0.38606250286102295, "learning_rate": 7.980242956241176e-06, "loss": 0.0062, "step": 3929 }, { "epoch": 231.1764705882353, "grad_norm": 0.27511870861053467, "learning_rate": 7.975578470375798e-06, "loss": 0.0038, "step": 3930 }, { "epoch": 231.23529411764707, "grad_norm": 0.3245171010494232, "learning_rate": 7.970914443750083e-06, "loss": 0.0036, "step": 3931 }, { "epoch": 231.2941176470588, "grad_norm": 0.6911083459854126, "learning_rate": 7.966250877422064e-06, "loss": 0.0121, "step": 3932 }, { "epoch": 231.35294117647058, "grad_norm": 0.5258083343505859, "learning_rate": 7.961587772449673e-06, "loss": 0.0088, "step": 3933 }, { "epoch": 231.41176470588235, "grad_norm": 0.4235660135746002, "learning_rate": 7.956925129890734e-06, "loss": 0.0066, "step": 3934 }, { "epoch": 231.47058823529412, "grad_norm": 0.438972145318985, "learning_rate": 7.952262950802967e-06, "loss": 0.0084, "step": 3935 }, { "epoch": 231.52941176470588, "grad_norm": 0.47985273599624634, "learning_rate": 7.947601236243982e-06, "loss": 0.0098, "step": 3936 }, { "epoch": 231.58823529411765, "grad_norm": 0.4691581726074219, "learning_rate": 7.942939987271295e-06, "loss": 0.0093, "step": 3937 }, { "epoch": 231.64705882352942, "grad_norm": 0.44553786516189575, "learning_rate": 7.938279204942302e-06, "loss": 0.0067, "step": 3938 }, { "epoch": 231.7058823529412, "grad_norm": 1.3065789937973022, "learning_rate": 7.93361889031431e-06, "loss": 0.0075, "step": 3939 }, { "epoch": 231.76470588235293, "grad_norm": 0.5378535985946655, "learning_rate": 7.9289590444445e-06, "loss": 0.0088, "step": 3940 }, { "epoch": 231.8235294117647, "grad_norm": 0.4999546408653259, "learning_rate": 7.924299668389967e-06, "loss": 0.0053, "step": 3941 }, { "epoch": 231.88235294117646, "grad_norm": 0.8135521411895752, "learning_rate": 7.919640763207682e-06, "loss": 0.0068, "step": 3942 }, { "epoch": 231.94117647058823, "grad_norm": 0.3705199956893921, "learning_rate": 7.914982329954524e-06, "loss": 0.0034, "step": 3943 }, { "epoch": 232.0, "grad_norm": 0.5154992938041687, "learning_rate": 7.91032436968725e-06, "loss": 0.0083, "step": 3944 }, { "epoch": 232.05882352941177, "grad_norm": 0.21808572113513947, "learning_rate": 7.905666883462525e-06, "loss": 0.0039, "step": 3945 }, { "epoch": 232.11764705882354, "grad_norm": 0.8844971656799316, "learning_rate": 7.901009872336892e-06, "loss": 0.0089, "step": 3946 }, { "epoch": 232.1764705882353, "grad_norm": 0.3625786602497101, "learning_rate": 7.896353337366795e-06, "loss": 0.0045, "step": 3947 }, { "epoch": 232.23529411764707, "grad_norm": 0.3544819951057434, "learning_rate": 7.891697279608574e-06, "loss": 0.0064, "step": 3948 }, { "epoch": 232.2941176470588, "grad_norm": 0.5546526312828064, "learning_rate": 7.887041700118447e-06, "loss": 0.0055, "step": 3949 }, { "epoch": 232.35294117647058, "grad_norm": 0.36180704832077026, "learning_rate": 7.882386599952536e-06, "loss": 0.0064, "step": 3950 }, { "epoch": 232.41176470588235, "grad_norm": 0.4100615084171295, "learning_rate": 7.877731980166846e-06, "loss": 0.0069, "step": 3951 }, { "epoch": 232.47058823529412, "grad_norm": 0.350301057100296, "learning_rate": 7.873077841817282e-06, "loss": 0.0038, "step": 3952 }, { "epoch": 232.52941176470588, "grad_norm": 0.5015125870704651, "learning_rate": 7.868424185959629e-06, "loss": 0.0048, "step": 3953 }, { "epoch": 232.58823529411765, "grad_norm": 0.8611544370651245, "learning_rate": 7.863771013649573e-06, "loss": 0.013, "step": 3954 }, { "epoch": 232.64705882352942, "grad_norm": 0.31366217136383057, "learning_rate": 7.85911832594268e-06, "loss": 0.0053, "step": 3955 }, { "epoch": 232.7058823529412, "grad_norm": 0.5188999772071838, "learning_rate": 7.854466123894416e-06, "loss": 0.0059, "step": 3956 }, { "epoch": 232.76470588235293, "grad_norm": 0.2959798574447632, "learning_rate": 7.849814408560129e-06, "loss": 0.0048, "step": 3957 }, { "epoch": 232.8235294117647, "grad_norm": 0.49844029545783997, "learning_rate": 7.845163180995064e-06, "loss": 0.0083, "step": 3958 }, { "epoch": 232.88235294117646, "grad_norm": 0.4910818040370941, "learning_rate": 7.840512442254347e-06, "loss": 0.005, "step": 3959 }, { "epoch": 232.94117647058823, "grad_norm": 0.39979007840156555, "learning_rate": 7.835862193392996e-06, "loss": 0.009, "step": 3960 }, { "epoch": 233.0, "grad_norm": 0.8131420016288757, "learning_rate": 7.831212435465925e-06, "loss": 0.0107, "step": 3961 }, { "epoch": 233.05882352941177, "grad_norm": 0.4837510287761688, "learning_rate": 7.826563169527925e-06, "loss": 0.0059, "step": 3962 }, { "epoch": 233.11764705882354, "grad_norm": 0.8268908262252808, "learning_rate": 7.821914396633685e-06, "loss": 0.0055, "step": 3963 }, { "epoch": 233.1764705882353, "grad_norm": 0.2699660062789917, "learning_rate": 7.817266117837774e-06, "loss": 0.0035, "step": 3964 }, { "epoch": 233.23529411764707, "grad_norm": 0.5758556723594666, "learning_rate": 7.81261833419466e-06, "loss": 0.0065, "step": 3965 }, { "epoch": 233.2941176470588, "grad_norm": 0.4118811786174774, "learning_rate": 7.807971046758686e-06, "loss": 0.0058, "step": 3966 }, { "epoch": 233.35294117647058, "grad_norm": 0.5689365267753601, "learning_rate": 7.803324256584092e-06, "loss": 0.0096, "step": 3967 }, { "epoch": 233.41176470588235, "grad_norm": 0.3563728332519531, "learning_rate": 7.798677964724996e-06, "loss": 0.0038, "step": 3968 }, { "epoch": 233.47058823529412, "grad_norm": 0.5693036317825317, "learning_rate": 7.794032172235418e-06, "loss": 0.0074, "step": 3969 }, { "epoch": 233.52941176470588, "grad_norm": 0.3937898576259613, "learning_rate": 7.789386880169246e-06, "loss": 0.0081, "step": 3970 }, { "epoch": 233.58823529411765, "grad_norm": 0.6244595050811768, "learning_rate": 7.784742089580267e-06, "loss": 0.0066, "step": 3971 }, { "epoch": 233.64705882352942, "grad_norm": 0.26976388692855835, "learning_rate": 7.780097801522154e-06, "loss": 0.0062, "step": 3972 }, { "epoch": 233.7058823529412, "grad_norm": 0.37679412961006165, "learning_rate": 7.775454017048455e-06, "loss": 0.0053, "step": 3973 }, { "epoch": 233.76470588235293, "grad_norm": 0.5378260612487793, "learning_rate": 7.770810737212621e-06, "loss": 0.0048, "step": 3974 }, { "epoch": 233.8235294117647, "grad_norm": 0.4790642261505127, "learning_rate": 7.766167963067974e-06, "loss": 0.0097, "step": 3975 }, { "epoch": 233.88235294117646, "grad_norm": 0.6920953392982483, "learning_rate": 7.761525695667726e-06, "loss": 0.0111, "step": 3976 }, { "epoch": 233.94117647058823, "grad_norm": 0.45429688692092896, "learning_rate": 7.756883936064975e-06, "loss": 0.006, "step": 3977 }, { "epoch": 234.0, "grad_norm": 1.5169446468353271, "learning_rate": 7.752242685312709e-06, "loss": 0.0117, "step": 3978 }, { "epoch": 234.05882352941177, "grad_norm": 0.22287093102931976, "learning_rate": 7.747601944463786e-06, "loss": 0.0032, "step": 3979 }, { "epoch": 234.11764705882354, "grad_norm": 0.35918527841567993, "learning_rate": 7.742961714570964e-06, "loss": 0.0077, "step": 3980 }, { "epoch": 234.1764705882353, "grad_norm": 0.41223275661468506, "learning_rate": 7.738321996686873e-06, "loss": 0.0074, "step": 3981 }, { "epoch": 234.23529411764707, "grad_norm": 0.534946620464325, "learning_rate": 7.733682791864039e-06, "loss": 0.0076, "step": 3982 }, { "epoch": 234.2941176470588, "grad_norm": 0.4200166165828705, "learning_rate": 7.729044101154857e-06, "loss": 0.0074, "step": 3983 }, { "epoch": 234.35294117647058, "grad_norm": 0.30143576860427856, "learning_rate": 7.724405925611618e-06, "loss": 0.0044, "step": 3984 }, { "epoch": 234.41176470588235, "grad_norm": 0.19736845791339874, "learning_rate": 7.719768266286492e-06, "loss": 0.0028, "step": 3985 }, { "epoch": 234.47058823529412, "grad_norm": 0.47746020555496216, "learning_rate": 7.715131124231527e-06, "loss": 0.0066, "step": 3986 }, { "epoch": 234.52941176470588, "grad_norm": 0.4333867132663727, "learning_rate": 7.710494500498662e-06, "loss": 0.0098, "step": 3987 }, { "epoch": 234.58823529411765, "grad_norm": 0.48534733057022095, "learning_rate": 7.705858396139712e-06, "loss": 0.0082, "step": 3988 }, { "epoch": 234.64705882352942, "grad_norm": 0.32600706815719604, "learning_rate": 7.701222812206376e-06, "loss": 0.0043, "step": 3989 }, { "epoch": 234.7058823529412, "grad_norm": 1.1223355531692505, "learning_rate": 7.696587749750235e-06, "loss": 0.0084, "step": 3990 }, { "epoch": 234.76470588235293, "grad_norm": 0.41964367032051086, "learning_rate": 7.691953209822758e-06, "loss": 0.0082, "step": 3991 }, { "epoch": 234.8235294117647, "grad_norm": 0.29235291481018066, "learning_rate": 7.687319193475282e-06, "loss": 0.0065, "step": 3992 }, { "epoch": 234.88235294117646, "grad_norm": 0.42748165130615234, "learning_rate": 7.682685701759035e-06, "loss": 0.0063, "step": 3993 }, { "epoch": 234.94117647058823, "grad_norm": 0.3071495592594147, "learning_rate": 7.678052735725126e-06, "loss": 0.0038, "step": 3994 }, { "epoch": 235.0, "grad_norm": 0.27646467089653015, "learning_rate": 7.673420296424541e-06, "loss": 0.0032, "step": 3995 }, { "epoch": 235.05882352941177, "grad_norm": 1.3347233533859253, "learning_rate": 7.668788384908148e-06, "loss": 0.0087, "step": 3996 }, { "epoch": 235.11764705882354, "grad_norm": 0.6270223259925842, "learning_rate": 7.664157002226696e-06, "loss": 0.0103, "step": 3997 }, { "epoch": 235.1764705882353, "grad_norm": 0.517241358757019, "learning_rate": 7.659526149430814e-06, "loss": 0.012, "step": 3998 }, { "epoch": 235.23529411764707, "grad_norm": 0.6176739931106567, "learning_rate": 7.654895827571005e-06, "loss": 0.0066, "step": 3999 }, { "epoch": 235.2941176470588, "grad_norm": 0.8632903695106506, "learning_rate": 7.650266037697664e-06, "loss": 0.0155, "step": 4000 }, { "epoch": 235.35294117647058, "grad_norm": 0.5483925342559814, "learning_rate": 7.645636780861051e-06, "loss": 0.0049, "step": 4001 }, { "epoch": 235.41176470588235, "grad_norm": 0.3990321159362793, "learning_rate": 7.64100805811132e-06, "loss": 0.0058, "step": 4002 }, { "epoch": 235.47058823529412, "grad_norm": 0.44446811079978943, "learning_rate": 7.636379870498489e-06, "loss": 0.0059, "step": 4003 }, { "epoch": 235.52941176470588, "grad_norm": 0.35318729281425476, "learning_rate": 7.631752219072465e-06, "loss": 0.0049, "step": 4004 }, { "epoch": 235.58823529411765, "grad_norm": 0.611282467842102, "learning_rate": 7.627125104883027e-06, "loss": 0.0115, "step": 4005 }, { "epoch": 235.64705882352942, "grad_norm": 0.23358161747455597, "learning_rate": 7.62249852897984e-06, "loss": 0.0025, "step": 4006 }, { "epoch": 235.7058823529412, "grad_norm": 0.19767151772975922, "learning_rate": 7.617872492412437e-06, "loss": 0.0032, "step": 4007 }, { "epoch": 235.76470588235293, "grad_norm": 1.0564677715301514, "learning_rate": 7.613246996230238e-06, "loss": 0.007, "step": 4008 }, { "epoch": 235.8235294117647, "grad_norm": 0.2788470685482025, "learning_rate": 7.608622041482531e-06, "loss": 0.0044, "step": 4009 }, { "epoch": 235.88235294117646, "grad_norm": 0.38043099641799927, "learning_rate": 7.603997629218491e-06, "loss": 0.0062, "step": 4010 }, { "epoch": 235.94117647058823, "grad_norm": 1.4863396883010864, "learning_rate": 7.599373760487162e-06, "loss": 0.006, "step": 4011 }, { "epoch": 236.0, "grad_norm": 0.3726658523082733, "learning_rate": 7.594750436337467e-06, "loss": 0.0055, "step": 4012 }, { "epoch": 236.05882352941177, "grad_norm": 0.45620661973953247, "learning_rate": 7.5901276578182095e-06, "loss": 0.01, "step": 4013 }, { "epoch": 236.11764705882354, "grad_norm": 0.34737348556518555, "learning_rate": 7.585505425978064e-06, "loss": 0.007, "step": 4014 }, { "epoch": 236.1764705882353, "grad_norm": 0.4854428768157959, "learning_rate": 7.580883741865584e-06, "loss": 0.0099, "step": 4015 }, { "epoch": 236.23529411764707, "grad_norm": 0.7368125319480896, "learning_rate": 7.576262606529195e-06, "loss": 0.0055, "step": 4016 }, { "epoch": 236.2941176470588, "grad_norm": 0.5199756026268005, "learning_rate": 7.5716420210172044e-06, "loss": 0.0088, "step": 4017 }, { "epoch": 236.35294117647058, "grad_norm": 0.2815396785736084, "learning_rate": 7.567021986377786e-06, "loss": 0.0038, "step": 4018 }, { "epoch": 236.41176470588235, "grad_norm": 0.3321971595287323, "learning_rate": 7.562402503659e-06, "loss": 0.0052, "step": 4019 }, { "epoch": 236.47058823529412, "grad_norm": 0.771985650062561, "learning_rate": 7.557783573908774e-06, "loss": 0.0065, "step": 4020 }, { "epoch": 236.52941176470588, "grad_norm": 0.3812125623226166, "learning_rate": 7.553165198174909e-06, "loss": 0.0037, "step": 4021 }, { "epoch": 236.58823529411765, "grad_norm": 0.40467673540115356, "learning_rate": 7.548547377505086e-06, "loss": 0.007, "step": 4022 }, { "epoch": 236.64705882352942, "grad_norm": 0.7779743075370789, "learning_rate": 7.543930112946851e-06, "loss": 0.0096, "step": 4023 }, { "epoch": 236.7058823529412, "grad_norm": 0.4994753301143646, "learning_rate": 7.539313405547636e-06, "loss": 0.0079, "step": 4024 }, { "epoch": 236.76470588235293, "grad_norm": 0.7331644892692566, "learning_rate": 7.534697256354735e-06, "loss": 0.0073, "step": 4025 }, { "epoch": 236.8235294117647, "grad_norm": 0.7132250666618347, "learning_rate": 7.530081666415325e-06, "loss": 0.0089, "step": 4026 }, { "epoch": 236.88235294117646, "grad_norm": 0.6572970747947693, "learning_rate": 7.525466636776452e-06, "loss": 0.0042, "step": 4027 }, { "epoch": 236.94117647058823, "grad_norm": 0.39682525396347046, "learning_rate": 7.520852168485029e-06, "loss": 0.0064, "step": 4028 }, { "epoch": 237.0, "grad_norm": 0.4439954161643982, "learning_rate": 7.516238262587851e-06, "loss": 0.012, "step": 4029 }, { "epoch": 237.05882352941177, "grad_norm": 0.745228111743927, "learning_rate": 7.511624920131582e-06, "loss": 0.0066, "step": 4030 }, { "epoch": 237.11764705882354, "grad_norm": 0.34764376282691956, "learning_rate": 7.50701214216276e-06, "loss": 0.0046, "step": 4031 }, { "epoch": 237.1764705882353, "grad_norm": 0.28427910804748535, "learning_rate": 7.502399929727786e-06, "loss": 0.0041, "step": 4032 }, { "epoch": 237.23529411764707, "grad_norm": 0.4445051848888397, "learning_rate": 7.497788283872947e-06, "loss": 0.0051, "step": 4033 }, { "epoch": 237.2941176470588, "grad_norm": 0.9580772519111633, "learning_rate": 7.493177205644389e-06, "loss": 0.0079, "step": 4034 }, { "epoch": 237.35294117647058, "grad_norm": 0.4796924591064453, "learning_rate": 7.488566696088137e-06, "loss": 0.0055, "step": 4035 }, { "epoch": 237.41176470588235, "grad_norm": 0.22922608256340027, "learning_rate": 7.483956756250084e-06, "loss": 0.0029, "step": 4036 }, { "epoch": 237.47058823529412, "grad_norm": 0.28849726915359497, "learning_rate": 7.4793473871759946e-06, "loss": 0.0068, "step": 4037 }, { "epoch": 237.52941176470588, "grad_norm": 0.3278886675834656, "learning_rate": 7.474738589911499e-06, "loss": 0.0067, "step": 4038 }, { "epoch": 237.58823529411765, "grad_norm": 0.44241008162498474, "learning_rate": 7.470130365502108e-06, "loss": 0.0047, "step": 4039 }, { "epoch": 237.64705882352942, "grad_norm": 0.46832743287086487, "learning_rate": 7.465522714993194e-06, "loss": 0.0083, "step": 4040 }, { "epoch": 237.7058823529412, "grad_norm": 0.4423748850822449, "learning_rate": 7.460915639429999e-06, "loss": 0.01, "step": 4041 }, { "epoch": 237.76470588235293, "grad_norm": 0.4908793866634369, "learning_rate": 7.456309139857642e-06, "loss": 0.0071, "step": 4042 }, { "epoch": 237.8235294117647, "grad_norm": 0.48126012086868286, "learning_rate": 7.451703217321104e-06, "loss": 0.0093, "step": 4043 }, { "epoch": 237.88235294117646, "grad_norm": 0.26363682746887207, "learning_rate": 7.447097872865238e-06, "loss": 0.0033, "step": 4044 }, { "epoch": 237.94117647058823, "grad_norm": 0.3336820602416992, "learning_rate": 7.442493107534765e-06, "loss": 0.0072, "step": 4045 }, { "epoch": 238.0, "grad_norm": 0.3320675790309906, "learning_rate": 7.4378889223742766e-06, "loss": 0.0038, "step": 4046 }, { "epoch": 238.05882352941177, "grad_norm": 0.47519686818122864, "learning_rate": 7.433285318428229e-06, "loss": 0.0104, "step": 4047 }, { "epoch": 238.11764705882354, "grad_norm": 0.6023996472358704, "learning_rate": 7.428682296740953e-06, "loss": 0.0065, "step": 4048 }, { "epoch": 238.1764705882353, "grad_norm": 0.5354682803153992, "learning_rate": 7.424079858356638e-06, "loss": 0.0049, "step": 4049 }, { "epoch": 238.23529411764707, "grad_norm": 0.8197327852249146, "learning_rate": 7.419478004319353e-06, "loss": 0.0044, "step": 4050 }, { "epoch": 238.2941176470588, "grad_norm": 0.4148412346839905, "learning_rate": 7.41487673567302e-06, "loss": 0.0082, "step": 4051 }, { "epoch": 238.35294117647058, "grad_norm": 0.16366781294345856, "learning_rate": 7.410276053461443e-06, "loss": 0.0026, "step": 4052 }, { "epoch": 238.41176470588235, "grad_norm": 0.5343950986862183, "learning_rate": 7.405675958728283e-06, "loss": 0.0067, "step": 4053 }, { "epoch": 238.47058823529412, "grad_norm": 0.5579627752304077, "learning_rate": 7.401076452517069e-06, "loss": 0.0082, "step": 4054 }, { "epoch": 238.52941176470588, "grad_norm": 0.46313589811325073, "learning_rate": 7.3964775358712e-06, "loss": 0.0109, "step": 4055 }, { "epoch": 238.58823529411765, "grad_norm": 0.9699116945266724, "learning_rate": 7.391879209833938e-06, "loss": 0.0053, "step": 4056 }, { "epoch": 238.64705882352942, "grad_norm": 0.34740927815437317, "learning_rate": 7.387281475448416e-06, "loss": 0.0067, "step": 4057 }, { "epoch": 238.7058823529412, "grad_norm": 0.4973260462284088, "learning_rate": 7.382684333757624e-06, "loss": 0.0054, "step": 4058 }, { "epoch": 238.76470588235293, "grad_norm": 0.21869488060474396, "learning_rate": 7.378087785804426e-06, "loss": 0.0031, "step": 4059 }, { "epoch": 238.8235294117647, "grad_norm": 0.4074307978153229, "learning_rate": 7.3734918326315476e-06, "loss": 0.0076, "step": 4060 }, { "epoch": 238.88235294117646, "grad_norm": 0.9286057353019714, "learning_rate": 7.368896475281582e-06, "loss": 0.0044, "step": 4061 }, { "epoch": 238.94117647058823, "grad_norm": 0.3293519616127014, "learning_rate": 7.364301714796982e-06, "loss": 0.0052, "step": 4062 }, { "epoch": 239.0, "grad_norm": 0.6526058316230774, "learning_rate": 7.35970755222007e-06, "loss": 0.0118, "step": 4063 }, { "epoch": 239.05882352941177, "grad_norm": 0.2176695615053177, "learning_rate": 7.355113988593028e-06, "loss": 0.0038, "step": 4064 }, { "epoch": 239.11764705882354, "grad_norm": 0.5078431963920593, "learning_rate": 7.350521024957911e-06, "loss": 0.0101, "step": 4065 }, { "epoch": 239.1764705882353, "grad_norm": 0.49799421429634094, "learning_rate": 7.345928662356627e-06, "loss": 0.0077, "step": 4066 }, { "epoch": 239.23529411764707, "grad_norm": 0.4193654954433441, "learning_rate": 7.341336901830953e-06, "loss": 0.0113, "step": 4067 }, { "epoch": 239.2941176470588, "grad_norm": 0.3723827302455902, "learning_rate": 7.336745744422532e-06, "loss": 0.0055, "step": 4068 }, { "epoch": 239.35294117647058, "grad_norm": 0.5056875348091125, "learning_rate": 7.332155191172863e-06, "loss": 0.0058, "step": 4069 }, { "epoch": 239.41176470588235, "grad_norm": 0.5535634756088257, "learning_rate": 7.327565243123317e-06, "loss": 0.0107, "step": 4070 }, { "epoch": 239.47058823529412, "grad_norm": 0.4298034608364105, "learning_rate": 7.322975901315118e-06, "loss": 0.0058, "step": 4071 }, { "epoch": 239.52941176470588, "grad_norm": 1.7891759872436523, "learning_rate": 7.318387166789362e-06, "loss": 0.01, "step": 4072 }, { "epoch": 239.58823529411765, "grad_norm": 0.42210060358047485, "learning_rate": 7.313799040587e-06, "loss": 0.0052, "step": 4073 }, { "epoch": 239.64705882352942, "grad_norm": 1.2445238828659058, "learning_rate": 7.309211523748847e-06, "loss": 0.0071, "step": 4074 }, { "epoch": 239.7058823529412, "grad_norm": 0.28817838430404663, "learning_rate": 7.304624617315581e-06, "loss": 0.0037, "step": 4075 }, { "epoch": 239.76470588235293, "grad_norm": 0.21002353727817535, "learning_rate": 7.300038322327745e-06, "loss": 0.0024, "step": 4076 }, { "epoch": 239.8235294117647, "grad_norm": 0.3727024793624878, "learning_rate": 7.295452639825731e-06, "loss": 0.0045, "step": 4077 }, { "epoch": 239.88235294117646, "grad_norm": 0.2389988750219345, "learning_rate": 7.2908675708498085e-06, "loss": 0.0037, "step": 4078 }, { "epoch": 239.94117647058823, "grad_norm": 0.5687462091445923, "learning_rate": 7.286283116440095e-06, "loss": 0.0079, "step": 4079 }, { "epoch": 240.0, "grad_norm": 0.29552149772644043, "learning_rate": 7.2816992776365714e-06, "loss": 0.0052, "step": 4080 }, { "epoch": 240.05882352941177, "grad_norm": 0.6822506785392761, "learning_rate": 7.277116055479087e-06, "loss": 0.0152, "step": 4081 }, { "epoch": 240.11764705882354, "grad_norm": 0.4794222414493561, "learning_rate": 7.272533451007338e-06, "loss": 0.0055, "step": 4082 }, { "epoch": 240.1764705882353, "grad_norm": 0.6708258390426636, "learning_rate": 7.267951465260894e-06, "loss": 0.0105, "step": 4083 }, { "epoch": 240.23529411764707, "grad_norm": 0.29340627789497375, "learning_rate": 7.263370099279173e-06, "loss": 0.0037, "step": 4084 }, { "epoch": 240.2941176470588, "grad_norm": 0.9611595273017883, "learning_rate": 7.2587893541014584e-06, "loss": 0.0058, "step": 4085 }, { "epoch": 240.35294117647058, "grad_norm": 0.2807147800922394, "learning_rate": 7.254209230766889e-06, "loss": 0.0041, "step": 4086 }, { "epoch": 240.41176470588235, "grad_norm": 0.18618400394916534, "learning_rate": 7.249629730314471e-06, "loss": 0.003, "step": 4087 }, { "epoch": 240.47058823529412, "grad_norm": 0.5846220850944519, "learning_rate": 7.245050853783058e-06, "loss": 0.0099, "step": 4088 }, { "epoch": 240.52941176470588, "grad_norm": 0.8448243737220764, "learning_rate": 7.24047260221137e-06, "loss": 0.0057, "step": 4089 }, { "epoch": 240.58823529411765, "grad_norm": 0.26760348677635193, "learning_rate": 7.23589497663798e-06, "loss": 0.0032, "step": 4090 }, { "epoch": 240.64705882352942, "grad_norm": 0.4386787414550781, "learning_rate": 7.231317978101324e-06, "loss": 0.0038, "step": 4091 }, { "epoch": 240.7058823529412, "grad_norm": 0.32437434792518616, "learning_rate": 7.2267416076396924e-06, "loss": 0.0073, "step": 4092 }, { "epoch": 240.76470588235293, "grad_norm": 0.43559587001800537, "learning_rate": 7.222165866291235e-06, "loss": 0.0091, "step": 4093 }, { "epoch": 240.8235294117647, "grad_norm": 0.20965413749217987, "learning_rate": 7.217590755093957e-06, "loss": 0.0025, "step": 4094 }, { "epoch": 240.88235294117646, "grad_norm": 0.4399826228618622, "learning_rate": 7.21301627508572e-06, "loss": 0.0049, "step": 4095 }, { "epoch": 240.94117647058823, "grad_norm": 0.38905954360961914, "learning_rate": 7.208442427304248e-06, "loss": 0.0052, "step": 4096 }, { "epoch": 241.0, "grad_norm": 0.6204647421836853, "learning_rate": 7.203869212787112e-06, "loss": 0.014, "step": 4097 }, { "epoch": 241.05882352941177, "grad_norm": 0.31722894310951233, "learning_rate": 7.199296632571749e-06, "loss": 0.0067, "step": 4098 }, { "epoch": 241.11764705882354, "grad_norm": 0.4234824776649475, "learning_rate": 7.1947246876954464e-06, "loss": 0.0068, "step": 4099 }, { "epoch": 241.1764705882353, "grad_norm": 0.8674313426017761, "learning_rate": 7.190153379195352e-06, "loss": 0.0057, "step": 4100 }, { "epoch": 241.23529411764707, "grad_norm": 0.3791322410106659, "learning_rate": 7.185582708108461e-06, "loss": 0.0058, "step": 4101 }, { "epoch": 241.2941176470588, "grad_norm": 0.3065345287322998, "learning_rate": 7.181012675471635e-06, "loss": 0.0053, "step": 4102 }, { "epoch": 241.35294117647058, "grad_norm": 0.18702556192874908, "learning_rate": 7.176443282321579e-06, "loss": 0.0021, "step": 4103 }, { "epoch": 241.41176470588235, "grad_norm": 0.2820743918418884, "learning_rate": 7.171874529694867e-06, "loss": 0.0036, "step": 4104 }, { "epoch": 241.47058823529412, "grad_norm": 0.1909448206424713, "learning_rate": 7.167306418627911e-06, "loss": 0.0031, "step": 4105 }, { "epoch": 241.52941176470588, "grad_norm": 0.43198519945144653, "learning_rate": 7.1627389501569926e-06, "loss": 0.005, "step": 4106 }, { "epoch": 241.58823529411765, "grad_norm": 0.5741397738456726, "learning_rate": 7.158172125318239e-06, "loss": 0.0069, "step": 4107 }, { "epoch": 241.64705882352942, "grad_norm": 0.6048576235771179, "learning_rate": 7.153605945147631e-06, "loss": 0.0094, "step": 4108 }, { "epoch": 241.7058823529412, "grad_norm": 2.3631715774536133, "learning_rate": 7.1490404106810114e-06, "loss": 0.0096, "step": 4109 }, { "epoch": 241.76470588235293, "grad_norm": 0.24186016619205475, "learning_rate": 7.1444755229540665e-06, "loss": 0.0038, "step": 4110 }, { "epoch": 241.8235294117647, "grad_norm": 0.6603586673736572, "learning_rate": 7.139911283002341e-06, "loss": 0.0114, "step": 4111 }, { "epoch": 241.88235294117646, "grad_norm": 0.3633979558944702, "learning_rate": 7.135347691861232e-06, "loss": 0.0095, "step": 4112 }, { "epoch": 241.94117647058823, "grad_norm": 0.29934829473495483, "learning_rate": 7.130784750565992e-06, "loss": 0.0051, "step": 4113 }, { "epoch": 242.0, "grad_norm": 0.16748149693012238, "learning_rate": 7.126222460151719e-06, "loss": 0.0026, "step": 4114 }, { "epoch": 242.05882352941177, "grad_norm": 0.19401656091213226, "learning_rate": 7.121660821653371e-06, "loss": 0.0034, "step": 4115 }, { "epoch": 242.11764705882354, "grad_norm": 0.3235096335411072, "learning_rate": 7.1170998361057544e-06, "loss": 0.0052, "step": 4116 }, { "epoch": 242.1764705882353, "grad_norm": 0.35657790303230286, "learning_rate": 7.112539504543528e-06, "loss": 0.0072, "step": 4117 }, { "epoch": 242.23529411764707, "grad_norm": 0.3321271538734436, "learning_rate": 7.107979828001199e-06, "loss": 0.0037, "step": 4118 }, { "epoch": 242.2941176470588, "grad_norm": 0.5203970670700073, "learning_rate": 7.103420807513134e-06, "loss": 0.006, "step": 4119 }, { "epoch": 242.35294117647058, "grad_norm": 3.4718406200408936, "learning_rate": 7.0988624441135435e-06, "loss": 0.008, "step": 4120 }, { "epoch": 242.41176470588235, "grad_norm": 0.360763281583786, "learning_rate": 7.094304738836491e-06, "loss": 0.0057, "step": 4121 }, { "epoch": 242.47058823529412, "grad_norm": 0.4574887752532959, "learning_rate": 7.0897476927158935e-06, "loss": 0.0045, "step": 4122 }, { "epoch": 242.52941176470588, "grad_norm": 0.5897229909896851, "learning_rate": 7.085191306785511e-06, "loss": 0.0101, "step": 4123 }, { "epoch": 242.58823529411765, "grad_norm": 0.3260568380355835, "learning_rate": 7.0806355820789665e-06, "loss": 0.0045, "step": 4124 }, { "epoch": 242.64705882352942, "grad_norm": 0.8122270703315735, "learning_rate": 7.076080519629717e-06, "loss": 0.0057, "step": 4125 }, { "epoch": 242.7058823529412, "grad_norm": 0.23183974623680115, "learning_rate": 7.071526120471086e-06, "loss": 0.0041, "step": 4126 }, { "epoch": 242.76470588235293, "grad_norm": 0.6148501038551331, "learning_rate": 7.0669723856362305e-06, "loss": 0.0121, "step": 4127 }, { "epoch": 242.8235294117647, "grad_norm": 0.9245434403419495, "learning_rate": 7.062419316158171e-06, "loss": 0.0077, "step": 4128 }, { "epoch": 242.88235294117646, "grad_norm": 0.6553527116775513, "learning_rate": 7.0578669130697645e-06, "loss": 0.0103, "step": 4129 }, { "epoch": 242.94117647058823, "grad_norm": 0.8667979836463928, "learning_rate": 7.0533151774037274e-06, "loss": 0.0057, "step": 4130 }, { "epoch": 243.0, "grad_norm": 0.4294201731681824, "learning_rate": 7.048764110192618e-06, "loss": 0.0085, "step": 4131 }, { "epoch": 243.05882352941177, "grad_norm": 0.35781750082969666, "learning_rate": 7.044213712468846e-06, "loss": 0.0041, "step": 4132 }, { "epoch": 243.11764705882354, "grad_norm": 0.3433360159397125, "learning_rate": 7.039663985264669e-06, "loss": 0.0051, "step": 4133 }, { "epoch": 243.1764705882353, "grad_norm": 0.580678403377533, "learning_rate": 7.035114929612188e-06, "loss": 0.0051, "step": 4134 }, { "epoch": 243.23529411764707, "grad_norm": 0.42922264337539673, "learning_rate": 7.030566546543359e-06, "loss": 0.0083, "step": 4135 }, { "epoch": 243.2941176470588, "grad_norm": 0.5140671730041504, "learning_rate": 7.026018837089981e-06, "loss": 0.0054, "step": 4136 }, { "epoch": 243.35294117647058, "grad_norm": 0.2199406772851944, "learning_rate": 7.021471802283701e-06, "loss": 0.0036, "step": 4137 }, { "epoch": 243.41176470588235, "grad_norm": 0.38219013810157776, "learning_rate": 7.016925443156013e-06, "loss": 0.008, "step": 4138 }, { "epoch": 243.47058823529412, "grad_norm": 3.015270233154297, "learning_rate": 7.012379760738258e-06, "loss": 0.0146, "step": 4139 }, { "epoch": 243.52941176470588, "grad_norm": 0.36262357234954834, "learning_rate": 7.007834756061622e-06, "loss": 0.0078, "step": 4140 }, { "epoch": 243.58823529411765, "grad_norm": 0.6152708530426025, "learning_rate": 7.003290430157141e-06, "loss": 0.0062, "step": 4141 }, { "epoch": 243.64705882352942, "grad_norm": 0.5480561256408691, "learning_rate": 6.998746784055692e-06, "loss": 0.0054, "step": 4142 }, { "epoch": 243.7058823529412, "grad_norm": 0.44392991065979004, "learning_rate": 6.994203818788003e-06, "loss": 0.005, "step": 4143 }, { "epoch": 243.76470588235293, "grad_norm": 0.4124230742454529, "learning_rate": 6.989661535384642e-06, "loss": 0.0071, "step": 4144 }, { "epoch": 243.8235294117647, "grad_norm": 0.5274748206138611, "learning_rate": 6.985119934876029e-06, "loss": 0.0081, "step": 4145 }, { "epoch": 243.88235294117646, "grad_norm": 0.36010807752609253, "learning_rate": 6.980579018292422e-06, "loss": 0.0047, "step": 4146 }, { "epoch": 243.94117647058823, "grad_norm": 1.223842978477478, "learning_rate": 6.976038786663925e-06, "loss": 0.0085, "step": 4147 }, { "epoch": 244.0, "grad_norm": 0.24430572986602783, "learning_rate": 6.971499241020495e-06, "loss": 0.0032, "step": 4148 }, { "epoch": 244.05882352941177, "grad_norm": 3.043470859527588, "learning_rate": 6.9669603823919205e-06, "loss": 0.0071, "step": 4149 }, { "epoch": 244.11764705882354, "grad_norm": 0.20354540646076202, "learning_rate": 6.9624222118078465e-06, "loss": 0.0033, "step": 4150 }, { "epoch": 244.1764705882353, "grad_norm": 0.31219714879989624, "learning_rate": 6.957884730297753e-06, "loss": 0.007, "step": 4151 }, { "epoch": 244.23529411764707, "grad_norm": 0.34687289595603943, "learning_rate": 6.9533479388909686e-06, "loss": 0.0046, "step": 4152 }, { "epoch": 244.2941176470588, "grad_norm": 2.389558792114258, "learning_rate": 6.9488118386166606e-06, "loss": 0.0118, "step": 4153 }, { "epoch": 244.35294117647058, "grad_norm": 0.36466366052627563, "learning_rate": 6.944276430503846e-06, "loss": 0.006, "step": 4154 }, { "epoch": 244.41176470588235, "grad_norm": 0.6836987733840942, "learning_rate": 6.9397417155813785e-06, "loss": 0.0128, "step": 4155 }, { "epoch": 244.47058823529412, "grad_norm": 0.24747353792190552, "learning_rate": 6.935207694877959e-06, "loss": 0.0029, "step": 4156 }, { "epoch": 244.52941176470588, "grad_norm": 0.5546451210975647, "learning_rate": 6.930674369422129e-06, "loss": 0.0077, "step": 4157 }, { "epoch": 244.58823529411765, "grad_norm": 1.4549219608306885, "learning_rate": 6.926141740242273e-06, "loss": 0.0069, "step": 4158 }, { "epoch": 244.64705882352942, "grad_norm": 0.5023272633552551, "learning_rate": 6.921609808366619e-06, "loss": 0.0084, "step": 4159 }, { "epoch": 244.7058823529412, "grad_norm": 0.23158563673496246, "learning_rate": 6.9170785748232294e-06, "loss": 0.0036, "step": 4160 }, { "epoch": 244.76470588235293, "grad_norm": 0.44724732637405396, "learning_rate": 6.912548040640019e-06, "loss": 0.0042, "step": 4161 }, { "epoch": 244.8235294117647, "grad_norm": 0.24277238547801971, "learning_rate": 6.908018206844736e-06, "loss": 0.0044, "step": 4162 }, { "epoch": 244.88235294117646, "grad_norm": 0.7680138945579529, "learning_rate": 6.903489074464973e-06, "loss": 0.004, "step": 4163 }, { "epoch": 244.94117647058823, "grad_norm": 0.5600971579551697, "learning_rate": 6.898960644528161e-06, "loss": 0.013, "step": 4164 }, { "epoch": 245.0, "grad_norm": 0.30865007638931274, "learning_rate": 6.894432918061579e-06, "loss": 0.0041, "step": 4165 }, { "epoch": 245.05882352941177, "grad_norm": 0.3967292308807373, "learning_rate": 6.889905896092335e-06, "loss": 0.0042, "step": 4166 }, { "epoch": 245.11764705882354, "grad_norm": 0.3791762888431549, "learning_rate": 6.885379579647386e-06, "loss": 0.0052, "step": 4167 }, { "epoch": 245.1764705882353, "grad_norm": 0.7337383031845093, "learning_rate": 6.880853969753525e-06, "loss": 0.0095, "step": 4168 }, { "epoch": 245.23529411764707, "grad_norm": 0.6651404500007629, "learning_rate": 6.8763290674373886e-06, "loss": 0.013, "step": 4169 }, { "epoch": 245.2941176470588, "grad_norm": 0.3722531497478485, "learning_rate": 6.871804873725445e-06, "loss": 0.0045, "step": 4170 }, { "epoch": 245.35294117647058, "grad_norm": 0.4570571482181549, "learning_rate": 6.8672813896440115e-06, "loss": 0.0074, "step": 4171 }, { "epoch": 245.41176470588235, "grad_norm": 2.670130729675293, "learning_rate": 6.86275861621924e-06, "loss": 0.0052, "step": 4172 }, { "epoch": 245.47058823529412, "grad_norm": 0.35630300641059875, "learning_rate": 6.858236554477114e-06, "loss": 0.0056, "step": 4173 }, { "epoch": 245.52941176470588, "grad_norm": 0.4409628212451935, "learning_rate": 6.853715205443469e-06, "loss": 0.007, "step": 4174 }, { "epoch": 245.58823529411765, "grad_norm": 0.3132122755050659, "learning_rate": 6.849194570143971e-06, "loss": 0.0026, "step": 4175 }, { "epoch": 245.64705882352942, "grad_norm": 0.36823564767837524, "learning_rate": 6.844674649604125e-06, "loss": 0.0064, "step": 4176 }, { "epoch": 245.7058823529412, "grad_norm": 0.2908667325973511, "learning_rate": 6.8401554448492716e-06, "loss": 0.0045, "step": 4177 }, { "epoch": 245.76470588235293, "grad_norm": 0.15438812971115112, "learning_rate": 6.8356369569045965e-06, "loss": 0.0026, "step": 4178 }, { "epoch": 245.8235294117647, "grad_norm": 0.44269105792045593, "learning_rate": 6.831119186795112e-06, "loss": 0.0067, "step": 4179 }, { "epoch": 245.88235294117646, "grad_norm": 0.4676896631717682, "learning_rate": 6.826602135545679e-06, "loss": 0.0119, "step": 4180 }, { "epoch": 245.94117647058823, "grad_norm": 0.3168982267379761, "learning_rate": 6.822085804180985e-06, "loss": 0.0049, "step": 4181 }, { "epoch": 246.0, "grad_norm": 0.8782669305801392, "learning_rate": 6.8175701937255645e-06, "loss": 0.0066, "step": 4182 }, { "epoch": 246.05882352941177, "grad_norm": 0.3364405333995819, "learning_rate": 6.8130553052037775e-06, "loss": 0.0035, "step": 4183 }, { "epoch": 246.11764705882354, "grad_norm": 0.39051494002342224, "learning_rate": 6.808541139639828e-06, "loss": 0.0043, "step": 4184 }, { "epoch": 246.1764705882353, "grad_norm": 0.6516656279563904, "learning_rate": 6.804027698057753e-06, "loss": 0.0086, "step": 4185 }, { "epoch": 246.23529411764707, "grad_norm": 0.43315479159355164, "learning_rate": 6.799514981481429e-06, "loss": 0.0092, "step": 4186 }, { "epoch": 246.2941176470588, "grad_norm": 0.668989896774292, "learning_rate": 6.795002990934563e-06, "loss": 0.0074, "step": 4187 }, { "epoch": 246.35294117647058, "grad_norm": 0.7365118861198425, "learning_rate": 6.790491727440695e-06, "loss": 0.0145, "step": 4188 }, { "epoch": 246.41176470588235, "grad_norm": 0.5199232697486877, "learning_rate": 6.78598119202321e-06, "loss": 0.0046, "step": 4189 }, { "epoch": 246.47058823529412, "grad_norm": 1.5502811670303345, "learning_rate": 6.7814713857053185e-06, "loss": 0.0087, "step": 4190 }, { "epoch": 246.52941176470588, "grad_norm": 0.343332439661026, "learning_rate": 6.776962309510073e-06, "loss": 0.006, "step": 4191 }, { "epoch": 246.58823529411765, "grad_norm": 0.6059707999229431, "learning_rate": 6.772453964460353e-06, "loss": 0.0122, "step": 4192 }, { "epoch": 246.64705882352942, "grad_norm": 0.2176249921321869, "learning_rate": 6.76794635157888e-06, "loss": 0.0039, "step": 4193 }, { "epoch": 246.7058823529412, "grad_norm": 0.44376641511917114, "learning_rate": 6.7634394718882005e-06, "loss": 0.0062, "step": 4194 }, { "epoch": 246.76470588235293, "grad_norm": 0.2252124696969986, "learning_rate": 6.7589333264107015e-06, "loss": 0.0037, "step": 4195 }, { "epoch": 246.8235294117647, "grad_norm": 0.4525424838066101, "learning_rate": 6.7544279161686e-06, "loss": 0.0039, "step": 4196 }, { "epoch": 246.88235294117646, "grad_norm": 0.6097435355186462, "learning_rate": 6.749923242183951e-06, "loss": 0.006, "step": 4197 }, { "epoch": 246.94117647058823, "grad_norm": 0.21726912260055542, "learning_rate": 6.745419305478635e-06, "loss": 0.0029, "step": 4198 }, { "epoch": 247.0, "grad_norm": 0.37684282660484314, "learning_rate": 6.740916107074372e-06, "loss": 0.0044, "step": 4199 }, { "epoch": 247.05882352941177, "grad_norm": 0.45946577191352844, "learning_rate": 6.736413647992712e-06, "loss": 0.0035, "step": 4200 }, { "epoch": 247.11764705882354, "grad_norm": 0.7836405634880066, "learning_rate": 6.7319119292550326e-06, "loss": 0.0068, "step": 4201 }, { "epoch": 247.1764705882353, "grad_norm": 0.2016318440437317, "learning_rate": 6.727410951882554e-06, "loss": 0.003, "step": 4202 }, { "epoch": 247.23529411764707, "grad_norm": 0.34585267305374146, "learning_rate": 6.722910716896316e-06, "loss": 0.0058, "step": 4203 }, { "epoch": 247.2941176470588, "grad_norm": 0.24688595533370972, "learning_rate": 6.718411225317201e-06, "loss": 0.0033, "step": 4204 }, { "epoch": 247.35294117647058, "grad_norm": 1.4980872869491577, "learning_rate": 6.713912478165916e-06, "loss": 0.0107, "step": 4205 }, { "epoch": 247.41176470588235, "grad_norm": 0.33919811248779297, "learning_rate": 6.709414476463001e-06, "loss": 0.0039, "step": 4206 }, { "epoch": 247.47058823529412, "grad_norm": 0.37690407037734985, "learning_rate": 6.7049172212288264e-06, "loss": 0.0039, "step": 4207 }, { "epoch": 247.52941176470588, "grad_norm": 0.6202356219291687, "learning_rate": 6.700420713483595e-06, "loss": 0.0067, "step": 4208 }, { "epoch": 247.58823529411765, "grad_norm": 0.274707555770874, "learning_rate": 6.695924954247337e-06, "loss": 0.0042, "step": 4209 }, { "epoch": 247.64705882352942, "grad_norm": 0.6382024884223938, "learning_rate": 6.691429944539918e-06, "loss": 0.0119, "step": 4210 }, { "epoch": 247.7058823529412, "grad_norm": 0.41368934512138367, "learning_rate": 6.686935685381026e-06, "loss": 0.0069, "step": 4211 }, { "epoch": 247.76470588235293, "grad_norm": 0.3594321310520172, "learning_rate": 6.682442177790186e-06, "loss": 0.0082, "step": 4212 }, { "epoch": 247.8235294117647, "grad_norm": 0.38983213901519775, "learning_rate": 6.67794942278675e-06, "loss": 0.0059, "step": 4213 }, { "epoch": 247.88235294117646, "grad_norm": 0.401404470205307, "learning_rate": 6.673457421389892e-06, "loss": 0.007, "step": 4214 }, { "epoch": 247.94117647058823, "grad_norm": 0.34244024753570557, "learning_rate": 6.66896617461863e-06, "loss": 0.0071, "step": 4215 }, { "epoch": 248.0, "grad_norm": 0.6695239543914795, "learning_rate": 6.664475683491797e-06, "loss": 0.0082, "step": 4216 }, { "epoch": 248.05882352941177, "grad_norm": 0.4341328740119934, "learning_rate": 6.659985949028064e-06, "loss": 0.0074, "step": 4217 }, { "epoch": 248.11764705882354, "grad_norm": 0.2159796953201294, "learning_rate": 6.655496972245923e-06, "loss": 0.0032, "step": 4218 }, { "epoch": 248.1764705882353, "grad_norm": 0.4275881052017212, "learning_rate": 6.651008754163701e-06, "loss": 0.0053, "step": 4219 }, { "epoch": 248.23529411764707, "grad_norm": 0.554253101348877, "learning_rate": 6.646521295799544e-06, "loss": 0.0081, "step": 4220 }, { "epoch": 248.2941176470588, "grad_norm": 0.5243242383003235, "learning_rate": 6.642034598171438e-06, "loss": 0.011, "step": 4221 }, { "epoch": 248.35294117647058, "grad_norm": 0.45440995693206787, "learning_rate": 6.637548662297187e-06, "loss": 0.0084, "step": 4222 }, { "epoch": 248.41176470588235, "grad_norm": 0.3352736830711365, "learning_rate": 6.633063489194422e-06, "loss": 0.0034, "step": 4223 }, { "epoch": 248.47058823529412, "grad_norm": 0.8443393707275391, "learning_rate": 6.628579079880605e-06, "loss": 0.0073, "step": 4224 }, { "epoch": 248.52941176470588, "grad_norm": 0.21387982368469238, "learning_rate": 6.624095435373026e-06, "loss": 0.003, "step": 4225 }, { "epoch": 248.58823529411765, "grad_norm": 0.3086501359939575, "learning_rate": 6.619612556688797e-06, "loss": 0.0031, "step": 4226 }, { "epoch": 248.64705882352942, "grad_norm": 0.4694216549396515, "learning_rate": 6.615130444844861e-06, "loss": 0.0077, "step": 4227 }, { "epoch": 248.7058823529412, "grad_norm": 0.45710790157318115, "learning_rate": 6.610649100857979e-06, "loss": 0.0065, "step": 4228 }, { "epoch": 248.76470588235293, "grad_norm": 1.8953917026519775, "learning_rate": 6.606168525744747e-06, "loss": 0.0087, "step": 4229 }, { "epoch": 248.8235294117647, "grad_norm": 0.40106236934661865, "learning_rate": 6.60168872052158e-06, "loss": 0.004, "step": 4230 }, { "epoch": 248.88235294117646, "grad_norm": 0.6717411279678345, "learning_rate": 6.597209686204725e-06, "loss": 0.0072, "step": 4231 }, { "epoch": 248.94117647058823, "grad_norm": 0.2805907428264618, "learning_rate": 6.592731423810246e-06, "loss": 0.0038, "step": 4232 }, { "epoch": 249.0, "grad_norm": 0.3992714285850525, "learning_rate": 6.588253934354039e-06, "loss": 0.0083, "step": 4233 }, { "epoch": 249.05882352941177, "grad_norm": 0.5710369944572449, "learning_rate": 6.583777218851819e-06, "loss": 0.0066, "step": 4234 }, { "epoch": 249.11764705882354, "grad_norm": 0.40905165672302246, "learning_rate": 6.57930127831913e-06, "loss": 0.0039, "step": 4235 }, { "epoch": 249.1764705882353, "grad_norm": 0.38477402925491333, "learning_rate": 6.574826113771337e-06, "loss": 0.0055, "step": 4236 }, { "epoch": 249.23529411764707, "grad_norm": 0.35797053575515747, "learning_rate": 6.570351726223634e-06, "loss": 0.0062, "step": 4237 }, { "epoch": 249.2941176470588, "grad_norm": 0.39965349435806274, "learning_rate": 6.565878116691029e-06, "loss": 0.0074, "step": 4238 }, { "epoch": 249.35294117647058, "grad_norm": 0.8002001047134399, "learning_rate": 6.561405286188365e-06, "loss": 0.0044, "step": 4239 }, { "epoch": 249.41176470588235, "grad_norm": 0.34150123596191406, "learning_rate": 6.5569332357302985e-06, "loss": 0.004, "step": 4240 }, { "epoch": 249.47058823529412, "grad_norm": 0.2765691578388214, "learning_rate": 6.5524619663313185e-06, "loss": 0.0037, "step": 4241 }, { "epoch": 249.52941176470588, "grad_norm": 0.5711659789085388, "learning_rate": 6.547991479005727e-06, "loss": 0.0058, "step": 4242 }, { "epoch": 249.58823529411765, "grad_norm": 0.2878190577030182, "learning_rate": 6.543521774767654e-06, "loss": 0.0039, "step": 4243 }, { "epoch": 249.64705882352942, "grad_norm": 0.3801127076148987, "learning_rate": 6.5390528546310515e-06, "loss": 0.0064, "step": 4244 }, { "epoch": 249.7058823529412, "grad_norm": 0.5074552297592163, "learning_rate": 6.534584719609694e-06, "loss": 0.0059, "step": 4245 }, { "epoch": 249.76470588235293, "grad_norm": 0.5458808541297913, "learning_rate": 6.530117370717177e-06, "loss": 0.0058, "step": 4246 }, { "epoch": 249.8235294117647, "grad_norm": 0.3502669632434845, "learning_rate": 6.525650808966917e-06, "loss": 0.0038, "step": 4247 }, { "epoch": 249.88235294117646, "grad_norm": 0.6435335278511047, "learning_rate": 6.521185035372155e-06, "loss": 0.0124, "step": 4248 }, { "epoch": 249.94117647058823, "grad_norm": 0.7963283061981201, "learning_rate": 6.516720050945946e-06, "loss": 0.0097, "step": 4249 }, { "epoch": 250.0, "grad_norm": 0.33320802450180054, "learning_rate": 6.5122558567011775e-06, "loss": 0.008, "step": 4250 }, { "epoch": 250.05882352941177, "grad_norm": 0.30057293176651, "learning_rate": 6.507792453650545e-06, "loss": 0.0038, "step": 4251 }, { "epoch": 250.11764705882354, "grad_norm": 0.3141414225101471, "learning_rate": 6.503329842806576e-06, "loss": 0.0033, "step": 4252 }, { "epoch": 250.1764705882353, "grad_norm": 0.42460986971855164, "learning_rate": 6.498868025181608e-06, "loss": 0.0099, "step": 4253 }, { "epoch": 250.23529411764707, "grad_norm": 0.54826420545578, "learning_rate": 6.494407001787809e-06, "loss": 0.0057, "step": 4254 }, { "epoch": 250.2941176470588, "grad_norm": 0.31310153007507324, "learning_rate": 6.489946773637158e-06, "loss": 0.0036, "step": 4255 }, { "epoch": 250.35294117647058, "grad_norm": 0.6222542524337769, "learning_rate": 6.485487341741455e-06, "loss": 0.0104, "step": 4256 }, { "epoch": 250.41176470588235, "grad_norm": 0.29179710149765015, "learning_rate": 6.481028707112327e-06, "loss": 0.0044, "step": 4257 }, { "epoch": 250.47058823529412, "grad_norm": 0.4179574251174927, "learning_rate": 6.476570870761208e-06, "loss": 0.008, "step": 4258 }, { "epoch": 250.52941176470588, "grad_norm": 0.5146847367286682, "learning_rate": 6.472113833699362e-06, "loss": 0.0059, "step": 4259 }, { "epoch": 250.58823529411765, "grad_norm": 0.3088968098163605, "learning_rate": 6.467657596937864e-06, "loss": 0.0029, "step": 4260 }, { "epoch": 250.64705882352942, "grad_norm": 0.16963082551956177, "learning_rate": 6.463202161487614e-06, "loss": 0.0033, "step": 4261 }, { "epoch": 250.7058823529412, "grad_norm": 0.2695719003677368, "learning_rate": 6.458747528359323e-06, "loss": 0.0074, "step": 4262 }, { "epoch": 250.76470588235293, "grad_norm": 0.30707523226737976, "learning_rate": 6.454293698563528e-06, "loss": 0.0055, "step": 4263 }, { "epoch": 250.8235294117647, "grad_norm": 0.40091222524642944, "learning_rate": 6.449840673110574e-06, "loss": 0.0067, "step": 4264 }, { "epoch": 250.88235294117646, "grad_norm": 2.0821852684020996, "learning_rate": 6.445388453010636e-06, "loss": 0.0113, "step": 4265 }, { "epoch": 250.94117647058823, "grad_norm": 0.7229670286178589, "learning_rate": 6.440937039273692e-06, "loss": 0.0091, "step": 4266 }, { "epoch": 251.0, "grad_norm": 0.45207148790359497, "learning_rate": 6.43648643290955e-06, "loss": 0.0041, "step": 4267 }, { "epoch": 251.05882352941177, "grad_norm": 0.3148837983608246, "learning_rate": 6.432036634927828e-06, "loss": 0.0039, "step": 4268 }, { "epoch": 251.11764705882354, "grad_norm": 0.5272027850151062, "learning_rate": 6.427587646337958e-06, "loss": 0.0045, "step": 4269 }, { "epoch": 251.1764705882353, "grad_norm": 0.29681724309921265, "learning_rate": 6.423139468149199e-06, "loss": 0.0048, "step": 4270 }, { "epoch": 251.23529411764707, "grad_norm": 0.6849111914634705, "learning_rate": 6.418692101370612e-06, "loss": 0.0104, "step": 4271 }, { "epoch": 251.2941176470588, "grad_norm": 0.3411201536655426, "learning_rate": 6.414245547011087e-06, "loss": 0.0065, "step": 4272 }, { "epoch": 251.35294117647058, "grad_norm": 0.32468149065971375, "learning_rate": 6.4097998060793225e-06, "loss": 0.0057, "step": 4273 }, { "epoch": 251.41176470588235, "grad_norm": 0.2631191313266754, "learning_rate": 6.405354879583833e-06, "loss": 0.0054, "step": 4274 }, { "epoch": 251.47058823529412, "grad_norm": 0.4117412269115448, "learning_rate": 6.400910768532949e-06, "loss": 0.0037, "step": 4275 }, { "epoch": 251.52941176470588, "grad_norm": 0.43607404828071594, "learning_rate": 6.396467473934818e-06, "loss": 0.0068, "step": 4276 }, { "epoch": 251.58823529411765, "grad_norm": 0.5096211433410645, "learning_rate": 6.3920249967974e-06, "loss": 0.0086, "step": 4277 }, { "epoch": 251.64705882352942, "grad_norm": 0.2314714640378952, "learning_rate": 6.387583338128471e-06, "loss": 0.003, "step": 4278 }, { "epoch": 251.7058823529412, "grad_norm": 0.3425886034965515, "learning_rate": 6.383142498935617e-06, "loss": 0.0044, "step": 4279 }, { "epoch": 251.76470588235293, "grad_norm": 0.7652384042739868, "learning_rate": 6.378702480226248e-06, "loss": 0.0087, "step": 4280 }, { "epoch": 251.8235294117647, "grad_norm": 0.5284035801887512, "learning_rate": 6.374263283007576e-06, "loss": 0.005, "step": 4281 }, { "epoch": 251.88235294117646, "grad_norm": 0.3427956998348236, "learning_rate": 6.369824908286631e-06, "loss": 0.0037, "step": 4282 }, { "epoch": 251.94117647058823, "grad_norm": 1.217817783355713, "learning_rate": 6.365387357070263e-06, "loss": 0.0156, "step": 4283 }, { "epoch": 252.0, "grad_norm": 0.3173118233680725, "learning_rate": 6.360950630365126e-06, "loss": 0.0046, "step": 4284 }, { "epoch": 252.05882352941177, "grad_norm": 0.7314711809158325, "learning_rate": 6.356514729177692e-06, "loss": 0.0048, "step": 4285 }, { "epoch": 252.11764705882354, "grad_norm": 0.41088348627090454, "learning_rate": 6.352079654514243e-06, "loss": 0.0057, "step": 4286 }, { "epoch": 252.1764705882353, "grad_norm": 0.3218865692615509, "learning_rate": 6.347645407380876e-06, "loss": 0.0034, "step": 4287 }, { "epoch": 252.23529411764707, "grad_norm": 0.4110042452812195, "learning_rate": 6.343211988783497e-06, "loss": 0.0059, "step": 4288 }, { "epoch": 252.2941176470588, "grad_norm": 0.4815292954444885, "learning_rate": 6.338779399727832e-06, "loss": 0.006, "step": 4289 }, { "epoch": 252.35294117647058, "grad_norm": 0.40171003341674805, "learning_rate": 6.334347641219407e-06, "loss": 0.0076, "step": 4290 }, { "epoch": 252.41176470588235, "grad_norm": 0.25457289814949036, "learning_rate": 6.329916714263568e-06, "loss": 0.0032, "step": 4291 }, { "epoch": 252.47058823529412, "grad_norm": 0.47255265712738037, "learning_rate": 6.325486619865471e-06, "loss": 0.0094, "step": 4292 }, { "epoch": 252.52941176470588, "grad_norm": 0.2633001208305359, "learning_rate": 6.32105735903008e-06, "loss": 0.0041, "step": 4293 }, { "epoch": 252.58823529411765, "grad_norm": 0.3870401382446289, "learning_rate": 6.316628932762176e-06, "loss": 0.0048, "step": 4294 }, { "epoch": 252.64705882352942, "grad_norm": 0.4585820138454437, "learning_rate": 6.31220134206634e-06, "loss": 0.0101, "step": 4295 }, { "epoch": 252.7058823529412, "grad_norm": 0.4873541593551636, "learning_rate": 6.3077745879469754e-06, "loss": 0.0051, "step": 4296 }, { "epoch": 252.76470588235293, "grad_norm": 0.36037710309028625, "learning_rate": 6.303348671408287e-06, "loss": 0.0036, "step": 4297 }, { "epoch": 252.8235294117647, "grad_norm": 0.2360202670097351, "learning_rate": 6.298923593454298e-06, "loss": 0.0036, "step": 4298 }, { "epoch": 252.88235294117646, "grad_norm": 0.6642223000526428, "learning_rate": 6.2944993550888325e-06, "loss": 0.0108, "step": 4299 }, { "epoch": 252.94117647058823, "grad_norm": 0.8342567086219788, "learning_rate": 6.290075957315531e-06, "loss": 0.0104, "step": 4300 }, { "epoch": 253.0, "grad_norm": 0.2814001441001892, "learning_rate": 6.2856534011378365e-06, "loss": 0.005, "step": 4301 }, { "epoch": 253.05882352941177, "grad_norm": 0.44378340244293213, "learning_rate": 6.28123168755901e-06, "loss": 0.0078, "step": 4302 }, { "epoch": 253.11764705882354, "grad_norm": 0.4611252248287201, "learning_rate": 6.276810817582111e-06, "loss": 0.007, "step": 4303 }, { "epoch": 253.1764705882353, "grad_norm": 0.340936541557312, "learning_rate": 6.272390792210019e-06, "loss": 0.0044, "step": 4304 }, { "epoch": 253.23529411764707, "grad_norm": 0.39057233929634094, "learning_rate": 6.267971612445411e-06, "loss": 0.0069, "step": 4305 }, { "epoch": 253.2941176470588, "grad_norm": 0.4533480107784271, "learning_rate": 6.263553279290781e-06, "loss": 0.0039, "step": 4306 }, { "epoch": 253.35294117647058, "grad_norm": 0.45498043298721313, "learning_rate": 6.259135793748426e-06, "loss": 0.0054, "step": 4307 }, { "epoch": 253.41176470588235, "grad_norm": 0.46223047375679016, "learning_rate": 6.254719156820449e-06, "loss": 0.0045, "step": 4308 }, { "epoch": 253.47058823529412, "grad_norm": 0.5205778479576111, "learning_rate": 6.250303369508767e-06, "loss": 0.0084, "step": 4309 }, { "epoch": 253.52941176470588, "grad_norm": 0.541648805141449, "learning_rate": 6.2458884328150974e-06, "loss": 0.004, "step": 4310 }, { "epoch": 253.58823529411765, "grad_norm": 0.6624765992164612, "learning_rate": 6.24147434774097e-06, "loss": 0.0121, "step": 4311 }, { "epoch": 253.64705882352942, "grad_norm": 0.5671725869178772, "learning_rate": 6.237061115287718e-06, "loss": 0.0091, "step": 4312 }, { "epoch": 253.7058823529412, "grad_norm": 0.270289808511734, "learning_rate": 6.232648736456485e-06, "loss": 0.0035, "step": 4313 }, { "epoch": 253.76470588235293, "grad_norm": 0.30778223276138306, "learning_rate": 6.2282372122482135e-06, "loss": 0.0041, "step": 4314 }, { "epoch": 253.8235294117647, "grad_norm": 0.33895477652549744, "learning_rate": 6.223826543663663e-06, "loss": 0.004, "step": 4315 }, { "epoch": 253.88235294117646, "grad_norm": 0.4019034206867218, "learning_rate": 6.219416731703386e-06, "loss": 0.0042, "step": 4316 }, { "epoch": 253.94117647058823, "grad_norm": 0.4311276972293854, "learning_rate": 6.215007777367754e-06, "loss": 0.0051, "step": 4317 }, { "epoch": 254.0, "grad_norm": 0.6630826592445374, "learning_rate": 6.210599681656933e-06, "loss": 0.0053, "step": 4318 }, { "epoch": 254.05882352941177, "grad_norm": 0.4427197575569153, "learning_rate": 6.206192445570902e-06, "loss": 0.0083, "step": 4319 }, { "epoch": 254.11764705882354, "grad_norm": 0.1662372499704361, "learning_rate": 6.201786070109441e-06, "loss": 0.003, "step": 4320 }, { "epoch": 254.1764705882353, "grad_norm": 0.25921881198883057, "learning_rate": 6.1973805562721326e-06, "loss": 0.0045, "step": 4321 }, { "epoch": 254.23529411764707, "grad_norm": 0.5327059030532837, "learning_rate": 6.1929759050583716e-06, "loss": 0.0081, "step": 4322 }, { "epoch": 254.2941176470588, "grad_norm": 0.2361348271369934, "learning_rate": 6.188572117467346e-06, "loss": 0.0035, "step": 4323 }, { "epoch": 254.35294117647058, "grad_norm": 0.532072126865387, "learning_rate": 6.184169194498062e-06, "loss": 0.0074, "step": 4324 }, { "epoch": 254.41176470588235, "grad_norm": 0.3786451518535614, "learning_rate": 6.179767137149314e-06, "loss": 0.0062, "step": 4325 }, { "epoch": 254.47058823529412, "grad_norm": 0.34968098998069763, "learning_rate": 6.175365946419713e-06, "loss": 0.0059, "step": 4326 }, { "epoch": 254.52941176470588, "grad_norm": 1.1034926176071167, "learning_rate": 6.170965623307668e-06, "loss": 0.0099, "step": 4327 }, { "epoch": 254.58823529411765, "grad_norm": 0.5420743227005005, "learning_rate": 6.1665661688113894e-06, "loss": 0.0063, "step": 4328 }, { "epoch": 254.64705882352942, "grad_norm": 0.21969494223594666, "learning_rate": 6.162167583928895e-06, "loss": 0.0034, "step": 4329 }, { "epoch": 254.7058823529412, "grad_norm": 0.2476285994052887, "learning_rate": 6.157769869658001e-06, "loss": 0.0029, "step": 4330 }, { "epoch": 254.76470588235293, "grad_norm": 0.4777834415435791, "learning_rate": 6.15337302699633e-06, "loss": 0.0089, "step": 4331 }, { "epoch": 254.8235294117647, "grad_norm": 0.25579404830932617, "learning_rate": 6.148977056941303e-06, "loss": 0.0055, "step": 4332 }, { "epoch": 254.88235294117646, "grad_norm": 0.2907293140888214, "learning_rate": 6.144581960490146e-06, "loss": 0.0056, "step": 4333 }, { "epoch": 254.94117647058823, "grad_norm": 0.38823258876800537, "learning_rate": 6.140187738639887e-06, "loss": 0.0045, "step": 4334 }, { "epoch": 255.0, "grad_norm": 0.39079639315605164, "learning_rate": 6.135794392387353e-06, "loss": 0.0049, "step": 4335 }, { "epoch": 255.05882352941177, "grad_norm": 0.43652695417404175, "learning_rate": 6.131401922729172e-06, "loss": 0.0065, "step": 4336 }, { "epoch": 255.11764705882354, "grad_norm": 0.2620249390602112, "learning_rate": 6.12701033066178e-06, "loss": 0.0039, "step": 4337 }, { "epoch": 255.1764705882353, "grad_norm": 0.36758267879486084, "learning_rate": 6.122619617181402e-06, "loss": 0.0039, "step": 4338 }, { "epoch": 255.23529411764707, "grad_norm": 0.60213702917099, "learning_rate": 6.118229783284079e-06, "loss": 0.013, "step": 4339 }, { "epoch": 255.2941176470588, "grad_norm": 0.28817495703697205, "learning_rate": 6.113840829965636e-06, "loss": 0.0038, "step": 4340 }, { "epoch": 255.35294117647058, "grad_norm": 0.5906272530555725, "learning_rate": 6.109452758221712e-06, "loss": 0.0072, "step": 4341 }, { "epoch": 255.41176470588235, "grad_norm": 0.5100365281105042, "learning_rate": 6.105065569047736e-06, "loss": 0.0101, "step": 4342 }, { "epoch": 255.47058823529412, "grad_norm": 0.8437408208847046, "learning_rate": 6.100679263438947e-06, "loss": 0.0063, "step": 4343 }, { "epoch": 255.52941176470588, "grad_norm": 0.2417427897453308, "learning_rate": 6.096293842390369e-06, "loss": 0.0033, "step": 4344 }, { "epoch": 255.58823529411765, "grad_norm": 0.4662017822265625, "learning_rate": 6.091909306896842e-06, "loss": 0.0064, "step": 4345 }, { "epoch": 255.64705882352942, "grad_norm": 0.7199515700340271, "learning_rate": 6.0875256579529925e-06, "loss": 0.009, "step": 4346 }, { "epoch": 255.7058823529412, "grad_norm": 0.28786736726760864, "learning_rate": 6.083142896553253e-06, "loss": 0.0046, "step": 4347 }, { "epoch": 255.76470588235293, "grad_norm": 0.2337045967578888, "learning_rate": 6.078761023691851e-06, "loss": 0.0029, "step": 4348 }, { "epoch": 255.8235294117647, "grad_norm": 0.25915002822875977, "learning_rate": 6.074380040362812e-06, "loss": 0.0056, "step": 4349 }, { "epoch": 255.88235294117646, "grad_norm": 0.41018757224082947, "learning_rate": 6.069999947559966e-06, "loss": 0.0031, "step": 4350 }, { "epoch": 255.94117647058823, "grad_norm": 0.264023095369339, "learning_rate": 6.06562074627693e-06, "loss": 0.0039, "step": 4351 }, { "epoch": 256.0, "grad_norm": 0.13017414510250092, "learning_rate": 6.061242437507131e-06, "loss": 0.0028, "step": 4352 }, { "epoch": 256.05882352941177, "grad_norm": 0.5158630013465881, "learning_rate": 6.056865022243781e-06, "loss": 0.0061, "step": 4353 }, { "epoch": 256.11764705882354, "grad_norm": 0.9300592541694641, "learning_rate": 6.052488501479903e-06, "loss": 0.008, "step": 4354 }, { "epoch": 256.1764705882353, "grad_norm": 0.5222323536872864, "learning_rate": 6.048112876208305e-06, "loss": 0.0082, "step": 4355 }, { "epoch": 256.2352941176471, "grad_norm": 0.3051545023918152, "learning_rate": 6.0437381474215995e-06, "loss": 0.0049, "step": 4356 }, { "epoch": 256.29411764705884, "grad_norm": 0.3401190936565399, "learning_rate": 6.03936431611219e-06, "loss": 0.0036, "step": 4357 }, { "epoch": 256.3529411764706, "grad_norm": 0.24749906361103058, "learning_rate": 6.034991383272284e-06, "loss": 0.0026, "step": 4358 }, { "epoch": 256.4117647058824, "grad_norm": 0.42921456694602966, "learning_rate": 6.030619349893875e-06, "loss": 0.0063, "step": 4359 }, { "epoch": 256.47058823529414, "grad_norm": 1.1463608741760254, "learning_rate": 6.026248216968763e-06, "loss": 0.009, "step": 4360 }, { "epoch": 256.52941176470586, "grad_norm": 0.33759501576423645, "learning_rate": 6.021877985488536e-06, "loss": 0.0039, "step": 4361 }, { "epoch": 256.5882352941176, "grad_norm": 0.37190380692481995, "learning_rate": 6.017508656444579e-06, "loss": 0.0063, "step": 4362 }, { "epoch": 256.6470588235294, "grad_norm": 0.6104803681373596, "learning_rate": 6.013140230828079e-06, "loss": 0.0044, "step": 4363 }, { "epoch": 256.70588235294116, "grad_norm": 0.3255051374435425, "learning_rate": 6.0087727096300044e-06, "loss": 0.0054, "step": 4364 }, { "epoch": 256.7647058823529, "grad_norm": 0.5295298099517822, "learning_rate": 6.004406093841134e-06, "loss": 0.0066, "step": 4365 }, { "epoch": 256.8235294117647, "grad_norm": 0.27600061893463135, "learning_rate": 6.0000403844520286e-06, "loss": 0.0066, "step": 4366 }, { "epoch": 256.88235294117646, "grad_norm": 0.38555237650871277, "learning_rate": 5.995675582453053e-06, "loss": 0.0059, "step": 4367 }, { "epoch": 256.94117647058823, "grad_norm": 0.7094131708145142, "learning_rate": 5.991311688834358e-06, "loss": 0.0072, "step": 4368 }, { "epoch": 257.0, "grad_norm": 0.463577538728714, "learning_rate": 5.986948704585895e-06, "loss": 0.0054, "step": 4369 }, { "epoch": 257.05882352941177, "grad_norm": 0.634361743927002, "learning_rate": 5.982586630697404e-06, "loss": 0.0106, "step": 4370 }, { "epoch": 257.11764705882354, "grad_norm": 0.19098946452140808, "learning_rate": 5.9782254681584225e-06, "loss": 0.0031, "step": 4371 }, { "epoch": 257.1764705882353, "grad_norm": 0.6173948645591736, "learning_rate": 5.973865217958277e-06, "loss": 0.0104, "step": 4372 }, { "epoch": 257.2352941176471, "grad_norm": 0.2382482886314392, "learning_rate": 5.969505881086093e-06, "loss": 0.0038, "step": 4373 }, { "epoch": 257.29411764705884, "grad_norm": 0.36104637384414673, "learning_rate": 5.965147458530782e-06, "loss": 0.0035, "step": 4374 }, { "epoch": 257.3529411764706, "grad_norm": 0.7801926732063293, "learning_rate": 5.960789951281052e-06, "loss": 0.0052, "step": 4375 }, { "epoch": 257.4117647058824, "grad_norm": 0.7168689370155334, "learning_rate": 5.956433360325406e-06, "loss": 0.0048, "step": 4376 }, { "epoch": 257.47058823529414, "grad_norm": 0.40405866503715515, "learning_rate": 5.9520776866521306e-06, "loss": 0.009, "step": 4377 }, { "epoch": 257.52941176470586, "grad_norm": 0.5177736878395081, "learning_rate": 5.947722931249313e-06, "loss": 0.0059, "step": 4378 }, { "epoch": 257.5882352941176, "grad_norm": 0.9039040207862854, "learning_rate": 5.943369095104827e-06, "loss": 0.0055, "step": 4379 }, { "epoch": 257.6470588235294, "grad_norm": 0.24318605661392212, "learning_rate": 5.939016179206343e-06, "loss": 0.0045, "step": 4380 }, { "epoch": 257.70588235294116, "grad_norm": 0.14386869966983795, "learning_rate": 5.934664184541315e-06, "loss": 0.0026, "step": 4381 }, { "epoch": 257.7647058823529, "grad_norm": 0.5175736546516418, "learning_rate": 5.9303131120969945e-06, "loss": 0.0077, "step": 4382 }, { "epoch": 257.8235294117647, "grad_norm": 0.24800562858581543, "learning_rate": 5.925962962860421e-06, "loss": 0.0029, "step": 4383 }, { "epoch": 257.88235294117646, "grad_norm": 0.34555765986442566, "learning_rate": 5.921613737818426e-06, "loss": 0.004, "step": 4384 }, { "epoch": 257.94117647058823, "grad_norm": 0.3340289294719696, "learning_rate": 5.917265437957627e-06, "loss": 0.0045, "step": 4385 }, { "epoch": 258.0, "grad_norm": 0.6358234882354736, "learning_rate": 5.912918064264441e-06, "loss": 0.015, "step": 4386 }, { "epoch": 258.05882352941177, "grad_norm": 0.35976436734199524, "learning_rate": 5.908571617725064e-06, "loss": 0.0035, "step": 4387 }, { "epoch": 258.11764705882354, "grad_norm": 0.39357081055641174, "learning_rate": 5.904226099325488e-06, "loss": 0.0035, "step": 4388 }, { "epoch": 258.1764705882353, "grad_norm": 0.4761029779911041, "learning_rate": 5.899881510051493e-06, "loss": 0.0064, "step": 4389 }, { "epoch": 258.2352941176471, "grad_norm": 0.6569572687149048, "learning_rate": 5.895537850888646e-06, "loss": 0.0098, "step": 4390 }, { "epoch": 258.29411764705884, "grad_norm": 0.17267629504203796, "learning_rate": 5.891195122822312e-06, "loss": 0.003, "step": 4391 }, { "epoch": 258.3529411764706, "grad_norm": 0.5622636079788208, "learning_rate": 5.8868533268376315e-06, "loss": 0.0075, "step": 4392 }, { "epoch": 258.4117647058824, "grad_norm": 0.2693101465702057, "learning_rate": 5.882512463919543e-06, "loss": 0.0043, "step": 4393 }, { "epoch": 258.47058823529414, "grad_norm": 0.5184099078178406, "learning_rate": 5.87817253505277e-06, "loss": 0.0078, "step": 4394 }, { "epoch": 258.52941176470586, "grad_norm": 0.2102564424276352, "learning_rate": 5.873833541221825e-06, "loss": 0.0026, "step": 4395 }, { "epoch": 258.5882352941176, "grad_norm": 0.24224871397018433, "learning_rate": 5.869495483411006e-06, "loss": 0.0037, "step": 4396 }, { "epoch": 258.6470588235294, "grad_norm": 0.3619987368583679, "learning_rate": 5.865158362604405e-06, "loss": 0.0068, "step": 4397 }, { "epoch": 258.70588235294116, "grad_norm": 0.5928031206130981, "learning_rate": 5.860822179785892e-06, "loss": 0.0069, "step": 4398 }, { "epoch": 258.7647058823529, "grad_norm": 0.8991755843162537, "learning_rate": 5.856486935939133e-06, "loss": 0.0063, "step": 4399 }, { "epoch": 258.8235294117647, "grad_norm": 0.5298477411270142, "learning_rate": 5.852152632047577e-06, "loss": 0.009, "step": 4400 }, { "epoch": 258.88235294117646, "grad_norm": 0.35081300139427185, "learning_rate": 5.847819269094456e-06, "loss": 0.0039, "step": 4401 }, { "epoch": 258.94117647058823, "grad_norm": 0.4538642168045044, "learning_rate": 5.843486848062798e-06, "loss": 0.0063, "step": 4402 }, { "epoch": 259.0, "grad_norm": 0.2918822765350342, "learning_rate": 5.839155369935407e-06, "loss": 0.0035, "step": 4403 }, { "epoch": 259.05882352941177, "grad_norm": 0.35316166281700134, "learning_rate": 5.834824835694881e-06, "loss": 0.0082, "step": 4404 }, { "epoch": 259.11764705882354, "grad_norm": 0.28736916184425354, "learning_rate": 5.830495246323604e-06, "loss": 0.0042, "step": 4405 }, { "epoch": 259.1764705882353, "grad_norm": 1.0570071935653687, "learning_rate": 5.826166602803736e-06, "loss": 0.0084, "step": 4406 }, { "epoch": 259.2352941176471, "grad_norm": 0.23449021577835083, "learning_rate": 5.821838906117232e-06, "loss": 0.0041, "step": 4407 }, { "epoch": 259.29411764705884, "grad_norm": 0.6125452518463135, "learning_rate": 5.817512157245828e-06, "loss": 0.0092, "step": 4408 }, { "epoch": 259.3529411764706, "grad_norm": 0.393696665763855, "learning_rate": 5.813186357171053e-06, "loss": 0.0068, "step": 4409 }, { "epoch": 259.4117647058824, "grad_norm": 0.3889011740684509, "learning_rate": 5.8088615068742035e-06, "loss": 0.0048, "step": 4410 }, { "epoch": 259.47058823529414, "grad_norm": 0.25573617219924927, "learning_rate": 5.804537607336378e-06, "loss": 0.004, "step": 4411 }, { "epoch": 259.52941176470586, "grad_norm": 0.29902660846710205, "learning_rate": 5.800214659538448e-06, "loss": 0.0035, "step": 4412 }, { "epoch": 259.5882352941176, "grad_norm": 0.28076761960983276, "learning_rate": 5.795892664461081e-06, "loss": 0.0042, "step": 4413 }, { "epoch": 259.6470588235294, "grad_norm": 0.3371311128139496, "learning_rate": 5.791571623084712e-06, "loss": 0.0035, "step": 4414 }, { "epoch": 259.70588235294116, "grad_norm": 0.9384758472442627, "learning_rate": 5.787251536389571e-06, "loss": 0.0044, "step": 4415 }, { "epoch": 259.7647058823529, "grad_norm": 0.21170096099376678, "learning_rate": 5.782932405355675e-06, "loss": 0.0032, "step": 4416 }, { "epoch": 259.8235294117647, "grad_norm": 0.32190555334091187, "learning_rate": 5.778614230962809e-06, "loss": 0.0058, "step": 4417 }, { "epoch": 259.88235294117646, "grad_norm": 0.8581772446632385, "learning_rate": 5.774297014190553e-06, "loss": 0.0064, "step": 4418 }, { "epoch": 259.94117647058823, "grad_norm": 0.2977462112903595, "learning_rate": 5.7699807560182695e-06, "loss": 0.0069, "step": 4419 }, { "epoch": 260.0, "grad_norm": 0.22902455925941467, "learning_rate": 5.765665457425102e-06, "loss": 0.0028, "step": 4420 }, { "epoch": 260.05882352941177, "grad_norm": 0.31789132952690125, "learning_rate": 5.7613511193899696e-06, "loss": 0.0042, "step": 4421 }, { "epoch": 260.11764705882354, "grad_norm": 0.3773115575313568, "learning_rate": 5.757037742891583e-06, "loss": 0.006, "step": 4422 }, { "epoch": 260.1764705882353, "grad_norm": 0.4546978771686554, "learning_rate": 5.752725328908428e-06, "loss": 0.0057, "step": 4423 }, { "epoch": 260.2352941176471, "grad_norm": 0.27399298548698425, "learning_rate": 5.748413878418781e-06, "loss": 0.0041, "step": 4424 }, { "epoch": 260.29411764705884, "grad_norm": 0.7138862609863281, "learning_rate": 5.744103392400688e-06, "loss": 0.0057, "step": 4425 }, { "epoch": 260.3529411764706, "grad_norm": 0.5646626353263855, "learning_rate": 5.739793871831982e-06, "loss": 0.0071, "step": 4426 }, { "epoch": 260.4117647058824, "grad_norm": 0.7414838671684265, "learning_rate": 5.735485317690284e-06, "loss": 0.0084, "step": 4427 }, { "epoch": 260.47058823529414, "grad_norm": 0.3256939649581909, "learning_rate": 5.73117773095298e-06, "loss": 0.0038, "step": 4428 }, { "epoch": 260.52941176470586, "grad_norm": 0.5195929408073425, "learning_rate": 5.726871112597248e-06, "loss": 0.007, "step": 4429 }, { "epoch": 260.5882352941176, "grad_norm": 0.5666784048080444, "learning_rate": 5.722565463600043e-06, "loss": 0.0044, "step": 4430 }, { "epoch": 260.6470588235294, "grad_norm": 0.528801679611206, "learning_rate": 5.718260784938103e-06, "loss": 0.0056, "step": 4431 }, { "epoch": 260.70588235294116, "grad_norm": 0.34043243527412415, "learning_rate": 5.713957077587944e-06, "loss": 0.0064, "step": 4432 }, { "epoch": 260.7647058823529, "grad_norm": 0.47609469294548035, "learning_rate": 5.709654342525859e-06, "loss": 0.0075, "step": 4433 }, { "epoch": 260.8235294117647, "grad_norm": 0.47676557302474976, "learning_rate": 5.7053525807279265e-06, "loss": 0.005, "step": 4434 }, { "epoch": 260.88235294117646, "grad_norm": 0.34879955649375916, "learning_rate": 5.701051793169994e-06, "loss": 0.0044, "step": 4435 }, { "epoch": 260.94117647058823, "grad_norm": 0.4015003442764282, "learning_rate": 5.6967519808276995e-06, "loss": 0.0046, "step": 4436 }, { "epoch": 261.0, "grad_norm": 0.3525693714618683, "learning_rate": 5.692453144676451e-06, "loss": 0.005, "step": 4437 }, { "epoch": 261.05882352941177, "grad_norm": 0.6139421463012695, "learning_rate": 5.688155285691446e-06, "loss": 0.0076, "step": 4438 }, { "epoch": 261.11764705882354, "grad_norm": 1.8955106735229492, "learning_rate": 5.683858404847644e-06, "loss": 0.0111, "step": 4439 }, { "epoch": 261.1764705882353, "grad_norm": 0.27306368947029114, "learning_rate": 5.679562503119796e-06, "loss": 0.0032, "step": 4440 }, { "epoch": 261.2352941176471, "grad_norm": 0.416256844997406, "learning_rate": 5.675267581482424e-06, "loss": 0.0042, "step": 4441 }, { "epoch": 261.29411764705884, "grad_norm": 0.30785706639289856, "learning_rate": 5.670973640909838e-06, "loss": 0.0049, "step": 4442 }, { "epoch": 261.3529411764706, "grad_norm": 0.479655921459198, "learning_rate": 5.666680682376107e-06, "loss": 0.0104, "step": 4443 }, { "epoch": 261.4117647058824, "grad_norm": 0.3466658592224121, "learning_rate": 5.662388706855093e-06, "loss": 0.0034, "step": 4444 }, { "epoch": 261.47058823529414, "grad_norm": 0.27219972014427185, "learning_rate": 5.658097715320434e-06, "loss": 0.003, "step": 4445 }, { "epoch": 261.52941176470586, "grad_norm": 0.3554270267486572, "learning_rate": 5.6538077087455326e-06, "loss": 0.0061, "step": 4446 }, { "epoch": 261.5882352941176, "grad_norm": 0.3298187553882599, "learning_rate": 5.649518688103579e-06, "loss": 0.0069, "step": 4447 }, { "epoch": 261.6470588235294, "grad_norm": 0.2759096026420593, "learning_rate": 5.645230654367537e-06, "loss": 0.0034, "step": 4448 }, { "epoch": 261.70588235294116, "grad_norm": 0.7598434686660767, "learning_rate": 5.640943608510152e-06, "loss": 0.0068, "step": 4449 }, { "epoch": 261.7647058823529, "grad_norm": 0.39463990926742554, "learning_rate": 5.63665755150393e-06, "loss": 0.0073, "step": 4450 }, { "epoch": 261.8235294117647, "grad_norm": 0.33141088485717773, "learning_rate": 5.632372484321167e-06, "loss": 0.0059, "step": 4451 }, { "epoch": 261.88235294117646, "grad_norm": 0.3673899173736572, "learning_rate": 5.6280884079339296e-06, "loss": 0.004, "step": 4452 }, { "epoch": 261.94117647058823, "grad_norm": 0.25675323605537415, "learning_rate": 5.623805323314062e-06, "loss": 0.0043, "step": 4453 }, { "epoch": 262.0, "grad_norm": 0.26019588112831116, "learning_rate": 5.619523231433177e-06, "loss": 0.0036, "step": 4454 }, { "epoch": 262.05882352941177, "grad_norm": 0.9034149646759033, "learning_rate": 5.615242133262667e-06, "loss": 0.0066, "step": 4455 }, { "epoch": 262.11764705882354, "grad_norm": 0.39277738332748413, "learning_rate": 5.610962029773703e-06, "loss": 0.0039, "step": 4456 }, { "epoch": 262.1764705882353, "grad_norm": 0.3760068416595459, "learning_rate": 5.6066829219372185e-06, "loss": 0.006, "step": 4457 }, { "epoch": 262.2352941176471, "grad_norm": 0.4006992280483246, "learning_rate": 5.602404810723934e-06, "loss": 0.0058, "step": 4458 }, { "epoch": 262.29411764705884, "grad_norm": 0.41155919432640076, "learning_rate": 5.598127697104334e-06, "loss": 0.0067, "step": 4459 }, { "epoch": 262.3529411764706, "grad_norm": 1.1323789358139038, "learning_rate": 5.593851582048688e-06, "loss": 0.0054, "step": 4460 }, { "epoch": 262.4117647058824, "grad_norm": 0.21458269655704498, "learning_rate": 5.589576466527024e-06, "loss": 0.0031, "step": 4461 }, { "epoch": 262.47058823529414, "grad_norm": 0.12995387613773346, "learning_rate": 5.5853023515091535e-06, "loss": 0.0024, "step": 4462 }, { "epoch": 262.52941176470586, "grad_norm": 0.23758606612682343, "learning_rate": 5.581029237964659e-06, "loss": 0.003, "step": 4463 }, { "epoch": 262.5882352941176, "grad_norm": 0.2976158559322357, "learning_rate": 5.5767571268629015e-06, "loss": 0.0062, "step": 4464 }, { "epoch": 262.6470588235294, "grad_norm": 0.42904189229011536, "learning_rate": 5.572486019173e-06, "loss": 0.006, "step": 4465 }, { "epoch": 262.70588235294116, "grad_norm": 0.3456061780452728, "learning_rate": 5.568215915863857e-06, "loss": 0.0042, "step": 4466 }, { "epoch": 262.7647058823529, "grad_norm": 0.38486742973327637, "learning_rate": 5.563946817904144e-06, "loss": 0.0047, "step": 4467 }, { "epoch": 262.8235294117647, "grad_norm": 2.4614098072052, "learning_rate": 5.559678726262313e-06, "loss": 0.0071, "step": 4468 }, { "epoch": 262.88235294117646, "grad_norm": 0.6276001930236816, "learning_rate": 5.555411641906569e-06, "loss": 0.007, "step": 4469 }, { "epoch": 262.94117647058823, "grad_norm": 0.31037813425064087, "learning_rate": 5.551145565804903e-06, "loss": 0.0058, "step": 4470 }, { "epoch": 263.0, "grad_norm": 0.3223218321800232, "learning_rate": 5.546880498925079e-06, "loss": 0.0056, "step": 4471 }, { "epoch": 263.05882352941177, "grad_norm": 0.5526258945465088, "learning_rate": 5.542616442234618e-06, "loss": 0.0049, "step": 4472 }, { "epoch": 263.11764705882354, "grad_norm": 0.3682248592376709, "learning_rate": 5.538353396700825e-06, "loss": 0.0037, "step": 4473 }, { "epoch": 263.1764705882353, "grad_norm": 0.1986178606748581, "learning_rate": 5.53409136329077e-06, "loss": 0.0025, "step": 4474 }, { "epoch": 263.2352941176471, "grad_norm": 0.47609660029411316, "learning_rate": 5.5298303429713004e-06, "loss": 0.0055, "step": 4475 }, { "epoch": 263.29411764705884, "grad_norm": 0.4071826636791229, "learning_rate": 5.5255703367090175e-06, "loss": 0.0062, "step": 4476 }, { "epoch": 263.3529411764706, "grad_norm": 0.545366644859314, "learning_rate": 5.521311345470308e-06, "loss": 0.007, "step": 4477 }, { "epoch": 263.4117647058824, "grad_norm": 0.2247590869665146, "learning_rate": 5.517053370221326e-06, "loss": 0.0036, "step": 4478 }, { "epoch": 263.47058823529414, "grad_norm": 0.6414405703544617, "learning_rate": 5.512796411927992e-06, "loss": 0.004, "step": 4479 }, { "epoch": 263.52941176470586, "grad_norm": 0.5729988217353821, "learning_rate": 5.50854047155599e-06, "loss": 0.0103, "step": 4480 }, { "epoch": 263.5882352941176, "grad_norm": 0.5008998513221741, "learning_rate": 5.5042855500707834e-06, "loss": 0.0094, "step": 4481 }, { "epoch": 263.6470588235294, "grad_norm": 0.31048911809921265, "learning_rate": 5.500031648437605e-06, "loss": 0.0046, "step": 4482 }, { "epoch": 263.70588235294116, "grad_norm": 0.5946264266967773, "learning_rate": 5.495778767621442e-06, "loss": 0.0071, "step": 4483 }, { "epoch": 263.7647058823529, "grad_norm": 2.3337502479553223, "learning_rate": 5.491526908587067e-06, "loss": 0.0103, "step": 4484 }, { "epoch": 263.8235294117647, "grad_norm": 0.16355963051319122, "learning_rate": 5.487276072299011e-06, "loss": 0.0025, "step": 4485 }, { "epoch": 263.88235294117646, "grad_norm": 0.689278244972229, "learning_rate": 5.4830262597215775e-06, "loss": 0.0046, "step": 4486 }, { "epoch": 263.94117647058823, "grad_norm": 0.3174322545528412, "learning_rate": 5.478777471818831e-06, "loss": 0.0037, "step": 4487 }, { "epoch": 264.0, "grad_norm": 0.46370261907577515, "learning_rate": 5.4745297095546125e-06, "loss": 0.0063, "step": 4488 }, { "epoch": 264.05882352941177, "grad_norm": 0.3506958484649658, "learning_rate": 5.470282973892522e-06, "loss": 0.0057, "step": 4489 }, { "epoch": 264.11764705882354, "grad_norm": 0.83467036485672, "learning_rate": 5.466037265795941e-06, "loss": 0.0102, "step": 4490 }, { "epoch": 264.1764705882353, "grad_norm": 0.7283598184585571, "learning_rate": 5.461792586227994e-06, "loss": 0.008, "step": 4491 }, { "epoch": 264.2352941176471, "grad_norm": 0.544580340385437, "learning_rate": 5.457548936151591e-06, "loss": 0.0046, "step": 4492 }, { "epoch": 264.29411764705884, "grad_norm": 0.5962522625923157, "learning_rate": 5.453306316529406e-06, "loss": 0.0071, "step": 4493 }, { "epoch": 264.3529411764706, "grad_norm": 0.4140423834323883, "learning_rate": 5.449064728323878e-06, "loss": 0.005, "step": 4494 }, { "epoch": 264.4117647058824, "grad_norm": 0.3966028094291687, "learning_rate": 5.444824172497204e-06, "loss": 0.0035, "step": 4495 }, { "epoch": 264.47058823529414, "grad_norm": 0.45744720101356506, "learning_rate": 5.440584650011355e-06, "loss": 0.0076, "step": 4496 }, { "epoch": 264.52941176470586, "grad_norm": 0.3669578433036804, "learning_rate": 5.436346161828075e-06, "loss": 0.0064, "step": 4497 }, { "epoch": 264.5882352941176, "grad_norm": 0.4741209149360657, "learning_rate": 5.43210870890885e-06, "loss": 0.0063, "step": 4498 }, { "epoch": 264.6470588235294, "grad_norm": 0.31673601269721985, "learning_rate": 5.427872292214953e-06, "loss": 0.0054, "step": 4499 }, { "epoch": 264.70588235294116, "grad_norm": 0.6144001483917236, "learning_rate": 5.423636912707413e-06, "loss": 0.0072, "step": 4500 }, { "epoch": 264.7647058823529, "grad_norm": 0.6427178978919983, "learning_rate": 5.41940257134703e-06, "loss": 0.0093, "step": 4501 }, { "epoch": 264.8235294117647, "grad_norm": 0.6333410739898682, "learning_rate": 5.415169269094356e-06, "loss": 0.0055, "step": 4502 }, { "epoch": 264.88235294117646, "grad_norm": 0.3083222210407257, "learning_rate": 5.4109370069097175e-06, "loss": 0.0046, "step": 4503 }, { "epoch": 264.94117647058823, "grad_norm": 0.7387703061103821, "learning_rate": 5.406705785753203e-06, "loss": 0.0032, "step": 4504 }, { "epoch": 265.0, "grad_norm": 0.4496006965637207, "learning_rate": 5.40247560658467e-06, "loss": 0.0053, "step": 4505 }, { "epoch": 265.05882352941177, "grad_norm": 0.6684120297431946, "learning_rate": 5.3982464703637215e-06, "loss": 0.0089, "step": 4506 }, { "epoch": 265.11764705882354, "grad_norm": 0.17878931760787964, "learning_rate": 5.3940183780497445e-06, "loss": 0.0024, "step": 4507 }, { "epoch": 265.1764705882353, "grad_norm": 0.3630753457546234, "learning_rate": 5.389791330601883e-06, "loss": 0.0052, "step": 4508 }, { "epoch": 265.2352941176471, "grad_norm": 0.5198594927787781, "learning_rate": 5.385565328979036e-06, "loss": 0.0044, "step": 4509 }, { "epoch": 265.29411764705884, "grad_norm": 0.24291642010211945, "learning_rate": 5.381340374139872e-06, "loss": 0.004, "step": 4510 }, { "epoch": 265.3529411764706, "grad_norm": 0.3306271433830261, "learning_rate": 5.3771164670428246e-06, "loss": 0.0043, "step": 4511 }, { "epoch": 265.4117647058824, "grad_norm": 0.3018468916416168, "learning_rate": 5.372893608646088e-06, "loss": 0.0041, "step": 4512 }, { "epoch": 265.47058823529414, "grad_norm": 0.26905694603919983, "learning_rate": 5.368671799907609e-06, "loss": 0.0033, "step": 4513 }, { "epoch": 265.52941176470586, "grad_norm": 0.45933809876441956, "learning_rate": 5.3644510417851096e-06, "loss": 0.0099, "step": 4514 }, { "epoch": 265.5882352941176, "grad_norm": 0.3772902190685272, "learning_rate": 5.360231335236067e-06, "loss": 0.0055, "step": 4515 }, { "epoch": 265.6470588235294, "grad_norm": 0.3712596297264099, "learning_rate": 5.3560126812177245e-06, "loss": 0.0037, "step": 4516 }, { "epoch": 265.70588235294116, "grad_norm": 0.19198302924633026, "learning_rate": 5.351795080687077e-06, "loss": 0.0034, "step": 4517 }, { "epoch": 265.7647058823529, "grad_norm": 0.6033285856246948, "learning_rate": 5.347578534600888e-06, "loss": 0.0114, "step": 4518 }, { "epoch": 265.8235294117647, "grad_norm": 0.39357998967170715, "learning_rate": 5.343363043915681e-06, "loss": 0.0078, "step": 4519 }, { "epoch": 265.88235294117646, "grad_norm": 0.6867234706878662, "learning_rate": 5.339148609587744e-06, "loss": 0.0048, "step": 4520 }, { "epoch": 265.94117647058823, "grad_norm": 0.4380769729614258, "learning_rate": 5.334935232573113e-06, "loss": 0.0056, "step": 4521 }, { "epoch": 266.0, "grad_norm": 0.36411187052726746, "learning_rate": 5.330722913827594e-06, "loss": 0.0038, "step": 4522 }, { "epoch": 266.05882352941177, "grad_norm": 0.6605702042579651, "learning_rate": 5.326511654306756e-06, "loss": 0.0088, "step": 4523 }, { "epoch": 266.11764705882354, "grad_norm": 0.1752685159444809, "learning_rate": 5.322301454965914e-06, "loss": 0.0026, "step": 4524 }, { "epoch": 266.1764705882353, "grad_norm": 0.6653823256492615, "learning_rate": 5.3180923167601575e-06, "loss": 0.01, "step": 4525 }, { "epoch": 266.2352941176471, "grad_norm": 0.4699031114578247, "learning_rate": 5.313884240644325e-06, "loss": 0.003, "step": 4526 }, { "epoch": 266.29411764705884, "grad_norm": 0.16922946274280548, "learning_rate": 5.309677227573023e-06, "loss": 0.0038, "step": 4527 }, { "epoch": 266.3529411764706, "grad_norm": 0.21806614100933075, "learning_rate": 5.305471278500605e-06, "loss": 0.0034, "step": 4528 }, { "epoch": 266.4117647058824, "grad_norm": 0.26509177684783936, "learning_rate": 5.3012663943811905e-06, "loss": 0.0051, "step": 4529 }, { "epoch": 266.47058823529414, "grad_norm": 0.32368749380111694, "learning_rate": 5.2970625761686615e-06, "loss": 0.0039, "step": 4530 }, { "epoch": 266.52941176470586, "grad_norm": 0.2562859058380127, "learning_rate": 5.292859824816654e-06, "loss": 0.0047, "step": 4531 }, { "epoch": 266.5882352941176, "grad_norm": 0.4690310060977936, "learning_rate": 5.288658141278556e-06, "loss": 0.0076, "step": 4532 }, { "epoch": 266.6470588235294, "grad_norm": 2.0785820484161377, "learning_rate": 5.284457526507518e-06, "loss": 0.0144, "step": 4533 }, { "epoch": 266.70588235294116, "grad_norm": 0.891944169998169, "learning_rate": 5.280257981456453e-06, "loss": 0.0106, "step": 4534 }, { "epoch": 266.7647058823529, "grad_norm": 0.19161804020404816, "learning_rate": 5.27605950707803e-06, "loss": 0.0027, "step": 4535 }, { "epoch": 266.8235294117647, "grad_norm": 0.28121864795684814, "learning_rate": 5.271862104324665e-06, "loss": 0.0035, "step": 4536 }, { "epoch": 266.88235294117646, "grad_norm": 0.3477640748023987, "learning_rate": 5.26766577414854e-06, "loss": 0.0044, "step": 4537 }, { "epoch": 266.94117647058823, "grad_norm": 0.41730138659477234, "learning_rate": 5.263470517501596e-06, "loss": 0.0071, "step": 4538 }, { "epoch": 267.0, "grad_norm": 0.3801240622997284, "learning_rate": 5.259276335335522e-06, "loss": 0.0077, "step": 4539 }, { "epoch": 267.05882352941177, "grad_norm": 0.6724411845207214, "learning_rate": 5.255083228601765e-06, "loss": 0.0092, "step": 4540 }, { "epoch": 267.11764705882354, "grad_norm": 0.378040075302124, "learning_rate": 5.250891198251535e-06, "loss": 0.0071, "step": 4541 }, { "epoch": 267.1764705882353, "grad_norm": 0.5976911187171936, "learning_rate": 5.246700245235797e-06, "loss": 0.0037, "step": 4542 }, { "epoch": 267.2352941176471, "grad_norm": 0.5347166061401367, "learning_rate": 5.2425103705052606e-06, "loss": 0.0036, "step": 4543 }, { "epoch": 267.29411764705884, "grad_norm": 0.47438156604766846, "learning_rate": 5.2383215750103985e-06, "loss": 0.0053, "step": 4544 }, { "epoch": 267.3529411764706, "grad_norm": 0.4163222312927246, "learning_rate": 5.234133859701441e-06, "loss": 0.0077, "step": 4545 }, { "epoch": 267.4117647058824, "grad_norm": 0.1849965900182724, "learning_rate": 5.229947225528374e-06, "loss": 0.0029, "step": 4546 }, { "epoch": 267.47058823529414, "grad_norm": 0.44824936985969543, "learning_rate": 5.2257616734409274e-06, "loss": 0.0079, "step": 4547 }, { "epoch": 267.52941176470586, "grad_norm": 0.28922539949417114, "learning_rate": 5.221577204388597e-06, "loss": 0.0051, "step": 4548 }, { "epoch": 267.5882352941176, "grad_norm": 0.35723453760147095, "learning_rate": 5.217393819320631e-06, "loss": 0.0058, "step": 4549 }, { "epoch": 267.6470588235294, "grad_norm": 0.4069269299507141, "learning_rate": 5.213211519186023e-06, "loss": 0.0037, "step": 4550 }, { "epoch": 267.70588235294116, "grad_norm": 0.47279810905456543, "learning_rate": 5.209030304933531e-06, "loss": 0.007, "step": 4551 }, { "epoch": 267.7647058823529, "grad_norm": 0.32035142183303833, "learning_rate": 5.204850177511662e-06, "loss": 0.0044, "step": 4552 }, { "epoch": 267.8235294117647, "grad_norm": 0.48364463448524475, "learning_rate": 5.200671137868682e-06, "loss": 0.0042, "step": 4553 }, { "epoch": 267.88235294117646, "grad_norm": 0.3519763648509979, "learning_rate": 5.1964931869525984e-06, "loss": 0.0039, "step": 4554 }, { "epoch": 267.94117647058823, "grad_norm": 0.5258862376213074, "learning_rate": 5.192316325711181e-06, "loss": 0.0034, "step": 4555 }, { "epoch": 268.0, "grad_norm": 0.3467824161052704, "learning_rate": 5.18814055509195e-06, "loss": 0.0057, "step": 4556 }, { "epoch": 268.05882352941177, "grad_norm": 0.33102524280548096, "learning_rate": 5.1839658760421815e-06, "loss": 0.0033, "step": 4557 }, { "epoch": 268.11764705882354, "grad_norm": 0.9684524536132812, "learning_rate": 5.179792289508895e-06, "loss": 0.0063, "step": 4558 }, { "epoch": 268.1764705882353, "grad_norm": 0.3275294005870819, "learning_rate": 5.17561979643887e-06, "loss": 0.007, "step": 4559 }, { "epoch": 268.2352941176471, "grad_norm": 0.5015406012535095, "learning_rate": 5.171448397778637e-06, "loss": 0.0049, "step": 4560 }, { "epoch": 268.29411764705884, "grad_norm": 0.927415668964386, "learning_rate": 5.167278094474479e-06, "loss": 0.01, "step": 4561 }, { "epoch": 268.3529411764706, "grad_norm": 0.4056146740913391, "learning_rate": 5.1631088874724245e-06, "loss": 0.0065, "step": 4562 }, { "epoch": 268.4117647058824, "grad_norm": 0.37053418159484863, "learning_rate": 5.1589407777182575e-06, "loss": 0.0064, "step": 4563 }, { "epoch": 268.47058823529414, "grad_norm": 0.3233446478843689, "learning_rate": 5.1547737661575194e-06, "loss": 0.0057, "step": 4564 }, { "epoch": 268.52941176470586, "grad_norm": 0.19883206486701965, "learning_rate": 5.150607853735485e-06, "loss": 0.0027, "step": 4565 }, { "epoch": 268.5882352941176, "grad_norm": 0.33206549286842346, "learning_rate": 5.146443041397198e-06, "loss": 0.0045, "step": 4566 }, { "epoch": 268.6470588235294, "grad_norm": 0.2667998671531677, "learning_rate": 5.142279330087445e-06, "loss": 0.0035, "step": 4567 }, { "epoch": 268.70588235294116, "grad_norm": 0.31451907753944397, "learning_rate": 5.138116720750766e-06, "loss": 0.0039, "step": 4568 }, { "epoch": 268.7647058823529, "grad_norm": 0.25118276476860046, "learning_rate": 5.133955214331439e-06, "loss": 0.0025, "step": 4569 }, { "epoch": 268.8235294117647, "grad_norm": 1.5809189081192017, "learning_rate": 5.1297948117735074e-06, "loss": 0.0086, "step": 4570 }, { "epoch": 268.88235294117646, "grad_norm": 1.151594877243042, "learning_rate": 5.125635514020757e-06, "loss": 0.006, "step": 4571 }, { "epoch": 268.94117647058823, "grad_norm": 0.5397429466247559, "learning_rate": 5.121477322016728e-06, "loss": 0.0078, "step": 4572 }, { "epoch": 269.0, "grad_norm": 0.40945231914520264, "learning_rate": 5.117320236704697e-06, "loss": 0.0081, "step": 4573 }, { "epoch": 269.05882352941177, "grad_norm": 0.2735345959663391, "learning_rate": 5.113164259027702e-06, "loss": 0.0044, "step": 4574 }, { "epoch": 269.11764705882354, "grad_norm": 0.7755652666091919, "learning_rate": 5.10900938992853e-06, "loss": 0.0053, "step": 4575 }, { "epoch": 269.1764705882353, "grad_norm": 0.4428684413433075, "learning_rate": 5.1048556303497046e-06, "loss": 0.0048, "step": 4576 }, { "epoch": 269.2352941176471, "grad_norm": 1.0519956350326538, "learning_rate": 5.100702981233508e-06, "loss": 0.0063, "step": 4577 }, { "epoch": 269.29411764705884, "grad_norm": 0.9271718859672546, "learning_rate": 5.096551443521971e-06, "loss": 0.0066, "step": 4578 }, { "epoch": 269.3529411764706, "grad_norm": 0.5285544395446777, "learning_rate": 5.0924010181568695e-06, "loss": 0.0092, "step": 4579 }, { "epoch": 269.4117647058824, "grad_norm": 0.32061049342155457, "learning_rate": 5.088251706079721e-06, "loss": 0.0035, "step": 4580 }, { "epoch": 269.47058823529414, "grad_norm": 0.17273695766925812, "learning_rate": 5.0841035082318005e-06, "loss": 0.0041, "step": 4581 }, { "epoch": 269.52941176470586, "grad_norm": 0.5429202318191528, "learning_rate": 5.079956425554124e-06, "loss": 0.0089, "step": 4582 }, { "epoch": 269.5882352941176, "grad_norm": 0.36099886894226074, "learning_rate": 5.075810458987463e-06, "loss": 0.0065, "step": 4583 }, { "epoch": 269.6470588235294, "grad_norm": 0.42555028200149536, "learning_rate": 5.0716656094723184e-06, "loss": 0.0032, "step": 4584 }, { "epoch": 269.70588235294116, "grad_norm": 0.456055223941803, "learning_rate": 5.067521877948955e-06, "loss": 0.0078, "step": 4585 }, { "epoch": 269.7647058823529, "grad_norm": 0.5528336763381958, "learning_rate": 5.063379265357378e-06, "loss": 0.0064, "step": 4586 }, { "epoch": 269.8235294117647, "grad_norm": 0.3707013428211212, "learning_rate": 5.059237772637339e-06, "loss": 0.0043, "step": 4587 }, { "epoch": 269.88235294117646, "grad_norm": 0.36621779203414917, "learning_rate": 5.055097400728328e-06, "loss": 0.0052, "step": 4588 }, { "epoch": 269.94117647058823, "grad_norm": 0.30357682704925537, "learning_rate": 5.0509581505695945e-06, "loss": 0.0036, "step": 4589 }, { "epoch": 270.0, "grad_norm": 0.2826789319515228, "learning_rate": 5.046820023100129e-06, "loss": 0.0035, "step": 4590 }, { "epoch": 270.05882352941177, "grad_norm": 0.5089850425720215, "learning_rate": 5.042683019258655e-06, "loss": 0.0069, "step": 4591 }, { "epoch": 270.11764705882354, "grad_norm": 0.25330451130867004, "learning_rate": 5.038547139983657e-06, "loss": 0.0034, "step": 4592 }, { "epoch": 270.1764705882353, "grad_norm": 0.3731345534324646, "learning_rate": 5.0344123862133586e-06, "loss": 0.0059, "step": 4593 }, { "epoch": 270.2352941176471, "grad_norm": 0.3625530004501343, "learning_rate": 5.0302787588857315e-06, "loss": 0.0052, "step": 4594 }, { "epoch": 270.29411764705884, "grad_norm": 0.455990195274353, "learning_rate": 5.02614625893848e-06, "loss": 0.007, "step": 4595 }, { "epoch": 270.3529411764706, "grad_norm": 2.216853380203247, "learning_rate": 5.022014887309067e-06, "loss": 0.0082, "step": 4596 }, { "epoch": 270.4117647058824, "grad_norm": 0.3871670365333557, "learning_rate": 5.0178846449346895e-06, "loss": 0.0053, "step": 4597 }, { "epoch": 270.47058823529414, "grad_norm": 0.32086989283561707, "learning_rate": 5.0137555327523e-06, "loss": 0.0031, "step": 4598 }, { "epoch": 270.52941176470586, "grad_norm": 0.40179571509361267, "learning_rate": 5.009627551698577e-06, "loss": 0.0067, "step": 4599 }, { "epoch": 270.5882352941176, "grad_norm": 0.35495415329933167, "learning_rate": 5.005500702709958e-06, "loss": 0.0041, "step": 4600 }, { "epoch": 270.6470588235294, "grad_norm": 0.3185846507549286, "learning_rate": 5.001374986722618e-06, "loss": 0.007, "step": 4601 }, { "epoch": 270.70588235294116, "grad_norm": 0.40272924304008484, "learning_rate": 4.997250404672471e-06, "loss": 0.0033, "step": 4602 }, { "epoch": 270.7647058823529, "grad_norm": 0.24506635963916779, "learning_rate": 4.9931269574951805e-06, "loss": 0.003, "step": 4603 }, { "epoch": 270.8235294117647, "grad_norm": 0.8838512897491455, "learning_rate": 4.98900464612615e-06, "loss": 0.0047, "step": 4604 }, { "epoch": 270.88235294117646, "grad_norm": 0.4520651400089264, "learning_rate": 4.984883471500526e-06, "loss": 0.0041, "step": 4605 }, { "epoch": 270.94117647058823, "grad_norm": 0.6656501293182373, "learning_rate": 4.980763434553194e-06, "loss": 0.0078, "step": 4606 }, { "epoch": 271.0, "grad_norm": 0.6626049876213074, "learning_rate": 4.976644536218783e-06, "loss": 0.0093, "step": 4607 }, { "epoch": 271.05882352941177, "grad_norm": 0.3105538785457611, "learning_rate": 4.972526777431667e-06, "loss": 0.0034, "step": 4608 }, { "epoch": 271.11764705882354, "grad_norm": 0.4444364905357361, "learning_rate": 4.9684101591259596e-06, "loss": 0.004, "step": 4609 }, { "epoch": 271.1764705882353, "grad_norm": 0.3695511221885681, "learning_rate": 4.964294682235511e-06, "loss": 0.0105, "step": 4610 }, { "epoch": 271.2352941176471, "grad_norm": 0.2813516855239868, "learning_rate": 4.960180347693917e-06, "loss": 0.0036, "step": 4611 }, { "epoch": 271.29411764705884, "grad_norm": 0.40827929973602295, "learning_rate": 4.956067156434517e-06, "loss": 0.0024, "step": 4612 }, { "epoch": 271.3529411764706, "grad_norm": 0.6273137331008911, "learning_rate": 4.951955109390387e-06, "loss": 0.0069, "step": 4613 }, { "epoch": 271.4117647058824, "grad_norm": 0.3182682394981384, "learning_rate": 4.947844207494341e-06, "loss": 0.0057, "step": 4614 }, { "epoch": 271.47058823529414, "grad_norm": 0.5031815767288208, "learning_rate": 4.943734451678938e-06, "loss": 0.0048, "step": 4615 }, { "epoch": 271.52941176470586, "grad_norm": 0.7128885984420776, "learning_rate": 4.939625842876479e-06, "loss": 0.0048, "step": 4616 }, { "epoch": 271.5882352941176, "grad_norm": 0.5319861769676208, "learning_rate": 4.935518382018994e-06, "loss": 0.0109, "step": 4617 }, { "epoch": 271.6470588235294, "grad_norm": 0.3140183091163635, "learning_rate": 4.9314120700382654e-06, "loss": 0.0056, "step": 4618 }, { "epoch": 271.70588235294116, "grad_norm": 0.27597296237945557, "learning_rate": 4.927306907865806e-06, "loss": 0.0038, "step": 4619 }, { "epoch": 271.7647058823529, "grad_norm": 0.3481399416923523, "learning_rate": 4.923202896432876e-06, "loss": 0.0037, "step": 4620 }, { "epoch": 271.8235294117647, "grad_norm": 0.21966181695461273, "learning_rate": 4.919100036670464e-06, "loss": 0.0036, "step": 4621 }, { "epoch": 271.88235294117646, "grad_norm": 0.36480051279067993, "learning_rate": 4.914998329509304e-06, "loss": 0.0046, "step": 4622 }, { "epoch": 271.94117647058823, "grad_norm": 0.1723824441432953, "learning_rate": 4.910897775879869e-06, "loss": 0.0029, "step": 4623 }, { "epoch": 272.0, "grad_norm": 0.20573408901691437, "learning_rate": 4.9067983767123736e-06, "loss": 0.0039, "step": 4624 }, { "epoch": 272.05882352941177, "grad_norm": 0.41588765382766724, "learning_rate": 4.902700132936756e-06, "loss": 0.0035, "step": 4625 }, { "epoch": 272.11764705882354, "grad_norm": 0.23154564201831818, "learning_rate": 4.898603045482708e-06, "loss": 0.0038, "step": 4626 }, { "epoch": 272.1764705882353, "grad_norm": 0.29864344000816345, "learning_rate": 4.894507115279652e-06, "loss": 0.0048, "step": 4627 }, { "epoch": 272.2352941176471, "grad_norm": 0.5043613910675049, "learning_rate": 4.890412343256753e-06, "loss": 0.0044, "step": 4628 }, { "epoch": 272.29411764705884, "grad_norm": 0.446936696767807, "learning_rate": 4.886318730342902e-06, "loss": 0.0068, "step": 4629 }, { "epoch": 272.3529411764706, "grad_norm": 0.5195885300636292, "learning_rate": 4.882226277466737e-06, "loss": 0.0063, "step": 4630 }, { "epoch": 272.4117647058824, "grad_norm": 0.3596270680427551, "learning_rate": 4.8781349855566354e-06, "loss": 0.0044, "step": 4631 }, { "epoch": 272.47058823529414, "grad_norm": 0.4051353633403778, "learning_rate": 4.8740448555406976e-06, "loss": 0.0061, "step": 4632 }, { "epoch": 272.52941176470586, "grad_norm": 0.4312669336795807, "learning_rate": 4.869955888346772e-06, "loss": 0.0071, "step": 4633 }, { "epoch": 272.5882352941176, "grad_norm": 0.5864137411117554, "learning_rate": 4.8658680849024434e-06, "loss": 0.0115, "step": 4634 }, { "epoch": 272.6470588235294, "grad_norm": 0.29659032821655273, "learning_rate": 4.8617814461350285e-06, "loss": 0.0035, "step": 4635 }, { "epoch": 272.70588235294116, "grad_norm": 0.5190169811248779, "learning_rate": 4.857695972971573e-06, "loss": 0.0073, "step": 4636 }, { "epoch": 272.7647058823529, "grad_norm": 0.505478024482727, "learning_rate": 4.853611666338875e-06, "loss": 0.0058, "step": 4637 }, { "epoch": 272.8235294117647, "grad_norm": 0.5742133259773254, "learning_rate": 4.849528527163459e-06, "loss": 0.007, "step": 4638 }, { "epoch": 272.88235294117646, "grad_norm": 0.15222857892513275, "learning_rate": 4.845446556371578e-06, "loss": 0.0025, "step": 4639 }, { "epoch": 272.94117647058823, "grad_norm": 0.21996641159057617, "learning_rate": 4.841365754889229e-06, "loss": 0.003, "step": 4640 }, { "epoch": 273.0, "grad_norm": 0.36922457814216614, "learning_rate": 4.837286123642141e-06, "loss": 0.0039, "step": 4641 }, { "epoch": 273.05882352941177, "grad_norm": 0.3757924437522888, "learning_rate": 4.8332076635557825e-06, "loss": 0.0054, "step": 4642 }, { "epoch": 273.11764705882354, "grad_norm": 0.4076545536518097, "learning_rate": 4.829130375555342e-06, "loss": 0.0066, "step": 4643 }, { "epoch": 273.1764705882353, "grad_norm": 0.7369382381439209, "learning_rate": 4.8250542605657565e-06, "loss": 0.0037, "step": 4644 }, { "epoch": 273.2352941176471, "grad_norm": 0.2701355814933777, "learning_rate": 4.820979319511696e-06, "loss": 0.0061, "step": 4645 }, { "epoch": 273.29411764705884, "grad_norm": 0.18937963247299194, "learning_rate": 4.816905553317553e-06, "loss": 0.0033, "step": 4646 }, { "epoch": 273.3529411764706, "grad_norm": 0.3299879729747772, "learning_rate": 4.8128329629074635e-06, "loss": 0.0036, "step": 4647 }, { "epoch": 273.4117647058824, "grad_norm": 0.48650312423706055, "learning_rate": 4.808761549205294e-06, "loss": 0.0072, "step": 4648 }, { "epoch": 273.47058823529414, "grad_norm": 0.35799136757850647, "learning_rate": 4.804691313134647e-06, "loss": 0.0038, "step": 4649 }, { "epoch": 273.52941176470586, "grad_norm": 0.22085294127464294, "learning_rate": 4.80062225561885e-06, "loss": 0.0044, "step": 4650 }, { "epoch": 273.5882352941176, "grad_norm": 0.43981143832206726, "learning_rate": 4.796554377580969e-06, "loss": 0.0056, "step": 4651 }, { "epoch": 273.6470588235294, "grad_norm": 0.5586402416229248, "learning_rate": 4.7924876799438015e-06, "loss": 0.0078, "step": 4652 }, { "epoch": 273.70588235294116, "grad_norm": 0.41176745295524597, "learning_rate": 4.788422163629883e-06, "loss": 0.0063, "step": 4653 }, { "epoch": 273.7647058823529, "grad_norm": 0.19981524348258972, "learning_rate": 4.784357829561466e-06, "loss": 0.0042, "step": 4654 }, { "epoch": 273.8235294117647, "grad_norm": 0.1964406967163086, "learning_rate": 4.780294678660548e-06, "loss": 0.0031, "step": 4655 }, { "epoch": 273.88235294117646, "grad_norm": 0.21795900166034698, "learning_rate": 4.776232711848857e-06, "loss": 0.0033, "step": 4656 }, { "epoch": 273.94117647058823, "grad_norm": 0.28374573588371277, "learning_rate": 4.772171930047844e-06, "loss": 0.0059, "step": 4657 }, { "epoch": 274.0, "grad_norm": 0.38872504234313965, "learning_rate": 4.7681123341787e-06, "loss": 0.0032, "step": 4658 }, { "epoch": 274.05882352941177, "grad_norm": 0.4002649486064911, "learning_rate": 4.764053925162341e-06, "loss": 0.0041, "step": 4659 }, { "epoch": 274.11764705882354, "grad_norm": 0.5469279885292053, "learning_rate": 4.759996703919423e-06, "loss": 0.0054, "step": 4660 }, { "epoch": 274.1764705882353, "grad_norm": 0.3141525685787201, "learning_rate": 4.755940671370316e-06, "loss": 0.0061, "step": 4661 }, { "epoch": 274.2352941176471, "grad_norm": 0.4197852611541748, "learning_rate": 4.751885828435137e-06, "loss": 0.0051, "step": 4662 }, { "epoch": 274.29411764705884, "grad_norm": 0.42690223455429077, "learning_rate": 4.747832176033724e-06, "loss": 0.0079, "step": 4663 }, { "epoch": 274.3529411764706, "grad_norm": 0.18060651421546936, "learning_rate": 4.743779715085652e-06, "loss": 0.0027, "step": 4664 }, { "epoch": 274.4117647058824, "grad_norm": 0.297025591135025, "learning_rate": 4.7397284465102134e-06, "loss": 0.0042, "step": 4665 }, { "epoch": 274.47058823529414, "grad_norm": 0.5241997838020325, "learning_rate": 4.7356783712264405e-06, "loss": 0.0047, "step": 4666 }, { "epoch": 274.52941176470586, "grad_norm": 0.3446527123451233, "learning_rate": 4.731629490153094e-06, "loss": 0.0029, "step": 4667 }, { "epoch": 274.5882352941176, "grad_norm": 0.42766329646110535, "learning_rate": 4.7275818042086665e-06, "loss": 0.0036, "step": 4668 }, { "epoch": 274.6470588235294, "grad_norm": 0.42375144362449646, "learning_rate": 4.723535314311362e-06, "loss": 0.0049, "step": 4669 }, { "epoch": 274.70588235294116, "grad_norm": 0.34055349230766296, "learning_rate": 4.719490021379136e-06, "loss": 0.0046, "step": 4670 }, { "epoch": 274.7647058823529, "grad_norm": 0.27031660079956055, "learning_rate": 4.715445926329664e-06, "loss": 0.0043, "step": 4671 }, { "epoch": 274.8235294117647, "grad_norm": 0.293197363615036, "learning_rate": 4.711403030080338e-06, "loss": 0.005, "step": 4672 }, { "epoch": 274.88235294117646, "grad_norm": 0.3389621078968048, "learning_rate": 4.707361333548296e-06, "loss": 0.0087, "step": 4673 }, { "epoch": 274.94117647058823, "grad_norm": 0.4978761672973633, "learning_rate": 4.703320837650395e-06, "loss": 0.0082, "step": 4674 }, { "epoch": 275.0, "grad_norm": 0.1860722452402115, "learning_rate": 4.699281543303222e-06, "loss": 0.0027, "step": 4675 }, { "epoch": 275.05882352941177, "grad_norm": 0.34727951884269714, "learning_rate": 4.695243451423086e-06, "loss": 0.0033, "step": 4676 }, { "epoch": 275.11764705882354, "grad_norm": 0.8580659031867981, "learning_rate": 4.691206562926031e-06, "loss": 0.0069, "step": 4677 }, { "epoch": 275.1764705882353, "grad_norm": 0.5279880166053772, "learning_rate": 4.687170878727822e-06, "loss": 0.0082, "step": 4678 }, { "epoch": 275.2352941176471, "grad_norm": 0.5794119238853455, "learning_rate": 4.683136399743958e-06, "loss": 0.0058, "step": 4679 }, { "epoch": 275.29411764705884, "grad_norm": 0.21312497556209564, "learning_rate": 4.679103126889653e-06, "loss": 0.0026, "step": 4680 }, { "epoch": 275.3529411764706, "grad_norm": 0.30003032088279724, "learning_rate": 4.675071061079858e-06, "loss": 0.0043, "step": 4681 }, { "epoch": 275.4117647058824, "grad_norm": 0.37402403354644775, "learning_rate": 4.671040203229245e-06, "loss": 0.0033, "step": 4682 }, { "epoch": 275.47058823529414, "grad_norm": 0.4000922739505768, "learning_rate": 4.6670105542522205e-06, "loss": 0.0074, "step": 4683 }, { "epoch": 275.52941176470586, "grad_norm": 0.5172215700149536, "learning_rate": 4.662982115062899e-06, "loss": 0.0067, "step": 4684 }, { "epoch": 275.5882352941176, "grad_norm": 0.3536931574344635, "learning_rate": 4.658954886575134e-06, "loss": 0.0051, "step": 4685 }, { "epoch": 275.6470588235294, "grad_norm": 0.2592652440071106, "learning_rate": 4.6549288697025085e-06, "loss": 0.005, "step": 4686 }, { "epoch": 275.70588235294116, "grad_norm": 0.3510950207710266, "learning_rate": 4.650904065358315e-06, "loss": 0.0049, "step": 4687 }, { "epoch": 275.7647058823529, "grad_norm": 0.29930979013442993, "learning_rate": 4.646880474455584e-06, "loss": 0.0056, "step": 4688 }, { "epoch": 275.8235294117647, "grad_norm": 0.2733294665813446, "learning_rate": 4.642858097907065e-06, "loss": 0.0033, "step": 4689 }, { "epoch": 275.88235294117646, "grad_norm": 0.4630732536315918, "learning_rate": 4.638836936625237e-06, "loss": 0.0036, "step": 4690 }, { "epoch": 275.94117647058823, "grad_norm": 0.3201211392879486, "learning_rate": 4.634816991522294e-06, "loss": 0.0051, "step": 4691 }, { "epoch": 276.0, "grad_norm": 0.32351183891296387, "learning_rate": 4.630798263510162e-06, "loss": 0.0039, "step": 4692 }, { "epoch": 276.05882352941177, "grad_norm": 0.2336491346359253, "learning_rate": 4.626780753500489e-06, "loss": 0.0022, "step": 4693 }, { "epoch": 276.11764705882354, "grad_norm": 0.38042891025543213, "learning_rate": 4.622764462404652e-06, "loss": 0.0074, "step": 4694 }, { "epoch": 276.1764705882353, "grad_norm": 0.3276858329772949, "learning_rate": 4.618749391133736e-06, "loss": 0.0078, "step": 4695 }, { "epoch": 276.2352941176471, "grad_norm": 0.8233224749565125, "learning_rate": 4.614735540598564e-06, "loss": 0.0053, "step": 4696 }, { "epoch": 276.29411764705884, "grad_norm": 0.3197932839393616, "learning_rate": 4.610722911709681e-06, "loss": 0.0055, "step": 4697 }, { "epoch": 276.3529411764706, "grad_norm": 0.35319021344184875, "learning_rate": 4.606711505377342e-06, "loss": 0.0035, "step": 4698 }, { "epoch": 276.4117647058824, "grad_norm": 0.2741345465183258, "learning_rate": 4.602701322511541e-06, "loss": 0.0034, "step": 4699 }, { "epoch": 276.47058823529414, "grad_norm": 0.31011733412742615, "learning_rate": 4.598692364021985e-06, "loss": 0.0039, "step": 4700 }, { "epoch": 276.52941176470586, "grad_norm": 0.47461313009262085, "learning_rate": 4.59468463081811e-06, "loss": 0.0051, "step": 4701 }, { "epoch": 276.5882352941176, "grad_norm": 0.6257925629615784, "learning_rate": 4.590678123809062e-06, "loss": 0.0092, "step": 4702 }, { "epoch": 276.6470588235294, "grad_norm": 0.3711144030094147, "learning_rate": 4.586672843903719e-06, "loss": 0.0047, "step": 4703 }, { "epoch": 276.70588235294116, "grad_norm": 0.3062863051891327, "learning_rate": 4.582668792010679e-06, "loss": 0.0071, "step": 4704 }, { "epoch": 276.7647058823529, "grad_norm": 0.21864593029022217, "learning_rate": 4.578665969038264e-06, "loss": 0.0028, "step": 4705 }, { "epoch": 276.8235294117647, "grad_norm": 0.29754602909088135, "learning_rate": 4.574664375894508e-06, "loss": 0.0043, "step": 4706 }, { "epoch": 276.88235294117646, "grad_norm": 0.7330380082130432, "learning_rate": 4.570664013487172e-06, "loss": 0.0039, "step": 4707 }, { "epoch": 276.94117647058823, "grad_norm": 0.812976598739624, "learning_rate": 4.566664882723739e-06, "loss": 0.0053, "step": 4708 }, { "epoch": 277.0, "grad_norm": 0.18166960775852203, "learning_rate": 4.562666984511416e-06, "loss": 0.0025, "step": 4709 }, { "epoch": 277.05882352941177, "grad_norm": 0.4193521738052368, "learning_rate": 4.558670319757117e-06, "loss": 0.0052, "step": 4710 }, { "epoch": 277.11764705882354, "grad_norm": 0.2502206563949585, "learning_rate": 4.554674889367488e-06, "loss": 0.0035, "step": 4711 }, { "epoch": 277.1764705882353, "grad_norm": 0.5591453313827515, "learning_rate": 4.550680694248897e-06, "loss": 0.0055, "step": 4712 }, { "epoch": 277.2352941176471, "grad_norm": 1.061416745185852, "learning_rate": 4.546687735307418e-06, "loss": 0.0089, "step": 4713 }, { "epoch": 277.29411764705884, "grad_norm": 0.18966317176818848, "learning_rate": 4.542696013448856e-06, "loss": 0.0035, "step": 4714 }, { "epoch": 277.3529411764706, "grad_norm": 0.6407148241996765, "learning_rate": 4.538705529578733e-06, "loss": 0.0042, "step": 4715 }, { "epoch": 277.4117647058824, "grad_norm": 0.12989787757396698, "learning_rate": 4.534716284602294e-06, "loss": 0.0021, "step": 4716 }, { "epoch": 277.47058823529414, "grad_norm": 0.42440858483314514, "learning_rate": 4.53072827942449e-06, "loss": 0.0102, "step": 4717 }, { "epoch": 277.52941176470586, "grad_norm": 0.31987011432647705, "learning_rate": 4.526741514950004e-06, "loss": 0.0079, "step": 4718 }, { "epoch": 277.5882352941176, "grad_norm": 0.2529148757457733, "learning_rate": 4.522755992083231e-06, "loss": 0.0028, "step": 4719 }, { "epoch": 277.6470588235294, "grad_norm": 0.46303698420524597, "learning_rate": 4.518771711728293e-06, "loss": 0.0052, "step": 4720 }, { "epoch": 277.70588235294116, "grad_norm": 0.5681918859481812, "learning_rate": 4.514788674789013e-06, "loss": 0.0035, "step": 4721 }, { "epoch": 277.7647058823529, "grad_norm": 0.2670925557613373, "learning_rate": 4.510806882168945e-06, "loss": 0.0059, "step": 4722 }, { "epoch": 277.8235294117647, "grad_norm": 0.31958019733428955, "learning_rate": 4.506826334771366e-06, "loss": 0.0038, "step": 4723 }, { "epoch": 277.88235294117646, "grad_norm": 0.5605677962303162, "learning_rate": 4.502847033499252e-06, "loss": 0.0043, "step": 4724 }, { "epoch": 277.94117647058823, "grad_norm": 0.34518933296203613, "learning_rate": 4.498868979255312e-06, "loss": 0.0055, "step": 4725 }, { "epoch": 278.0, "grad_norm": 0.3970787525177002, "learning_rate": 4.494892172941965e-06, "loss": 0.0046, "step": 4726 }, { "epoch": 278.05882352941177, "grad_norm": 0.4495895206928253, "learning_rate": 4.490916615461354e-06, "loss": 0.006, "step": 4727 }, { "epoch": 278.11764705882354, "grad_norm": 0.5467249155044556, "learning_rate": 4.486942307715325e-06, "loss": 0.0063, "step": 4728 }, { "epoch": 278.1764705882353, "grad_norm": 0.5228304266929626, "learning_rate": 4.482969250605453e-06, "loss": 0.004, "step": 4729 }, { "epoch": 278.2352941176471, "grad_norm": 0.3474713861942291, "learning_rate": 4.478997445033026e-06, "loss": 0.0081, "step": 4730 }, { "epoch": 278.29411764705884, "grad_norm": 0.43537381291389465, "learning_rate": 4.4750268918990515e-06, "loss": 0.0038, "step": 4731 }, { "epoch": 278.3529411764706, "grad_norm": 0.46765032410621643, "learning_rate": 4.47105759210424e-06, "loss": 0.0052, "step": 4732 }, { "epoch": 278.4117647058824, "grad_norm": 0.22775743901729584, "learning_rate": 4.467089546549031e-06, "loss": 0.0036, "step": 4733 }, { "epoch": 278.47058823529414, "grad_norm": 0.29267698526382446, "learning_rate": 4.463122756133574e-06, "loss": 0.0028, "step": 4734 }, { "epoch": 278.52941176470586, "grad_norm": 0.2975819706916809, "learning_rate": 4.459157221757741e-06, "loss": 0.0073, "step": 4735 }, { "epoch": 278.5882352941176, "grad_norm": 0.28934210538864136, "learning_rate": 4.455192944321101e-06, "loss": 0.0045, "step": 4736 }, { "epoch": 278.6470588235294, "grad_norm": 0.3859359920024872, "learning_rate": 4.451229924722957e-06, "loss": 0.0069, "step": 4737 }, { "epoch": 278.70588235294116, "grad_norm": 0.5035818815231323, "learning_rate": 4.4472681638623215e-06, "loss": 0.0048, "step": 4738 }, { "epoch": 278.7647058823529, "grad_norm": 0.44675540924072266, "learning_rate": 4.443307662637913e-06, "loss": 0.0038, "step": 4739 }, { "epoch": 278.8235294117647, "grad_norm": 0.2946089804172516, "learning_rate": 4.439348421948172e-06, "loss": 0.0049, "step": 4740 }, { "epoch": 278.88235294117646, "grad_norm": 0.17443470656871796, "learning_rate": 4.435390442691253e-06, "loss": 0.0022, "step": 4741 }, { "epoch": 278.94117647058823, "grad_norm": 0.27297767996788025, "learning_rate": 4.4314337257650265e-06, "loss": 0.0061, "step": 4742 }, { "epoch": 279.0, "grad_norm": 0.3788112998008728, "learning_rate": 4.427478272067066e-06, "loss": 0.0045, "step": 4743 }, { "epoch": 279.05882352941177, "grad_norm": 0.2689777612686157, "learning_rate": 4.423524082494669e-06, "loss": 0.0033, "step": 4744 }, { "epoch": 279.11764705882354, "grad_norm": 0.5118383765220642, "learning_rate": 4.419571157944842e-06, "loss": 0.0057, "step": 4745 }, { "epoch": 279.1764705882353, "grad_norm": 0.32322925329208374, "learning_rate": 4.41561949931431e-06, "loss": 0.0054, "step": 4746 }, { "epoch": 279.2352941176471, "grad_norm": 0.464174747467041, "learning_rate": 4.4116691074994965e-06, "loss": 0.0075, "step": 4747 }, { "epoch": 279.29411764705884, "grad_norm": 0.4734838902950287, "learning_rate": 4.407719983396555e-06, "loss": 0.0106, "step": 4748 }, { "epoch": 279.3529411764706, "grad_norm": 0.21263723075389862, "learning_rate": 4.4037721279013425e-06, "loss": 0.0025, "step": 4749 }, { "epoch": 279.4117647058824, "grad_norm": 0.22706907987594604, "learning_rate": 4.3998255419094246e-06, "loss": 0.0035, "step": 4750 }, { "epoch": 279.47058823529414, "grad_norm": 0.22381195425987244, "learning_rate": 4.395880226316087e-06, "loss": 0.0025, "step": 4751 }, { "epoch": 279.52941176470586, "grad_norm": 0.4337393641471863, "learning_rate": 4.3919361820163244e-06, "loss": 0.0044, "step": 4752 }, { "epoch": 279.5882352941176, "grad_norm": 0.27173399925231934, "learning_rate": 4.387993409904845e-06, "loss": 0.0046, "step": 4753 }, { "epoch": 279.6470588235294, "grad_norm": 0.8448789119720459, "learning_rate": 4.3840519108760586e-06, "loss": 0.0063, "step": 4754 }, { "epoch": 279.70588235294116, "grad_norm": 0.2998133897781372, "learning_rate": 4.3801116858240976e-06, "loss": 0.0054, "step": 4755 }, { "epoch": 279.7647058823529, "grad_norm": 0.2939333915710449, "learning_rate": 4.376172735642803e-06, "loss": 0.0038, "step": 4756 }, { "epoch": 279.8235294117647, "grad_norm": 0.46620213985443115, "learning_rate": 4.372235061225726e-06, "loss": 0.0065, "step": 4757 }, { "epoch": 279.88235294117646, "grad_norm": 0.6137439012527466, "learning_rate": 4.3682986634661215e-06, "loss": 0.0078, "step": 4758 }, { "epoch": 279.94117647058823, "grad_norm": 0.3716279864311218, "learning_rate": 4.364363543256964e-06, "loss": 0.0048, "step": 4759 }, { "epoch": 280.0, "grad_norm": 0.3192727267742157, "learning_rate": 4.360429701490935e-06, "loss": 0.003, "step": 4760 }, { "epoch": 280.05882352941177, "grad_norm": 0.19583359360694885, "learning_rate": 4.356497139060429e-06, "loss": 0.0033, "step": 4761 }, { "epoch": 280.11764705882354, "grad_norm": 0.43421345949172974, "learning_rate": 4.352565856857539e-06, "loss": 0.0039, "step": 4762 }, { "epoch": 280.1764705882353, "grad_norm": 0.6571620106697083, "learning_rate": 4.348635855774082e-06, "loss": 0.0057, "step": 4763 }, { "epoch": 280.2352941176471, "grad_norm": 0.23494933545589447, "learning_rate": 4.344707136701579e-06, "loss": 0.0042, "step": 4764 }, { "epoch": 280.29411764705884, "grad_norm": 0.16410255432128906, "learning_rate": 4.340779700531253e-06, "loss": 0.0031, "step": 4765 }, { "epoch": 280.3529411764706, "grad_norm": 0.6273171901702881, "learning_rate": 4.336853548154046e-06, "loss": 0.0065, "step": 4766 }, { "epoch": 280.4117647058824, "grad_norm": 0.21776320040225983, "learning_rate": 4.332928680460604e-06, "loss": 0.0026, "step": 4767 }, { "epoch": 280.47058823529414, "grad_norm": 0.2546521723270416, "learning_rate": 4.329005098341288e-06, "loss": 0.0052, "step": 4768 }, { "epoch": 280.52941176470586, "grad_norm": 0.6166321635246277, "learning_rate": 4.325082802686154e-06, "loss": 0.01, "step": 4769 }, { "epoch": 280.5882352941176, "grad_norm": 0.1856747567653656, "learning_rate": 4.321161794384976e-06, "loss": 0.0024, "step": 4770 }, { "epoch": 280.6470588235294, "grad_norm": 0.642157793045044, "learning_rate": 4.317242074327236e-06, "loss": 0.0075, "step": 4771 }, { "epoch": 280.70588235294116, "grad_norm": 0.3614545166492462, "learning_rate": 4.313323643402124e-06, "loss": 0.0073, "step": 4772 }, { "epoch": 280.7647058823529, "grad_norm": 0.5013439059257507, "learning_rate": 4.3094065024985296e-06, "loss": 0.0053, "step": 4773 }, { "epoch": 280.8235294117647, "grad_norm": 0.4705585837364197, "learning_rate": 4.305490652505058e-06, "loss": 0.0068, "step": 4774 }, { "epoch": 280.88235294117646, "grad_norm": 0.26536354422569275, "learning_rate": 4.3015760943100195e-06, "loss": 0.0037, "step": 4775 }, { "epoch": 280.94117647058823, "grad_norm": 0.26365339756011963, "learning_rate": 4.297662828801434e-06, "loss": 0.0032, "step": 4776 }, { "epoch": 281.0, "grad_norm": 0.33534693717956543, "learning_rate": 4.2937508568670194e-06, "loss": 0.0039, "step": 4777 }, { "epoch": 281.05882352941177, "grad_norm": 0.27215835452079773, "learning_rate": 4.289840179394208e-06, "loss": 0.0045, "step": 4778 }, { "epoch": 281.11764705882354, "grad_norm": 0.38642024993896484, "learning_rate": 4.2859307972701405e-06, "loss": 0.0057, "step": 4779 }, { "epoch": 281.1764705882353, "grad_norm": 0.6574080586433411, "learning_rate": 4.2820227113816525e-06, "loss": 0.0082, "step": 4780 }, { "epoch": 281.2352941176471, "grad_norm": 0.36546385288238525, "learning_rate": 4.278115922615295e-06, "loss": 0.0036, "step": 4781 }, { "epoch": 281.29411764705884, "grad_norm": 0.4312707483768463, "learning_rate": 4.274210431857323e-06, "loss": 0.0068, "step": 4782 }, { "epoch": 281.3529411764706, "grad_norm": 0.22541570663452148, "learning_rate": 4.270306239993701e-06, "loss": 0.0032, "step": 4783 }, { "epoch": 281.4117647058824, "grad_norm": 0.19928160309791565, "learning_rate": 4.266403347910086e-06, "loss": 0.003, "step": 4784 }, { "epoch": 281.47058823529414, "grad_norm": 0.8936469554901123, "learning_rate": 4.262501756491851e-06, "loss": 0.0061, "step": 4785 }, { "epoch": 281.52941176470586, "grad_norm": 0.3203256130218506, "learning_rate": 4.258601466624074e-06, "loss": 0.0048, "step": 4786 }, { "epoch": 281.5882352941176, "grad_norm": 0.3624825179576874, "learning_rate": 4.254702479191537e-06, "loss": 0.0051, "step": 4787 }, { "epoch": 281.6470588235294, "grad_norm": 1.406180739402771, "learning_rate": 4.250804795078717e-06, "loss": 0.0068, "step": 4788 }, { "epoch": 281.70588235294116, "grad_norm": 0.19786491990089417, "learning_rate": 4.246908415169807e-06, "loss": 0.0031, "step": 4789 }, { "epoch": 281.7647058823529, "grad_norm": 0.23475050926208496, "learning_rate": 4.2430133403487035e-06, "loss": 0.0028, "step": 4790 }, { "epoch": 281.8235294117647, "grad_norm": 0.22234033048152924, "learning_rate": 4.239119571498998e-06, "loss": 0.0052, "step": 4791 }, { "epoch": 281.88235294117646, "grad_norm": 0.29064062237739563, "learning_rate": 4.235227109503992e-06, "loss": 0.0053, "step": 4792 }, { "epoch": 281.94117647058823, "grad_norm": 0.529978334903717, "learning_rate": 4.231335955246693e-06, "loss": 0.0061, "step": 4793 }, { "epoch": 282.0, "grad_norm": 0.3683474659919739, "learning_rate": 4.2274461096098085e-06, "loss": 0.0047, "step": 4794 }, { "epoch": 282.05882352941177, "grad_norm": 0.20022077858448029, "learning_rate": 4.223557573475745e-06, "loss": 0.0043, "step": 4795 }, { "epoch": 282.11764705882354, "grad_norm": 0.526553213596344, "learning_rate": 4.2196703477266185e-06, "loss": 0.0061, "step": 4796 }, { "epoch": 282.1764705882353, "grad_norm": 1.0181515216827393, "learning_rate": 4.215784433244248e-06, "loss": 0.0047, "step": 4797 }, { "epoch": 282.2352941176471, "grad_norm": 0.35313552618026733, "learning_rate": 4.211899830910152e-06, "loss": 0.0043, "step": 4798 }, { "epoch": 282.29411764705884, "grad_norm": 0.7598456144332886, "learning_rate": 4.208016541605547e-06, "loss": 0.0081, "step": 4799 }, { "epoch": 282.3529411764706, "grad_norm": 0.33903583884239197, "learning_rate": 4.20413456621136e-06, "loss": 0.0075, "step": 4800 }, { "epoch": 282.4117647058824, "grad_norm": 0.5685099363327026, "learning_rate": 4.200253905608216e-06, "loss": 0.0056, "step": 4801 }, { "epoch": 282.47058823529414, "grad_norm": 0.4849443733692169, "learning_rate": 4.196374560676446e-06, "loss": 0.0063, "step": 4802 }, { "epoch": 282.52941176470586, "grad_norm": 0.32874131202697754, "learning_rate": 4.192496532296072e-06, "loss": 0.0035, "step": 4803 }, { "epoch": 282.5882352941176, "grad_norm": 0.6178326606750488, "learning_rate": 4.188619821346828e-06, "loss": 0.0046, "step": 4804 }, { "epoch": 282.6470588235294, "grad_norm": 0.22828973829746246, "learning_rate": 4.184744428708146e-06, "loss": 0.0034, "step": 4805 }, { "epoch": 282.70588235294116, "grad_norm": 0.3215126693248749, "learning_rate": 4.180870355259153e-06, "loss": 0.0031, "step": 4806 }, { "epoch": 282.7647058823529, "grad_norm": 0.2986789345741272, "learning_rate": 4.176997601878686e-06, "loss": 0.0032, "step": 4807 }, { "epoch": 282.8235294117647, "grad_norm": 0.3350735902786255, "learning_rate": 4.173126169445279e-06, "loss": 0.0047, "step": 4808 }, { "epoch": 282.88235294117646, "grad_norm": 0.3223409056663513, "learning_rate": 4.169256058837166e-06, "loss": 0.005, "step": 4809 }, { "epoch": 282.94117647058823, "grad_norm": 0.3007447123527527, "learning_rate": 4.165387270932277e-06, "loss": 0.004, "step": 4810 }, { "epoch": 283.0, "grad_norm": 0.35804784297943115, "learning_rate": 4.1615198066082475e-06, "loss": 0.006, "step": 4811 }, { "epoch": 283.05882352941177, "grad_norm": 0.5340114831924438, "learning_rate": 4.157653666742413e-06, "loss": 0.0063, "step": 4812 }, { "epoch": 283.11764705882354, "grad_norm": 0.5116007924079895, "learning_rate": 4.153788852211807e-06, "loss": 0.0056, "step": 4813 }, { "epoch": 283.1764705882353, "grad_norm": 0.40244224667549133, "learning_rate": 4.1499253638931595e-06, "loss": 0.0044, "step": 4814 }, { "epoch": 283.2352941176471, "grad_norm": 0.3387017846107483, "learning_rate": 4.1460632026629025e-06, "loss": 0.0065, "step": 4815 }, { "epoch": 283.29411764705884, "grad_norm": 0.3474509119987488, "learning_rate": 4.142202369397169e-06, "loss": 0.0056, "step": 4816 }, { "epoch": 283.3529411764706, "grad_norm": 0.3256286382675171, "learning_rate": 4.138342864971785e-06, "loss": 0.0046, "step": 4817 }, { "epoch": 283.4117647058824, "grad_norm": 0.2811211943626404, "learning_rate": 4.134484690262278e-06, "loss": 0.0046, "step": 4818 }, { "epoch": 283.47058823529414, "grad_norm": 0.2368965893983841, "learning_rate": 4.130627846143877e-06, "loss": 0.0046, "step": 4819 }, { "epoch": 283.52941176470586, "grad_norm": 0.3895733058452606, "learning_rate": 4.12677233349151e-06, "loss": 0.0043, "step": 4820 }, { "epoch": 283.5882352941176, "grad_norm": 0.5017222166061401, "learning_rate": 4.122918153179791e-06, "loss": 0.0053, "step": 4821 }, { "epoch": 283.6470588235294, "grad_norm": 0.5060060620307922, "learning_rate": 4.119065306083044e-06, "loss": 0.0066, "step": 4822 }, { "epoch": 283.70588235294116, "grad_norm": 0.39193257689476013, "learning_rate": 4.115213793075286e-06, "loss": 0.007, "step": 4823 }, { "epoch": 283.7647058823529, "grad_norm": 0.43208566308021545, "learning_rate": 4.111363615030238e-06, "loss": 0.0033, "step": 4824 }, { "epoch": 283.8235294117647, "grad_norm": 0.3018188774585724, "learning_rate": 4.1075147728213025e-06, "loss": 0.0023, "step": 4825 }, { "epoch": 283.88235294117646, "grad_norm": 0.6844426989555359, "learning_rate": 4.103667267321592e-06, "loss": 0.0051, "step": 4826 }, { "epoch": 283.94117647058823, "grad_norm": 0.16778838634490967, "learning_rate": 4.099821099403916e-06, "loss": 0.0023, "step": 4827 }, { "epoch": 284.0, "grad_norm": 0.25500768423080444, "learning_rate": 4.095976269940777e-06, "loss": 0.0047, "step": 4828 }, { "epoch": 284.05882352941177, "grad_norm": 0.20998641848564148, "learning_rate": 4.092132779804368e-06, "loss": 0.0024, "step": 4829 }, { "epoch": 284.11764705882354, "grad_norm": 0.36407431960105896, "learning_rate": 4.088290629866587e-06, "loss": 0.0039, "step": 4830 }, { "epoch": 284.1764705882353, "grad_norm": 0.7676168084144592, "learning_rate": 4.084449820999029e-06, "loss": 0.0064, "step": 4831 }, { "epoch": 284.2352941176471, "grad_norm": 0.1878315657377243, "learning_rate": 4.080610354072975e-06, "loss": 0.0031, "step": 4832 }, { "epoch": 284.29411764705884, "grad_norm": 0.43727537989616394, "learning_rate": 4.076772229959409e-06, "loss": 0.0068, "step": 4833 }, { "epoch": 284.3529411764706, "grad_norm": 0.5443737506866455, "learning_rate": 4.07293544952901e-06, "loss": 0.0096, "step": 4834 }, { "epoch": 284.4117647058824, "grad_norm": 0.3550522029399872, "learning_rate": 4.069100013652153e-06, "loss": 0.0096, "step": 4835 }, { "epoch": 284.47058823529414, "grad_norm": 0.5136362314224243, "learning_rate": 4.065265923198901e-06, "loss": 0.0035, "step": 4836 }, { "epoch": 284.52941176470586, "grad_norm": 0.2771886885166168, "learning_rate": 4.061433179039019e-06, "loss": 0.0042, "step": 4837 }, { "epoch": 284.5882352941176, "grad_norm": 0.34124207496643066, "learning_rate": 4.057601782041964e-06, "loss": 0.0047, "step": 4838 }, { "epoch": 284.6470588235294, "grad_norm": 0.6031988859176636, "learning_rate": 4.053771733076892e-06, "loss": 0.0042, "step": 4839 }, { "epoch": 284.70588235294116, "grad_norm": 0.3177085816860199, "learning_rate": 4.0499430330126425e-06, "loss": 0.0062, "step": 4840 }, { "epoch": 284.7647058823529, "grad_norm": 0.2103460431098938, "learning_rate": 4.046115682717757e-06, "loss": 0.0031, "step": 4841 }, { "epoch": 284.8235294117647, "grad_norm": 0.6233839988708496, "learning_rate": 4.042289683060471e-06, "loss": 0.0068, "step": 4842 }, { "epoch": 284.88235294117646, "grad_norm": 0.23099510371685028, "learning_rate": 4.038465034908711e-06, "loss": 0.0027, "step": 4843 }, { "epoch": 284.94117647058823, "grad_norm": 0.19109418988227844, "learning_rate": 4.0346417391300985e-06, "loss": 0.0035, "step": 4844 }, { "epoch": 285.0, "grad_norm": 0.22279050946235657, "learning_rate": 4.03081979659195e-06, "loss": 0.0027, "step": 4845 }, { "epoch": 285.05882352941177, "grad_norm": 0.8062074780464172, "learning_rate": 4.026999208161265e-06, "loss": 0.0055, "step": 4846 }, { "epoch": 285.11764705882354, "grad_norm": 0.7819082736968994, "learning_rate": 4.023179974704749e-06, "loss": 0.0034, "step": 4847 }, { "epoch": 285.1764705882353, "grad_norm": 0.578463613986969, "learning_rate": 4.019362097088793e-06, "loss": 0.0033, "step": 4848 }, { "epoch": 285.2352941176471, "grad_norm": 0.31872978806495667, "learning_rate": 4.015545576179484e-06, "loss": 0.0047, "step": 4849 }, { "epoch": 285.29411764705884, "grad_norm": 0.2757866680622101, "learning_rate": 4.011730412842595e-06, "loss": 0.0041, "step": 4850 }, { "epoch": 285.3529411764706, "grad_norm": 0.18139463663101196, "learning_rate": 4.007916607943596e-06, "loss": 0.0023, "step": 4851 }, { "epoch": 285.4117647058824, "grad_norm": 0.23655667901039124, "learning_rate": 4.00410416234765e-06, "loss": 0.0032, "step": 4852 }, { "epoch": 285.47058823529414, "grad_norm": 0.49075186252593994, "learning_rate": 4.000293076919612e-06, "loss": 0.0069, "step": 4853 }, { "epoch": 285.52941176470586, "grad_norm": 0.7609533667564392, "learning_rate": 3.99648335252402e-06, "loss": 0.0062, "step": 4854 }, { "epoch": 285.5882352941176, "grad_norm": 0.6384546160697937, "learning_rate": 3.992674990025112e-06, "loss": 0.0075, "step": 4855 }, { "epoch": 285.6470588235294, "grad_norm": 0.3132208287715912, "learning_rate": 3.988867990286814e-06, "loss": 0.0053, "step": 4856 }, { "epoch": 285.70588235294116, "grad_norm": 0.40188562870025635, "learning_rate": 3.985062354172747e-06, "loss": 0.007, "step": 4857 }, { "epoch": 285.7647058823529, "grad_norm": 0.3393852710723877, "learning_rate": 3.981258082546213e-06, "loss": 0.0031, "step": 4858 }, { "epoch": 285.8235294117647, "grad_norm": 0.3568188548088074, "learning_rate": 3.977455176270212e-06, "loss": 0.01, "step": 4859 }, { "epoch": 285.88235294117646, "grad_norm": 0.24453595280647278, "learning_rate": 3.973653636207437e-06, "loss": 0.0038, "step": 4860 }, { "epoch": 285.94117647058823, "grad_norm": 0.26623696088790894, "learning_rate": 3.96985346322026e-06, "loss": 0.0049, "step": 4861 }, { "epoch": 286.0, "grad_norm": 0.41117727756500244, "learning_rate": 3.966054658170754e-06, "loss": 0.0047, "step": 4862 }, { "epoch": 286.05882352941177, "grad_norm": 0.2416749894618988, "learning_rate": 3.962257221920675e-06, "loss": 0.0045, "step": 4863 }, { "epoch": 286.11764705882354, "grad_norm": 0.21127891540527344, "learning_rate": 3.958461155331475e-06, "loss": 0.0038, "step": 4864 }, { "epoch": 286.1764705882353, "grad_norm": 0.44773417711257935, "learning_rate": 3.954666459264284e-06, "loss": 0.005, "step": 4865 }, { "epoch": 286.2352941176471, "grad_norm": 0.4055315852165222, "learning_rate": 3.950873134579932e-06, "loss": 0.0057, "step": 4866 }, { "epoch": 286.29411764705884, "grad_norm": 0.35707035660743713, "learning_rate": 3.947081182138934e-06, "loss": 0.0078, "step": 4867 }, { "epoch": 286.3529411764706, "grad_norm": 0.6812667846679688, "learning_rate": 3.943290602801499e-06, "loss": 0.0054, "step": 4868 }, { "epoch": 286.4117647058824, "grad_norm": 0.360656201839447, "learning_rate": 3.939501397427508e-06, "loss": 0.0063, "step": 4869 }, { "epoch": 286.47058823529414, "grad_norm": 0.3033202290534973, "learning_rate": 3.935713566876549e-06, "loss": 0.003, "step": 4870 }, { "epoch": 286.52941176470586, "grad_norm": 1.173028826713562, "learning_rate": 3.9319271120078915e-06, "loss": 0.004, "step": 4871 }, { "epoch": 286.5882352941176, "grad_norm": 0.3901248276233673, "learning_rate": 3.928142033680487e-06, "loss": 0.0065, "step": 4872 }, { "epoch": 286.6470588235294, "grad_norm": 0.15226471424102783, "learning_rate": 3.924358332752982e-06, "loss": 0.0021, "step": 4873 }, { "epoch": 286.70588235294116, "grad_norm": 0.40197667479515076, "learning_rate": 3.92057601008371e-06, "loss": 0.004, "step": 4874 }, { "epoch": 286.7647058823529, "grad_norm": 0.358369916677475, "learning_rate": 3.916795066530693e-06, "loss": 0.0053, "step": 4875 }, { "epoch": 286.8235294117647, "grad_norm": 0.172002911567688, "learning_rate": 3.913015502951629e-06, "loss": 0.0038, "step": 4876 }, { "epoch": 286.88235294117646, "grad_norm": 0.2427750676870346, "learning_rate": 3.909237320203917e-06, "loss": 0.0037, "step": 4877 }, { "epoch": 286.94117647058823, "grad_norm": 0.18357662856578827, "learning_rate": 3.905460519144635e-06, "loss": 0.0031, "step": 4878 }, { "epoch": 287.0, "grad_norm": 0.3847479522228241, "learning_rate": 3.901685100630554e-06, "loss": 0.0051, "step": 4879 }, { "epoch": 287.05882352941177, "grad_norm": 0.23606924712657928, "learning_rate": 3.897911065518122e-06, "loss": 0.0047, "step": 4880 }, { "epoch": 287.11764705882354, "grad_norm": 0.3386688530445099, "learning_rate": 3.894138414663479e-06, "loss": 0.0049, "step": 4881 }, { "epoch": 287.1764705882353, "grad_norm": 0.26513364911079407, "learning_rate": 3.89036714892245e-06, "loss": 0.0046, "step": 4882 }, { "epoch": 287.2352941176471, "grad_norm": 0.3656056523323059, "learning_rate": 3.886597269150552e-06, "loss": 0.0033, "step": 4883 }, { "epoch": 287.29411764705884, "grad_norm": 0.5165929198265076, "learning_rate": 3.8828287762029725e-06, "loss": 0.0038, "step": 4884 }, { "epoch": 287.3529411764706, "grad_norm": 0.6942642331123352, "learning_rate": 3.8790616709345974e-06, "loss": 0.0088, "step": 4885 }, { "epoch": 287.4117647058824, "grad_norm": 0.3134743273258209, "learning_rate": 3.8752959541999975e-06, "loss": 0.0065, "step": 4886 }, { "epoch": 287.47058823529414, "grad_norm": 0.588962197303772, "learning_rate": 3.871531626853419e-06, "loss": 0.0083, "step": 4887 }, { "epoch": 287.52941176470586, "grad_norm": 0.3108294606208801, "learning_rate": 3.8677686897488e-06, "loss": 0.0029, "step": 4888 }, { "epoch": 287.5882352941176, "grad_norm": 0.19241422414779663, "learning_rate": 3.864007143739764e-06, "loss": 0.002, "step": 4889 }, { "epoch": 287.6470588235294, "grad_norm": 0.8894075155258179, "learning_rate": 3.86024698967962e-06, "loss": 0.0048, "step": 4890 }, { "epoch": 287.70588235294116, "grad_norm": 0.3712492883205414, "learning_rate": 3.856488228421353e-06, "loss": 0.0042, "step": 4891 }, { "epoch": 287.7647058823529, "grad_norm": 0.3713703751564026, "learning_rate": 3.852730860817636e-06, "loss": 0.0062, "step": 4892 }, { "epoch": 287.8235294117647, "grad_norm": 0.5931300520896912, "learning_rate": 3.848974887720832e-06, "loss": 0.0062, "step": 4893 }, { "epoch": 287.88235294117646, "grad_norm": 0.33087870478630066, "learning_rate": 3.845220309982985e-06, "loss": 0.0032, "step": 4894 }, { "epoch": 287.94117647058823, "grad_norm": 0.22205013036727905, "learning_rate": 3.841467128455813e-06, "loss": 0.0033, "step": 4895 }, { "epoch": 288.0, "grad_norm": 0.24407102167606354, "learning_rate": 3.837715343990727e-06, "loss": 0.0049, "step": 4896 }, { "epoch": 288.05882352941177, "grad_norm": 0.40510544180870056, "learning_rate": 3.833964957438824e-06, "loss": 0.0039, "step": 4897 }, { "epoch": 288.11764705882354, "grad_norm": 0.3957953155040741, "learning_rate": 3.83021596965087e-06, "loss": 0.0053, "step": 4898 }, { "epoch": 288.1764705882353, "grad_norm": 0.4219454526901245, "learning_rate": 3.826468381477326e-06, "loss": 0.0028, "step": 4899 }, { "epoch": 288.2352941176471, "grad_norm": 0.2770370841026306, "learning_rate": 3.822722193768334e-06, "loss": 0.0035, "step": 4900 }, { "epoch": 288.29411764705884, "grad_norm": 0.24208110570907593, "learning_rate": 3.818977407373715e-06, "loss": 0.0037, "step": 4901 }, { "epoch": 288.3529411764706, "grad_norm": 0.4072802662849426, "learning_rate": 3.81523402314297e-06, "loss": 0.0046, "step": 4902 }, { "epoch": 288.4117647058824, "grad_norm": 0.3979363441467285, "learning_rate": 3.8114920419252874e-06, "loss": 0.0067, "step": 4903 }, { "epoch": 288.47058823529414, "grad_norm": 0.24264079332351685, "learning_rate": 3.8077514645695347e-06, "loss": 0.0027, "step": 4904 }, { "epoch": 288.52941176470586, "grad_norm": 0.260361909866333, "learning_rate": 3.804012291924265e-06, "loss": 0.0034, "step": 4905 }, { "epoch": 288.5882352941176, "grad_norm": 1.019217848777771, "learning_rate": 3.8002745248377014e-06, "loss": 0.0095, "step": 4906 }, { "epoch": 288.6470588235294, "grad_norm": 0.26503294706344604, "learning_rate": 3.7965381641577602e-06, "loss": 0.0041, "step": 4907 }, { "epoch": 288.70588235294116, "grad_norm": 0.5410755276679993, "learning_rate": 3.7928032107320333e-06, "loss": 0.0041, "step": 4908 }, { "epoch": 288.7647058823529, "grad_norm": 0.37587442994117737, "learning_rate": 3.7890696654077975e-06, "loss": 0.0064, "step": 4909 }, { "epoch": 288.8235294117647, "grad_norm": 0.33954694867134094, "learning_rate": 3.7853375290320006e-06, "loss": 0.0056, "step": 4910 }, { "epoch": 288.88235294117646, "grad_norm": 0.2817603647708893, "learning_rate": 3.7816068024512808e-06, "loss": 0.0052, "step": 4911 }, { "epoch": 288.94117647058823, "grad_norm": 0.14518892765045166, "learning_rate": 3.777877486511955e-06, "loss": 0.0023, "step": 4912 }, { "epoch": 289.0, "grad_norm": 0.4070386588573456, "learning_rate": 3.7741495820600128e-06, "loss": 0.0032, "step": 4913 }, { "epoch": 289.05882352941177, "grad_norm": 1.4536324739456177, "learning_rate": 3.77042308994113e-06, "loss": 0.0067, "step": 4914 }, { "epoch": 289.11764705882354, "grad_norm": 0.34143897891044617, "learning_rate": 3.7666980110006623e-06, "loss": 0.0068, "step": 4915 }, { "epoch": 289.1764705882353, "grad_norm": 0.22222821414470673, "learning_rate": 3.7629743460836455e-06, "loss": 0.003, "step": 4916 }, { "epoch": 289.2352941176471, "grad_norm": 0.3406202495098114, "learning_rate": 3.759252096034787e-06, "loss": 0.0033, "step": 4917 }, { "epoch": 289.29411764705884, "grad_norm": 0.356577068567276, "learning_rate": 3.7555312616984795e-06, "loss": 0.0067, "step": 4918 }, { "epoch": 289.3529411764706, "grad_norm": 2.2736637592315674, "learning_rate": 3.7518118439187966e-06, "loss": 0.006, "step": 4919 }, { "epoch": 289.4117647058824, "grad_norm": 0.2984713613986969, "learning_rate": 3.748093843539489e-06, "loss": 0.0052, "step": 4920 }, { "epoch": 289.47058823529414, "grad_norm": 0.3535454571247101, "learning_rate": 3.7443772614039776e-06, "loss": 0.006, "step": 4921 }, { "epoch": 289.52941176470586, "grad_norm": 0.49731969833374023, "learning_rate": 3.7406620983553722e-06, "loss": 0.0058, "step": 4922 }, { "epoch": 289.5882352941176, "grad_norm": 0.24816840887069702, "learning_rate": 3.7369483552364572e-06, "loss": 0.0026, "step": 4923 }, { "epoch": 289.6470588235294, "grad_norm": 0.1675892174243927, "learning_rate": 3.7332360328896975e-06, "loss": 0.0022, "step": 4924 }, { "epoch": 289.70588235294116, "grad_norm": 0.7895986437797546, "learning_rate": 3.729525132157227e-06, "loss": 0.0057, "step": 4925 }, { "epoch": 289.7647058823529, "grad_norm": 0.6309917569160461, "learning_rate": 3.725815653880864e-06, "loss": 0.0046, "step": 4926 }, { "epoch": 289.8235294117647, "grad_norm": 0.3374854326248169, "learning_rate": 3.7221075989021093e-06, "loss": 0.0045, "step": 4927 }, { "epoch": 289.88235294117646, "grad_norm": 0.34605127573013306, "learning_rate": 3.7184009680621256e-06, "loss": 0.0042, "step": 4928 }, { "epoch": 289.94117647058823, "grad_norm": 0.28431081771850586, "learning_rate": 3.7146957622017652e-06, "loss": 0.0052, "step": 4929 }, { "epoch": 290.0, "grad_norm": 0.4310051202774048, "learning_rate": 3.7109919821615546e-06, "loss": 0.005, "step": 4930 }, { "epoch": 290.05882352941177, "grad_norm": 0.30761680006980896, "learning_rate": 3.707289628781697e-06, "loss": 0.0054, "step": 4931 }, { "epoch": 290.11764705882354, "grad_norm": 0.25463151931762695, "learning_rate": 3.703588702902066e-06, "loss": 0.0029, "step": 4932 }, { "epoch": 290.1764705882353, "grad_norm": 0.4018896818161011, "learning_rate": 3.699889205362218e-06, "loss": 0.0046, "step": 4933 }, { "epoch": 290.2352941176471, "grad_norm": 0.26059576869010925, "learning_rate": 3.696191137001384e-06, "loss": 0.0051, "step": 4934 }, { "epoch": 290.29411764705884, "grad_norm": 0.23658467829227448, "learning_rate": 3.692494498658472e-06, "loss": 0.0046, "step": 4935 }, { "epoch": 290.3529411764706, "grad_norm": 0.2389429658651352, "learning_rate": 3.6887992911720593e-06, "loss": 0.0033, "step": 4936 }, { "epoch": 290.4117647058824, "grad_norm": 0.4078180491924286, "learning_rate": 3.685105515380405e-06, "loss": 0.0058, "step": 4937 }, { "epoch": 290.47058823529414, "grad_norm": 0.4476725459098816, "learning_rate": 3.6814131721214454e-06, "loss": 0.0057, "step": 4938 }, { "epoch": 290.52941176470586, "grad_norm": 0.318683922290802, "learning_rate": 3.6777222622327804e-06, "loss": 0.0064, "step": 4939 }, { "epoch": 290.5882352941176, "grad_norm": 0.306680291891098, "learning_rate": 3.6740327865516967e-06, "loss": 0.0033, "step": 4940 }, { "epoch": 290.6470588235294, "grad_norm": 0.1952824592590332, "learning_rate": 3.67034474591515e-06, "loss": 0.0024, "step": 4941 }, { "epoch": 290.70588235294116, "grad_norm": 0.2531048655509949, "learning_rate": 3.6666581411597767e-06, "loss": 0.0035, "step": 4942 }, { "epoch": 290.7647058823529, "grad_norm": 0.5835723876953125, "learning_rate": 3.6629729731218745e-06, "loss": 0.0086, "step": 4943 }, { "epoch": 290.8235294117647, "grad_norm": 0.4324885606765747, "learning_rate": 3.659289242637426e-06, "loss": 0.003, "step": 4944 }, { "epoch": 290.88235294117646, "grad_norm": 0.37241098284721375, "learning_rate": 3.6556069505420856e-06, "loss": 0.0027, "step": 4945 }, { "epoch": 290.94117647058823, "grad_norm": 0.3961423933506012, "learning_rate": 3.6519260976711845e-06, "loss": 0.0043, "step": 4946 }, { "epoch": 291.0, "grad_norm": 0.9465956091880798, "learning_rate": 3.6482466848597164e-06, "loss": 0.0064, "step": 4947 }, { "epoch": 291.05882352941177, "grad_norm": 0.7645736932754517, "learning_rate": 3.644568712942358e-06, "loss": 0.0048, "step": 4948 }, { "epoch": 291.11764705882354, "grad_norm": 0.7285260558128357, "learning_rate": 3.640892182753457e-06, "loss": 0.009, "step": 4949 }, { "epoch": 291.1764705882353, "grad_norm": 0.3321298360824585, "learning_rate": 3.637217095127037e-06, "loss": 0.0067, "step": 4950 }, { "epoch": 291.2352941176471, "grad_norm": 0.3918488323688507, "learning_rate": 3.633543450896785e-06, "loss": 0.0054, "step": 4951 }, { "epoch": 291.29411764705884, "grad_norm": 0.37821611762046814, "learning_rate": 3.629871250896069e-06, "loss": 0.0063, "step": 4952 }, { "epoch": 291.3529411764706, "grad_norm": 0.3178195059299469, "learning_rate": 3.62620049595793e-06, "loss": 0.0038, "step": 4953 }, { "epoch": 291.4117647058824, "grad_norm": 0.30227845907211304, "learning_rate": 3.6225311869150724e-06, "loss": 0.004, "step": 4954 }, { "epoch": 291.47058823529414, "grad_norm": 0.3428025543689728, "learning_rate": 3.6188633245998816e-06, "loss": 0.0048, "step": 4955 }, { "epoch": 291.52941176470586, "grad_norm": 0.2932335138320923, "learning_rate": 3.6151969098444108e-06, "loss": 0.0037, "step": 4956 }, { "epoch": 291.5882352941176, "grad_norm": 0.717197835445404, "learning_rate": 3.6115319434803897e-06, "loss": 0.0034, "step": 4957 }, { "epoch": 291.6470588235294, "grad_norm": 0.5986688733100891, "learning_rate": 3.6078684263392073e-06, "loss": 0.0054, "step": 4958 }, { "epoch": 291.70588235294116, "grad_norm": 0.23264014720916748, "learning_rate": 3.604206359251936e-06, "loss": 0.0035, "step": 4959 }, { "epoch": 291.7647058823529, "grad_norm": 0.15091371536254883, "learning_rate": 3.600545743049315e-06, "loss": 0.0031, "step": 4960 }, { "epoch": 291.8235294117647, "grad_norm": 0.1864081621170044, "learning_rate": 3.596886578561758e-06, "loss": 0.0025, "step": 4961 }, { "epoch": 291.88235294117646, "grad_norm": 0.19982010126113892, "learning_rate": 3.5932288666193395e-06, "loss": 0.0033, "step": 4962 }, { "epoch": 291.94117647058823, "grad_norm": 0.2376984804868698, "learning_rate": 3.5895726080518122e-06, "loss": 0.0049, "step": 4963 }, { "epoch": 292.0, "grad_norm": 0.2380775809288025, "learning_rate": 3.585917803688603e-06, "loss": 0.0032, "step": 4964 }, { "epoch": 292.05882352941177, "grad_norm": 0.1338592916727066, "learning_rate": 3.582264454358797e-06, "loss": 0.0018, "step": 4965 }, { "epoch": 292.11764705882354, "grad_norm": 0.2769288122653961, "learning_rate": 3.578612560891158e-06, "loss": 0.0048, "step": 4966 }, { "epoch": 292.1764705882353, "grad_norm": 0.3298592269420624, "learning_rate": 3.5749621241141185e-06, "loss": 0.0059, "step": 4967 }, { "epoch": 292.2352941176471, "grad_norm": 0.41475018858909607, "learning_rate": 3.5713131448557814e-06, "loss": 0.0055, "step": 4968 }, { "epoch": 292.29411764705884, "grad_norm": 0.2692807912826538, "learning_rate": 3.567665623943911e-06, "loss": 0.0051, "step": 4969 }, { "epoch": 292.3529411764706, "grad_norm": 0.35486602783203125, "learning_rate": 3.5640195622059503e-06, "loss": 0.0042, "step": 4970 }, { "epoch": 292.4117647058824, "grad_norm": 0.3385829031467438, "learning_rate": 3.5603749604690085e-06, "loss": 0.004, "step": 4971 }, { "epoch": 292.47058823529414, "grad_norm": 0.2548176348209381, "learning_rate": 3.5567318195598643e-06, "loss": 0.0032, "step": 4972 }, { "epoch": 292.52941176470586, "grad_norm": 0.3115980625152588, "learning_rate": 3.553090140304959e-06, "loss": 0.0042, "step": 4973 }, { "epoch": 292.5882352941176, "grad_norm": 0.3431241810321808, "learning_rate": 3.549449923530408e-06, "loss": 0.0032, "step": 4974 }, { "epoch": 292.6470588235294, "grad_norm": 0.18292851746082306, "learning_rate": 3.5458111700619957e-06, "loss": 0.0022, "step": 4975 }, { "epoch": 292.70588235294116, "grad_norm": 0.29608461260795593, "learning_rate": 3.542173880725176e-06, "loss": 0.0067, "step": 4976 }, { "epoch": 292.7647058823529, "grad_norm": 0.269024133682251, "learning_rate": 3.538538056345059e-06, "loss": 0.0057, "step": 4977 }, { "epoch": 292.8235294117647, "grad_norm": 0.17287348210811615, "learning_rate": 3.534903697746436e-06, "loss": 0.0024, "step": 4978 }, { "epoch": 292.88235294117646, "grad_norm": 0.2043771892786026, "learning_rate": 3.5312708057537613e-06, "loss": 0.0035, "step": 4979 }, { "epoch": 292.94117647058823, "grad_norm": 0.5585170984268188, "learning_rate": 3.5276393811911524e-06, "loss": 0.0079, "step": 4980 }, { "epoch": 293.0, "grad_norm": 0.23796385526657104, "learning_rate": 3.5240094248824e-06, "loss": 0.0032, "step": 4981 }, { "epoch": 293.05882352941177, "grad_norm": 0.34245502948760986, "learning_rate": 3.5203809376509567e-06, "loss": 0.0059, "step": 4982 }, { "epoch": 293.11764705882354, "grad_norm": 0.4581116735935211, "learning_rate": 3.5167539203199486e-06, "loss": 0.0094, "step": 4983 }, { "epoch": 293.1764705882353, "grad_norm": 0.2199900895357132, "learning_rate": 3.5131283737121592e-06, "loss": 0.003, "step": 4984 }, { "epoch": 293.2352941176471, "grad_norm": 0.2835700511932373, "learning_rate": 3.5095042986500437e-06, "loss": 0.0039, "step": 4985 }, { "epoch": 293.29411764705884, "grad_norm": 0.4262095093727112, "learning_rate": 3.505881695955725e-06, "loss": 0.0028, "step": 4986 }, { "epoch": 293.3529411764706, "grad_norm": 0.27176252007484436, "learning_rate": 3.5022605664509923e-06, "loss": 0.0053, "step": 4987 }, { "epoch": 293.4117647058824, "grad_norm": 0.33669745922088623, "learning_rate": 3.4986409109572906e-06, "loss": 0.0059, "step": 4988 }, { "epoch": 293.47058823529414, "grad_norm": 0.33765801787376404, "learning_rate": 3.495022730295743e-06, "loss": 0.0034, "step": 4989 }, { "epoch": 293.52941176470586, "grad_norm": 0.2737879455089569, "learning_rate": 3.4914060252871364e-06, "loss": 0.0033, "step": 4990 }, { "epoch": 293.5882352941176, "grad_norm": 0.42379266023635864, "learning_rate": 3.487790796751912e-06, "loss": 0.0044, "step": 4991 }, { "epoch": 293.6470588235294, "grad_norm": 0.22473087906837463, "learning_rate": 3.4841770455101875e-06, "loss": 0.0032, "step": 4992 }, { "epoch": 293.70588235294116, "grad_norm": 0.22720028460025787, "learning_rate": 3.4805647723817415e-06, "loss": 0.0047, "step": 4993 }, { "epoch": 293.7647058823529, "grad_norm": 0.4239369034767151, "learning_rate": 3.4769539781860216e-06, "loss": 0.0039, "step": 4994 }, { "epoch": 293.8235294117647, "grad_norm": 0.48713254928588867, "learning_rate": 3.4733446637421285e-06, "loss": 0.0045, "step": 4995 }, { "epoch": 293.88235294117646, "grad_norm": 0.3434387445449829, "learning_rate": 3.4697368298688374e-06, "loss": 0.0066, "step": 4996 }, { "epoch": 293.94117647058823, "grad_norm": 0.27064889669418335, "learning_rate": 3.466130477384586e-06, "loss": 0.0027, "step": 4997 }, { "epoch": 294.0, "grad_norm": 0.23694559931755066, "learning_rate": 3.4625256071074776e-06, "loss": 0.0035, "step": 4998 }, { "epoch": 294.05882352941177, "grad_norm": 0.22384975850582123, "learning_rate": 3.4589222198552698e-06, "loss": 0.0021, "step": 4999 }, { "epoch": 294.11764705882354, "grad_norm": 0.3421096205711365, "learning_rate": 3.455320316445393e-06, "loss": 0.006, "step": 5000 }, { "epoch": 294.1764705882353, "grad_norm": 0.24973218142986298, "learning_rate": 3.4517198976949397e-06, "loss": 0.0034, "step": 5001 }, { "epoch": 294.2352941176471, "grad_norm": 0.2979520559310913, "learning_rate": 3.448120964420666e-06, "loss": 0.0044, "step": 5002 }, { "epoch": 294.29411764705884, "grad_norm": 0.2936684191226959, "learning_rate": 3.4445235174389836e-06, "loss": 0.0051, "step": 5003 }, { "epoch": 294.3529411764706, "grad_norm": 0.6321770548820496, "learning_rate": 3.4409275575659763e-06, "loss": 0.0041, "step": 5004 }, { "epoch": 294.4117647058824, "grad_norm": 0.3878605365753174, "learning_rate": 3.437333085617388e-06, "loss": 0.0046, "step": 5005 }, { "epoch": 294.47058823529414, "grad_norm": 0.39267852902412415, "learning_rate": 3.4337401024086212e-06, "loss": 0.0053, "step": 5006 }, { "epoch": 294.52941176470586, "grad_norm": 0.14154483377933502, "learning_rate": 3.430148608754743e-06, "loss": 0.0022, "step": 5007 }, { "epoch": 294.5882352941176, "grad_norm": 0.3644152581691742, "learning_rate": 3.4265586054704857e-06, "loss": 0.0044, "step": 5008 }, { "epoch": 294.6470588235294, "grad_norm": 0.38959410786628723, "learning_rate": 3.4229700933702435e-06, "loss": 0.0052, "step": 5009 }, { "epoch": 294.70588235294116, "grad_norm": 0.4728477895259857, "learning_rate": 3.419383073268063e-06, "loss": 0.0038, "step": 5010 }, { "epoch": 294.7647058823529, "grad_norm": 0.48801225423812866, "learning_rate": 3.4157975459776626e-06, "loss": 0.0079, "step": 5011 }, { "epoch": 294.8235294117647, "grad_norm": 0.20593592524528503, "learning_rate": 3.412213512312418e-06, "loss": 0.0027, "step": 5012 }, { "epoch": 294.88235294117646, "grad_norm": 0.3721805214881897, "learning_rate": 3.4086309730853706e-06, "loss": 0.004, "step": 5013 }, { "epoch": 294.94117647058823, "grad_norm": 0.3329039216041565, "learning_rate": 3.405049929109211e-06, "loss": 0.0059, "step": 5014 }, { "epoch": 295.0, "grad_norm": 0.239998459815979, "learning_rate": 3.4014703811963024e-06, "loss": 0.0027, "step": 5015 }, { "epoch": 295.05882352941177, "grad_norm": 0.5439473986625671, "learning_rate": 3.3978923301586654e-06, "loss": 0.0033, "step": 5016 }, { "epoch": 295.11764705882354, "grad_norm": 0.34678375720977783, "learning_rate": 3.3943157768079825e-06, "loss": 0.0037, "step": 5017 }, { "epoch": 295.1764705882353, "grad_norm": 0.5214689373970032, "learning_rate": 3.3907407219555876e-06, "loss": 0.0054, "step": 5018 }, { "epoch": 295.2352941176471, "grad_norm": 0.45792728662490845, "learning_rate": 3.387167166412486e-06, "loss": 0.008, "step": 5019 }, { "epoch": 295.29411764705884, "grad_norm": 0.20462609827518463, "learning_rate": 3.3835951109893395e-06, "loss": 0.003, "step": 5020 }, { "epoch": 295.3529411764706, "grad_norm": 0.20568746328353882, "learning_rate": 3.3800245564964616e-06, "loss": 0.0033, "step": 5021 }, { "epoch": 295.4117647058824, "grad_norm": 0.8798657059669495, "learning_rate": 3.3764555037438375e-06, "loss": 0.007, "step": 5022 }, { "epoch": 295.47058823529414, "grad_norm": 0.3873843848705292, "learning_rate": 3.3728879535411043e-06, "loss": 0.0064, "step": 5023 }, { "epoch": 295.52941176470586, "grad_norm": 0.19472602009773254, "learning_rate": 3.3693219066975635e-06, "loss": 0.003, "step": 5024 }, { "epoch": 295.5882352941176, "grad_norm": 0.25824832916259766, "learning_rate": 3.3657573640221664e-06, "loss": 0.0034, "step": 5025 }, { "epoch": 295.6470588235294, "grad_norm": 0.2966550588607788, "learning_rate": 3.3621943263235324e-06, "loss": 0.005, "step": 5026 }, { "epoch": 295.70588235294116, "grad_norm": 0.30883949995040894, "learning_rate": 3.358632794409934e-06, "loss": 0.0042, "step": 5027 }, { "epoch": 295.7647058823529, "grad_norm": 0.25303512811660767, "learning_rate": 3.3550727690893104e-06, "loss": 0.0028, "step": 5028 }, { "epoch": 295.8235294117647, "grad_norm": 0.6095011234283447, "learning_rate": 3.3515142511692433e-06, "loss": 0.0091, "step": 5029 }, { "epoch": 295.88235294117646, "grad_norm": 0.3221907615661621, "learning_rate": 3.347957241456986e-06, "loss": 0.0056, "step": 5030 }, { "epoch": 295.94117647058823, "grad_norm": 0.2830073833465576, "learning_rate": 3.34440174075945e-06, "loss": 0.0045, "step": 5031 }, { "epoch": 296.0, "grad_norm": 0.21347053349018097, "learning_rate": 3.3408477498831917e-06, "loss": 0.0025, "step": 5032 }, { "epoch": 296.05882352941177, "grad_norm": 0.44081488251686096, "learning_rate": 3.3372952696344373e-06, "loss": 0.0076, "step": 5033 }, { "epoch": 296.11764705882354, "grad_norm": 0.2730230689048767, "learning_rate": 3.3337443008190674e-06, "loss": 0.0025, "step": 5034 }, { "epoch": 296.1764705882353, "grad_norm": 0.9115239381790161, "learning_rate": 3.33019484424262e-06, "loss": 0.0047, "step": 5035 }, { "epoch": 296.2352941176471, "grad_norm": 0.24623934924602509, "learning_rate": 3.326646900710284e-06, "loss": 0.0033, "step": 5036 }, { "epoch": 296.29411764705884, "grad_norm": 0.2540058493614197, "learning_rate": 3.323100471026912e-06, "loss": 0.0032, "step": 5037 }, { "epoch": 296.3529411764706, "grad_norm": 0.18820305168628693, "learning_rate": 3.3195555559970105e-06, "loss": 0.0027, "step": 5038 }, { "epoch": 296.4117647058824, "grad_norm": 0.4041934907436371, "learning_rate": 3.3160121564247474e-06, "loss": 0.008, "step": 5039 }, { "epoch": 296.47058823529414, "grad_norm": 0.22862762212753296, "learning_rate": 3.3124702731139345e-06, "loss": 0.0035, "step": 5040 }, { "epoch": 296.52941176470586, "grad_norm": 0.35262587666511536, "learning_rate": 3.3089299068680513e-06, "loss": 0.0073, "step": 5041 }, { "epoch": 296.5882352941176, "grad_norm": 0.31616488099098206, "learning_rate": 3.3053910584902293e-06, "loss": 0.0044, "step": 5042 }, { "epoch": 296.6470588235294, "grad_norm": 0.36122044920921326, "learning_rate": 3.3018537287832585e-06, "loss": 0.0038, "step": 5043 }, { "epoch": 296.70588235294116, "grad_norm": 0.3117813766002655, "learning_rate": 3.298317918549575e-06, "loss": 0.006, "step": 5044 }, { "epoch": 296.7647058823529, "grad_norm": 0.22167141735553741, "learning_rate": 3.2947836285912803e-06, "loss": 0.0029, "step": 5045 }, { "epoch": 296.8235294117647, "grad_norm": 0.3622724711894989, "learning_rate": 3.291250859710131e-06, "loss": 0.0044, "step": 5046 }, { "epoch": 296.88235294117646, "grad_norm": 0.5625202059745789, "learning_rate": 3.287719612707528e-06, "loss": 0.0049, "step": 5047 }, { "epoch": 296.94117647058823, "grad_norm": 0.26941755414009094, "learning_rate": 3.2841898883845335e-06, "loss": 0.0029, "step": 5048 }, { "epoch": 297.0, "grad_norm": 0.32957038283348083, "learning_rate": 3.280661687541876e-06, "loss": 0.0066, "step": 5049 }, { "epoch": 297.05882352941177, "grad_norm": 0.2667076587677002, "learning_rate": 3.2771350109799173e-06, "loss": 0.0033, "step": 5050 }, { "epoch": 297.11764705882354, "grad_norm": 0.320818156003952, "learning_rate": 3.2736098594986853e-06, "loss": 0.0049, "step": 5051 }, { "epoch": 297.1764705882353, "grad_norm": 0.2643278241157532, "learning_rate": 3.270086233897861e-06, "loss": 0.0055, "step": 5052 }, { "epoch": 297.2352941176471, "grad_norm": 0.3261861205101013, "learning_rate": 3.2665641349767818e-06, "loss": 0.0041, "step": 5053 }, { "epoch": 297.29411764705884, "grad_norm": 0.22410330176353455, "learning_rate": 3.2630435635344283e-06, "loss": 0.0029, "step": 5054 }, { "epoch": 297.3529411764706, "grad_norm": 0.2443872094154358, "learning_rate": 3.2595245203694435e-06, "loss": 0.0023, "step": 5055 }, { "epoch": 297.4117647058824, "grad_norm": 0.22941479086875916, "learning_rate": 3.256007006280124e-06, "loss": 0.0027, "step": 5056 }, { "epoch": 297.47058823529414, "grad_norm": 0.288858562707901, "learning_rate": 3.252491022064419e-06, "loss": 0.0043, "step": 5057 }, { "epoch": 297.52941176470586, "grad_norm": 0.4385441541671753, "learning_rate": 3.2489765685199227e-06, "loss": 0.0042, "step": 5058 }, { "epoch": 297.5882352941176, "grad_norm": 0.25460848212242126, "learning_rate": 3.2454636464438916e-06, "loss": 0.0042, "step": 5059 }, { "epoch": 297.6470588235294, "grad_norm": 0.530293881893158, "learning_rate": 3.241952256633234e-06, "loss": 0.0068, "step": 5060 }, { "epoch": 297.70588235294116, "grad_norm": 1.2022805213928223, "learning_rate": 3.2384423998845027e-06, "loss": 0.0049, "step": 5061 }, { "epoch": 297.7647058823529, "grad_norm": 0.314973384141922, "learning_rate": 3.2349340769939107e-06, "loss": 0.0028, "step": 5062 }, { "epoch": 297.8235294117647, "grad_norm": 0.2776920795440674, "learning_rate": 3.2314272887573216e-06, "loss": 0.0048, "step": 5063 }, { "epoch": 297.88235294117646, "grad_norm": 0.37969014048576355, "learning_rate": 3.2279220359702513e-06, "loss": 0.0071, "step": 5064 }, { "epoch": 297.94117647058823, "grad_norm": 0.21562278270721436, "learning_rate": 3.22441831942786e-06, "loss": 0.0025, "step": 5065 }, { "epoch": 298.0, "grad_norm": 0.41813215613365173, "learning_rate": 3.2209161399249677e-06, "loss": 0.006, "step": 5066 }, { "epoch": 298.05882352941177, "grad_norm": 0.46210306882858276, "learning_rate": 3.2174154982560445e-06, "loss": 0.0056, "step": 5067 }, { "epoch": 298.11764705882354, "grad_norm": 0.3438449203968048, "learning_rate": 3.2139163952152117e-06, "loss": 0.0065, "step": 5068 }, { "epoch": 298.1764705882353, "grad_norm": 0.24953021109104156, "learning_rate": 3.210418831596236e-06, "loss": 0.0032, "step": 5069 }, { "epoch": 298.2352941176471, "grad_norm": 0.26152554154396057, "learning_rate": 3.206922808192542e-06, "loss": 0.0032, "step": 5070 }, { "epoch": 298.29411764705884, "grad_norm": 0.24983376264572144, "learning_rate": 3.2034283257971997e-06, "loss": 0.0018, "step": 5071 }, { "epoch": 298.3529411764706, "grad_norm": 0.3913223445415497, "learning_rate": 3.199935385202937e-06, "loss": 0.0041, "step": 5072 }, { "epoch": 298.4117647058824, "grad_norm": 0.48729267716407776, "learning_rate": 3.1964439872021203e-06, "loss": 0.005, "step": 5073 }, { "epoch": 298.47058823529414, "grad_norm": 0.2577756941318512, "learning_rate": 3.192954132586775e-06, "loss": 0.0032, "step": 5074 }, { "epoch": 298.52941176470586, "grad_norm": 0.3616645336151123, "learning_rate": 3.1894658221485785e-06, "loss": 0.0056, "step": 5075 }, { "epoch": 298.5882352941176, "grad_norm": 0.18583618104457855, "learning_rate": 3.1859790566788464e-06, "loss": 0.0018, "step": 5076 }, { "epoch": 298.6470588235294, "grad_norm": 0.39654138684272766, "learning_rate": 3.182493836968553e-06, "loss": 0.0038, "step": 5077 }, { "epoch": 298.70588235294116, "grad_norm": 0.32300955057144165, "learning_rate": 3.1790101638083203e-06, "loss": 0.0078, "step": 5078 }, { "epoch": 298.7647058823529, "grad_norm": 0.36271941661834717, "learning_rate": 3.1755280379884234e-06, "loss": 0.0057, "step": 5079 }, { "epoch": 298.8235294117647, "grad_norm": 0.2814142107963562, "learning_rate": 3.172047460298774e-06, "loss": 0.0027, "step": 5080 }, { "epoch": 298.88235294117646, "grad_norm": 0.3731245696544647, "learning_rate": 3.1685684315289445e-06, "loss": 0.0095, "step": 5081 }, { "epoch": 298.94117647058823, "grad_norm": 0.1557496041059494, "learning_rate": 3.1650909524681515e-06, "loss": 0.0026, "step": 5082 }, { "epoch": 299.0, "grad_norm": 0.1861059069633484, "learning_rate": 3.1616150239052647e-06, "loss": 0.0031, "step": 5083 }, { "epoch": 299.05882352941177, "grad_norm": 0.3548262417316437, "learning_rate": 3.15814064662879e-06, "loss": 0.0032, "step": 5084 }, { "epoch": 299.11764705882354, "grad_norm": 0.45091503858566284, "learning_rate": 3.1546678214268933e-06, "loss": 0.005, "step": 5085 }, { "epoch": 299.1764705882353, "grad_norm": 0.2446107417345047, "learning_rate": 3.1511965490873876e-06, "loss": 0.0034, "step": 5086 }, { "epoch": 299.2352941176471, "grad_norm": 0.7371914386749268, "learning_rate": 3.1477268303977236e-06, "loss": 0.0035, "step": 5087 }, { "epoch": 299.29411764705884, "grad_norm": 1.3087167739868164, "learning_rate": 3.1442586661450102e-06, "loss": 0.0079, "step": 5088 }, { "epoch": 299.3529411764706, "grad_norm": 0.44898417592048645, "learning_rate": 3.140792057116e-06, "loss": 0.008, "step": 5089 }, { "epoch": 299.4117647058824, "grad_norm": 0.18012621998786926, "learning_rate": 3.137327004097095e-06, "loss": 0.0029, "step": 5090 }, { "epoch": 299.47058823529414, "grad_norm": 0.2167489230632782, "learning_rate": 3.133863507874335e-06, "loss": 0.0028, "step": 5091 }, { "epoch": 299.52941176470586, "grad_norm": 0.3843235969543457, "learning_rate": 3.130401569233418e-06, "loss": 0.0035, "step": 5092 }, { "epoch": 299.5882352941176, "grad_norm": 0.837277889251709, "learning_rate": 3.1269411889596845e-06, "loss": 0.0048, "step": 5093 }, { "epoch": 299.6470588235294, "grad_norm": 0.5502575635910034, "learning_rate": 3.123482367838122e-06, "loss": 0.0046, "step": 5094 }, { "epoch": 299.70588235294116, "grad_norm": 0.46360301971435547, "learning_rate": 3.1200251066533606e-06, "loss": 0.0043, "step": 5095 }, { "epoch": 299.7647058823529, "grad_norm": 0.29449301958084106, "learning_rate": 3.1165694061896788e-06, "loss": 0.0046, "step": 5096 }, { "epoch": 299.8235294117647, "grad_norm": 0.3558734357357025, "learning_rate": 3.113115267231004e-06, "loss": 0.0092, "step": 5097 }, { "epoch": 299.88235294117646, "grad_norm": 0.23418685793876648, "learning_rate": 3.1096626905609095e-06, "loss": 0.0043, "step": 5098 }, { "epoch": 299.94117647058823, "grad_norm": 0.1631009429693222, "learning_rate": 3.106211676962606e-06, "loss": 0.0034, "step": 5099 }, { "epoch": 300.0, "grad_norm": 1.1248925924301147, "learning_rate": 3.1027622272189572e-06, "loss": 0.0056, "step": 5100 }, { "epoch": 300.05882352941177, "grad_norm": 0.4873630404472351, "learning_rate": 3.099314342112475e-06, "loss": 0.0062, "step": 5101 }, { "epoch": 300.11764705882354, "grad_norm": 0.36406609416007996, "learning_rate": 3.0958680224253034e-06, "loss": 0.0068, "step": 5102 }, { "epoch": 300.1764705882353, "grad_norm": 0.3879725933074951, "learning_rate": 3.092423268939243e-06, "loss": 0.0048, "step": 5103 }, { "epoch": 300.2352941176471, "grad_norm": 0.35239148139953613, "learning_rate": 3.0889800824357374e-06, "loss": 0.0049, "step": 5104 }, { "epoch": 300.29411764705884, "grad_norm": 0.5288870930671692, "learning_rate": 3.0855384636958742e-06, "loss": 0.0054, "step": 5105 }, { "epoch": 300.3529411764706, "grad_norm": 0.24803736805915833, "learning_rate": 3.0820984135003773e-06, "loss": 0.0048, "step": 5106 }, { "epoch": 300.4117647058824, "grad_norm": 0.2523373067378998, "learning_rate": 3.078659932629625e-06, "loss": 0.0028, "step": 5107 }, { "epoch": 300.47058823529414, "grad_norm": 0.13963212072849274, "learning_rate": 3.075223021863638e-06, "loss": 0.0019, "step": 5108 }, { "epoch": 300.52941176470586, "grad_norm": 0.2204269915819168, "learning_rate": 3.0717876819820793e-06, "loss": 0.0028, "step": 5109 }, { "epoch": 300.5882352941176, "grad_norm": 0.20477457344532013, "learning_rate": 3.0683539137642504e-06, "loss": 0.0037, "step": 5110 }, { "epoch": 300.6470588235294, "grad_norm": 0.30093926191329956, "learning_rate": 3.0649217179891045e-06, "loss": 0.003, "step": 5111 }, { "epoch": 300.70588235294116, "grad_norm": 0.2777624726295471, "learning_rate": 3.0614910954352363e-06, "loss": 0.0046, "step": 5112 }, { "epoch": 300.7647058823529, "grad_norm": 1.3247343301773071, "learning_rate": 3.0580620468808763e-06, "loss": 0.0058, "step": 5113 }, { "epoch": 300.8235294117647, "grad_norm": 0.2604488730430603, "learning_rate": 3.0546345731039063e-06, "loss": 0.0049, "step": 5114 }, { "epoch": 300.88235294117646, "grad_norm": 0.3197847902774811, "learning_rate": 3.0512086748818494e-06, "loss": 0.0036, "step": 5115 }, { "epoch": 300.94117647058823, "grad_norm": 0.27862465381622314, "learning_rate": 3.047784352991873e-06, "loss": 0.0044, "step": 5116 }, { "epoch": 301.0, "grad_norm": 0.56915283203125, "learning_rate": 3.0443616082107753e-06, "loss": 0.0046, "step": 5117 }, { "epoch": 301.05882352941177, "grad_norm": 0.3749302923679352, "learning_rate": 3.040940441315011e-06, "loss": 0.0046, "step": 5118 }, { "epoch": 301.11764705882354, "grad_norm": 0.15893767774105072, "learning_rate": 3.03752085308067e-06, "loss": 0.0026, "step": 5119 }, { "epoch": 301.1764705882353, "grad_norm": 0.3658201992511749, "learning_rate": 3.0341028442834897e-06, "loss": 0.0039, "step": 5120 }, { "epoch": 301.2352941176471, "grad_norm": 1.7125252485275269, "learning_rate": 3.030686415698837e-06, "loss": 0.0045, "step": 5121 }, { "epoch": 301.29411764705884, "grad_norm": 0.34323880076408386, "learning_rate": 3.0272715681017308e-06, "loss": 0.0047, "step": 5122 }, { "epoch": 301.3529411764706, "grad_norm": 0.17215201258659363, "learning_rate": 3.023858302266831e-06, "loss": 0.0029, "step": 5123 }, { "epoch": 301.4117647058824, "grad_norm": 0.3302001357078552, "learning_rate": 3.020446618968438e-06, "loss": 0.0052, "step": 5124 }, { "epoch": 301.47058823529414, "grad_norm": 0.3681977093219757, "learning_rate": 3.0170365189804853e-06, "loss": 0.0073, "step": 5125 }, { "epoch": 301.52941176470586, "grad_norm": 0.9341127276420593, "learning_rate": 3.0136280030765564e-06, "loss": 0.0028, "step": 5126 }, { "epoch": 301.5882352941176, "grad_norm": 0.42712005972862244, "learning_rate": 3.0102210720298763e-06, "loss": 0.0055, "step": 5127 }, { "epoch": 301.6470588235294, "grad_norm": 0.26539814472198486, "learning_rate": 3.0068157266133004e-06, "loss": 0.0041, "step": 5128 }, { "epoch": 301.70588235294116, "grad_norm": 0.3621842861175537, "learning_rate": 3.003411967599333e-06, "loss": 0.0055, "step": 5129 }, { "epoch": 301.7647058823529, "grad_norm": 0.20520097017288208, "learning_rate": 3.000009795760118e-06, "loss": 0.0037, "step": 5130 }, { "epoch": 301.8235294117647, "grad_norm": 0.1802523136138916, "learning_rate": 2.9966092118674383e-06, "loss": 0.0025, "step": 5131 }, { "epoch": 301.88235294117646, "grad_norm": 0.3101455569267273, "learning_rate": 2.9932102166927114e-06, "loss": 0.0033, "step": 5132 }, { "epoch": 301.94117647058823, "grad_norm": 0.37225431203842163, "learning_rate": 2.9898128110070013e-06, "loss": 0.0042, "step": 5133 }, { "epoch": 302.0, "grad_norm": 0.22695836424827576, "learning_rate": 2.9864169955810085e-06, "loss": 0.0046, "step": 5134 }, { "epoch": 302.05882352941177, "grad_norm": 0.20497460663318634, "learning_rate": 2.9830227711850756e-06, "loss": 0.0043, "step": 5135 }, { "epoch": 302.11764705882354, "grad_norm": 0.2203642874956131, "learning_rate": 2.9796301385891767e-06, "loss": 0.004, "step": 5136 }, { "epoch": 302.1764705882353, "grad_norm": 0.33191797137260437, "learning_rate": 2.976239098562933e-06, "loss": 0.0033, "step": 5137 }, { "epoch": 302.2352941176471, "grad_norm": 0.37129512429237366, "learning_rate": 2.9728496518756046e-06, "loss": 0.004, "step": 5138 }, { "epoch": 302.29411764705884, "grad_norm": 0.22902603447437286, "learning_rate": 2.9694617992960797e-06, "loss": 0.0033, "step": 5139 }, { "epoch": 302.3529411764706, "grad_norm": 0.4476611912250519, "learning_rate": 2.9660755415928955e-06, "loss": 0.0052, "step": 5140 }, { "epoch": 302.4117647058824, "grad_norm": 0.3820974826812744, "learning_rate": 2.9626908795342246e-06, "loss": 0.0064, "step": 5141 }, { "epoch": 302.47058823529414, "grad_norm": 0.3100299835205078, "learning_rate": 2.9593078138878795e-06, "loss": 0.0055, "step": 5142 }, { "epoch": 302.52941176470586, "grad_norm": 0.20964114367961884, "learning_rate": 2.9559263454213016e-06, "loss": 0.0048, "step": 5143 }, { "epoch": 302.5882352941176, "grad_norm": 0.3121068477630615, "learning_rate": 2.9525464749015797e-06, "loss": 0.0034, "step": 5144 }, { "epoch": 302.6470588235294, "grad_norm": 0.18130141496658325, "learning_rate": 2.949168203095438e-06, "loss": 0.0026, "step": 5145 }, { "epoch": 302.70588235294116, "grad_norm": 0.4094892144203186, "learning_rate": 2.9457915307692387e-06, "loss": 0.0044, "step": 5146 }, { "epoch": 302.7647058823529, "grad_norm": 0.2983984649181366, "learning_rate": 2.9424164586889725e-06, "loss": 0.0031, "step": 5147 }, { "epoch": 302.8235294117647, "grad_norm": 0.6380226612091064, "learning_rate": 2.9390429876202788e-06, "loss": 0.0095, "step": 5148 }, { "epoch": 302.88235294117646, "grad_norm": 0.18526583909988403, "learning_rate": 2.9356711183284283e-06, "loss": 0.0024, "step": 5149 }, { "epoch": 302.94117647058823, "grad_norm": 0.3079933226108551, "learning_rate": 2.93230085157833e-06, "loss": 0.0038, "step": 5150 }, { "epoch": 303.0, "grad_norm": 0.4308571517467499, "learning_rate": 2.9289321881345257e-06, "loss": 0.0052, "step": 5151 }, { "epoch": 303.05882352941177, "grad_norm": 0.21601830422878265, "learning_rate": 2.9255651287611964e-06, "loss": 0.0035, "step": 5152 }, { "epoch": 303.11764705882354, "grad_norm": 0.4933680593967438, "learning_rate": 2.9221996742221636e-06, "loss": 0.0086, "step": 5153 }, { "epoch": 303.1764705882353, "grad_norm": 0.29560741782188416, "learning_rate": 2.9188358252808725e-06, "loss": 0.0034, "step": 5154 }, { "epoch": 303.2352941176471, "grad_norm": 0.41519370675086975, "learning_rate": 2.9154735827004166e-06, "loss": 0.0028, "step": 5155 }, { "epoch": 303.29411764705884, "grad_norm": 0.23358629643917084, "learning_rate": 2.9121129472435183e-06, "loss": 0.0043, "step": 5156 }, { "epoch": 303.3529411764706, "grad_norm": 0.4842175841331482, "learning_rate": 2.90875391967254e-06, "loss": 0.004, "step": 5157 }, { "epoch": 303.4117647058824, "grad_norm": 0.49429893493652344, "learning_rate": 2.9053965007494723e-06, "loss": 0.005, "step": 5158 }, { "epoch": 303.47058823529414, "grad_norm": 0.27337905764579773, "learning_rate": 2.9020406912359468e-06, "loss": 0.0053, "step": 5159 }, { "epoch": 303.52941176470586, "grad_norm": 0.2576807141304016, "learning_rate": 2.898686491893229e-06, "loss": 0.0032, "step": 5160 }, { "epoch": 303.5882352941176, "grad_norm": 0.5979758501052856, "learning_rate": 2.8953339034822204e-06, "loss": 0.0058, "step": 5161 }, { "epoch": 303.6470588235294, "grad_norm": 0.389848530292511, "learning_rate": 2.891982926763449e-06, "loss": 0.005, "step": 5162 }, { "epoch": 303.70588235294116, "grad_norm": 0.1633218675851822, "learning_rate": 2.8886335624970886e-06, "loss": 0.0031, "step": 5163 }, { "epoch": 303.7647058823529, "grad_norm": 0.3926038146018982, "learning_rate": 2.885285811442938e-06, "loss": 0.0056, "step": 5164 }, { "epoch": 303.8235294117647, "grad_norm": 0.28602761030197144, "learning_rate": 2.881939674360441e-06, "loss": 0.0029, "step": 5165 }, { "epoch": 303.88235294117646, "grad_norm": 0.2817990183830261, "learning_rate": 2.8785951520086597e-06, "loss": 0.0038, "step": 5166 }, { "epoch": 303.94117647058823, "grad_norm": 0.24772058427333832, "learning_rate": 2.8752522451463026e-06, "loss": 0.0023, "step": 5167 }, { "epoch": 304.0, "grad_norm": 0.4616028964519501, "learning_rate": 2.8719109545317102e-06, "loss": 0.0068, "step": 5168 }, { "epoch": 304.05882352941177, "grad_norm": 0.3397826552391052, "learning_rate": 2.868571280922846e-06, "loss": 0.0025, "step": 5169 }, { "epoch": 304.11764705882354, "grad_norm": 0.515708327293396, "learning_rate": 2.8652332250773195e-06, "loss": 0.0043, "step": 5170 }, { "epoch": 304.1764705882353, "grad_norm": 0.2610168755054474, "learning_rate": 2.8618967877523685e-06, "loss": 0.0031, "step": 5171 }, { "epoch": 304.2352941176471, "grad_norm": 0.4320281744003296, "learning_rate": 2.858561969704864e-06, "loss": 0.0032, "step": 5172 }, { "epoch": 304.29411764705884, "grad_norm": 0.24279795587062836, "learning_rate": 2.855228771691304e-06, "loss": 0.0031, "step": 5173 }, { "epoch": 304.3529411764706, "grad_norm": 0.39744552969932556, "learning_rate": 2.8518971944678277e-06, "loss": 0.0103, "step": 5174 }, { "epoch": 304.4117647058824, "grad_norm": 0.365173876285553, "learning_rate": 2.848567238790201e-06, "loss": 0.0047, "step": 5175 }, { "epoch": 304.47058823529414, "grad_norm": 0.26051560044288635, "learning_rate": 2.8452389054138284e-06, "loss": 0.0048, "step": 5176 }, { "epoch": 304.52941176470586, "grad_norm": 0.49173352122306824, "learning_rate": 2.8419121950937345e-06, "loss": 0.0037, "step": 5177 }, { "epoch": 304.5882352941176, "grad_norm": 0.3936510682106018, "learning_rate": 2.8385871085845875e-06, "loss": 0.0056, "step": 5178 }, { "epoch": 304.6470588235294, "grad_norm": 0.31817176938056946, "learning_rate": 2.8352636466406837e-06, "loss": 0.0045, "step": 5179 }, { "epoch": 304.70588235294116, "grad_norm": 0.19305072724819183, "learning_rate": 2.8319418100159456e-06, "loss": 0.0024, "step": 5180 }, { "epoch": 304.7647058823529, "grad_norm": 0.16933231055736542, "learning_rate": 2.828621599463933e-06, "loss": 0.0032, "step": 5181 }, { "epoch": 304.8235294117647, "grad_norm": 0.3434338867664337, "learning_rate": 2.825303015737836e-06, "loss": 0.004, "step": 5182 }, { "epoch": 304.88235294117646, "grad_norm": 0.2800523638725281, "learning_rate": 2.821986059590477e-06, "loss": 0.0047, "step": 5183 }, { "epoch": 304.94117647058823, "grad_norm": 0.32590511441230774, "learning_rate": 2.8186707317743e-06, "loss": 0.0042, "step": 5184 }, { "epoch": 305.0, "grad_norm": 0.18348319828510284, "learning_rate": 2.8153570330413925e-06, "loss": 0.0026, "step": 5185 }, { "epoch": 305.05882352941177, "grad_norm": 0.2918071746826172, "learning_rate": 2.8120449641434645e-06, "loss": 0.0047, "step": 5186 }, { "epoch": 305.11764705882354, "grad_norm": 0.199892058968544, "learning_rate": 2.808734525831861e-06, "loss": 0.0029, "step": 5187 }, { "epoch": 305.1764705882353, "grad_norm": 0.237819641828537, "learning_rate": 2.8054257188575486e-06, "loss": 0.0024, "step": 5188 }, { "epoch": 305.2352941176471, "grad_norm": 0.22059649229049683, "learning_rate": 2.802118543971133e-06, "loss": 0.0041, "step": 5189 }, { "epoch": 305.29411764705884, "grad_norm": 0.5753943920135498, "learning_rate": 2.7988130019228456e-06, "loss": 0.0053, "step": 5190 }, { "epoch": 305.3529411764706, "grad_norm": 0.1279798001050949, "learning_rate": 2.795509093462553e-06, "loss": 0.0016, "step": 5191 }, { "epoch": 305.4117647058824, "grad_norm": 0.1541050672531128, "learning_rate": 2.792206819339737e-06, "loss": 0.0026, "step": 5192 }, { "epoch": 305.47058823529414, "grad_norm": 0.5050917863845825, "learning_rate": 2.7889061803035233e-06, "loss": 0.0032, "step": 5193 }, { "epoch": 305.52941176470586, "grad_norm": 0.4057146906852722, "learning_rate": 2.7856071771026627e-06, "loss": 0.0073, "step": 5194 }, { "epoch": 305.5882352941176, "grad_norm": 0.3848285675048828, "learning_rate": 2.7823098104855297e-06, "loss": 0.0047, "step": 5195 }, { "epoch": 305.6470588235294, "grad_norm": 0.4459334909915924, "learning_rate": 2.779014081200132e-06, "loss": 0.006, "step": 5196 }, { "epoch": 305.70588235294116, "grad_norm": 0.2622518539428711, "learning_rate": 2.7757199899941066e-06, "loss": 0.0024, "step": 5197 }, { "epoch": 305.7647058823529, "grad_norm": 0.3845638632774353, "learning_rate": 2.772427537614719e-06, "loss": 0.005, "step": 5198 }, { "epoch": 305.8235294117647, "grad_norm": 0.3739514648914337, "learning_rate": 2.7691367248088573e-06, "loss": 0.0037, "step": 5199 }, { "epoch": 305.88235294117646, "grad_norm": 0.29463398456573486, "learning_rate": 2.7658475523230444e-06, "loss": 0.0041, "step": 5200 }, { "epoch": 305.94117647058823, "grad_norm": 0.9807157516479492, "learning_rate": 2.7625600209034287e-06, "loss": 0.0102, "step": 5201 }, { "epoch": 306.0, "grad_norm": 0.2644161283969879, "learning_rate": 2.759274131295787e-06, "loss": 0.0041, "step": 5202 }, { "epoch": 306.05882352941177, "grad_norm": 0.3798728883266449, "learning_rate": 2.755989884245519e-06, "loss": 0.0034, "step": 5203 }, { "epoch": 306.11764705882354, "grad_norm": 0.35397985577583313, "learning_rate": 2.7527072804976586e-06, "loss": 0.0048, "step": 5204 }, { "epoch": 306.1764705882353, "grad_norm": 0.2940855920314789, "learning_rate": 2.7494263207968654e-06, "loss": 0.0046, "step": 5205 }, { "epoch": 306.2352941176471, "grad_norm": 0.3309803307056427, "learning_rate": 2.7461470058874186e-06, "loss": 0.0077, "step": 5206 }, { "epoch": 306.29411764705884, "grad_norm": 0.44603854417800903, "learning_rate": 2.742869336513234e-06, "loss": 0.0041, "step": 5207 }, { "epoch": 306.3529411764706, "grad_norm": 0.14429710805416107, "learning_rate": 2.739593313417851e-06, "loss": 0.002, "step": 5208 }, { "epoch": 306.4117647058824, "grad_norm": 0.2626956105232239, "learning_rate": 2.7363189373444366e-06, "loss": 0.0027, "step": 5209 }, { "epoch": 306.47058823529414, "grad_norm": 0.25145214796066284, "learning_rate": 2.7330462090357767e-06, "loss": 0.0045, "step": 5210 }, { "epoch": 306.52941176470586, "grad_norm": 0.31656596064567566, "learning_rate": 2.7297751292342935e-06, "loss": 0.0038, "step": 5211 }, { "epoch": 306.5882352941176, "grad_norm": 0.3281380832195282, "learning_rate": 2.726505698682028e-06, "loss": 0.0053, "step": 5212 }, { "epoch": 306.6470588235294, "grad_norm": 0.30381447076797485, "learning_rate": 2.7232379181206558e-06, "loss": 0.0061, "step": 5213 }, { "epoch": 306.70588235294116, "grad_norm": 0.2950480580329895, "learning_rate": 2.7199717882914644e-06, "loss": 0.0036, "step": 5214 }, { "epoch": 306.7647058823529, "grad_norm": 0.19774901866912842, "learning_rate": 2.716707309935378e-06, "loss": 0.0032, "step": 5215 }, { "epoch": 306.8235294117647, "grad_norm": 0.35038474202156067, "learning_rate": 2.713444483792945e-06, "loss": 0.0026, "step": 5216 }, { "epoch": 306.88235294117646, "grad_norm": 0.22540079057216644, "learning_rate": 2.7101833106043374e-06, "loss": 0.0026, "step": 5217 }, { "epoch": 306.94117647058823, "grad_norm": 0.16963407397270203, "learning_rate": 2.706923791109347e-06, "loss": 0.0036, "step": 5218 }, { "epoch": 307.0, "grad_norm": 0.4933556020259857, "learning_rate": 2.7036659260473973e-06, "loss": 0.0031, "step": 5219 }, { "epoch": 307.05882352941177, "grad_norm": 0.34285515546798706, "learning_rate": 2.7004097161575393e-06, "loss": 0.0062, "step": 5220 }, { "epoch": 307.11764705882354, "grad_norm": 0.2658463418483734, "learning_rate": 2.6971551621784355e-06, "loss": 0.002, "step": 5221 }, { "epoch": 307.1764705882353, "grad_norm": 0.3463625907897949, "learning_rate": 2.693902264848386e-06, "loss": 0.0052, "step": 5222 }, { "epoch": 307.2352941176471, "grad_norm": 0.357298880815506, "learning_rate": 2.6906510249053077e-06, "loss": 0.0064, "step": 5223 }, { "epoch": 307.29411764705884, "grad_norm": 0.26889994740486145, "learning_rate": 2.6874014430867478e-06, "loss": 0.0053, "step": 5224 }, { "epoch": 307.3529411764706, "grad_norm": 0.16359084844589233, "learning_rate": 2.6841535201298675e-06, "loss": 0.0028, "step": 5225 }, { "epoch": 307.4117647058824, "grad_norm": 0.2300378382205963, "learning_rate": 2.680907256771461e-06, "loss": 0.004, "step": 5226 }, { "epoch": 307.47058823529414, "grad_norm": 0.4503929316997528, "learning_rate": 2.6776626537479413e-06, "loss": 0.0045, "step": 5227 }, { "epoch": 307.52941176470586, "grad_norm": 0.9923350811004639, "learning_rate": 2.6744197117953506e-06, "loss": 0.0039, "step": 5228 }, { "epoch": 307.5882352941176, "grad_norm": 0.19319751858711243, "learning_rate": 2.671178431649343e-06, "loss": 0.0021, "step": 5229 }, { "epoch": 307.6470588235294, "grad_norm": 0.22922910749912262, "learning_rate": 2.6679388140452035e-06, "loss": 0.0039, "step": 5230 }, { "epoch": 307.70588235294116, "grad_norm": 0.4227076768875122, "learning_rate": 2.6647008597178447e-06, "loss": 0.0037, "step": 5231 }, { "epoch": 307.7647058823529, "grad_norm": 0.19046780467033386, "learning_rate": 2.661464569401789e-06, "loss": 0.0031, "step": 5232 }, { "epoch": 307.8235294117647, "grad_norm": 0.3350561261177063, "learning_rate": 2.658229943831191e-06, "loss": 0.0075, "step": 5233 }, { "epoch": 307.88235294117646, "grad_norm": 0.32816192507743835, "learning_rate": 2.654996983739825e-06, "loss": 0.0037, "step": 5234 }, { "epoch": 307.94117647058823, "grad_norm": 0.15731066465377808, "learning_rate": 2.65176568986109e-06, "loss": 0.0021, "step": 5235 }, { "epoch": 308.0, "grad_norm": 0.5065334439277649, "learning_rate": 2.648536062927999e-06, "loss": 0.0076, "step": 5236 }, { "epoch": 308.05882352941177, "grad_norm": 0.5073394179344177, "learning_rate": 2.6453081036731944e-06, "loss": 0.0082, "step": 5237 }, { "epoch": 308.11764705882354, "grad_norm": 0.36296942830085754, "learning_rate": 2.6420818128289392e-06, "loss": 0.0033, "step": 5238 }, { "epoch": 308.1764705882353, "grad_norm": 0.43084484338760376, "learning_rate": 2.63885719112712e-06, "loss": 0.0039, "step": 5239 }, { "epoch": 308.2352941176471, "grad_norm": 0.7327412366867065, "learning_rate": 2.635634239299235e-06, "loss": 0.0048, "step": 5240 }, { "epoch": 308.29411764705884, "grad_norm": 0.2862010896205902, "learning_rate": 2.6324129580764136e-06, "loss": 0.0037, "step": 5241 }, { "epoch": 308.3529411764706, "grad_norm": 0.26919737458229065, "learning_rate": 2.629193348189404e-06, "loss": 0.0043, "step": 5242 }, { "epoch": 308.4117647058824, "grad_norm": 0.22636699676513672, "learning_rate": 2.625975410368574e-06, "loss": 0.0025, "step": 5243 }, { "epoch": 308.47058823529414, "grad_norm": 0.2863548696041107, "learning_rate": 2.6227591453439094e-06, "loss": 0.0039, "step": 5244 }, { "epoch": 308.52941176470586, "grad_norm": 0.22916802763938904, "learning_rate": 2.61954455384502e-06, "loss": 0.0034, "step": 5245 }, { "epoch": 308.5882352941176, "grad_norm": 0.24543508887290955, "learning_rate": 2.6163316366011404e-06, "loss": 0.0031, "step": 5246 }, { "epoch": 308.6470588235294, "grad_norm": 0.2251710146665573, "learning_rate": 2.6131203943411134e-06, "loss": 0.0043, "step": 5247 }, { "epoch": 308.70588235294116, "grad_norm": 0.2736024260520935, "learning_rate": 2.6099108277934105e-06, "loss": 0.0043, "step": 5248 }, { "epoch": 308.7647058823529, "grad_norm": 0.33515864610671997, "learning_rate": 2.606702937686123e-06, "loss": 0.0035, "step": 5249 }, { "epoch": 308.8235294117647, "grad_norm": 0.11970935761928558, "learning_rate": 2.6034967247469633e-06, "loss": 0.002, "step": 5250 }, { "epoch": 308.88235294117646, "grad_norm": 0.2273358702659607, "learning_rate": 2.6002921897032528e-06, "loss": 0.0062, "step": 5251 }, { "epoch": 308.94117647058823, "grad_norm": 0.349353551864624, "learning_rate": 2.5970893332819436e-06, "loss": 0.0067, "step": 5252 }, { "epoch": 309.0, "grad_norm": 0.33355244994163513, "learning_rate": 2.593888156209603e-06, "loss": 0.0032, "step": 5253 }, { "epoch": 309.05882352941177, "grad_norm": 0.2139597088098526, "learning_rate": 2.5906886592124203e-06, "loss": 0.0036, "step": 5254 }, { "epoch": 309.11764705882354, "grad_norm": 0.35271701216697693, "learning_rate": 2.587490843016193e-06, "loss": 0.0054, "step": 5255 }, { "epoch": 309.1764705882353, "grad_norm": 0.8772379159927368, "learning_rate": 2.584294708346352e-06, "loss": 0.0055, "step": 5256 }, { "epoch": 309.2352941176471, "grad_norm": 0.29882606863975525, "learning_rate": 2.5811002559279406e-06, "loss": 0.004, "step": 5257 }, { "epoch": 309.29411764705884, "grad_norm": 0.3549642860889435, "learning_rate": 2.5779074864856156e-06, "loss": 0.0047, "step": 5258 }, { "epoch": 309.3529411764706, "grad_norm": 0.261014848947525, "learning_rate": 2.5747164007436567e-06, "loss": 0.0047, "step": 5259 }, { "epoch": 309.4117647058824, "grad_norm": 0.8714029788970947, "learning_rate": 2.571526999425965e-06, "loss": 0.0061, "step": 5260 }, { "epoch": 309.47058823529414, "grad_norm": 0.1398642212152481, "learning_rate": 2.5683392832560504e-06, "loss": 0.0022, "step": 5261 }, { "epoch": 309.52941176470586, "grad_norm": 0.2682422697544098, "learning_rate": 2.5651532529570476e-06, "loss": 0.0048, "step": 5262 }, { "epoch": 309.5882352941176, "grad_norm": 0.19890160858631134, "learning_rate": 2.5619689092517085e-06, "loss": 0.0018, "step": 5263 }, { "epoch": 309.6470588235294, "grad_norm": 0.45883429050445557, "learning_rate": 2.558786252862402e-06, "loss": 0.0033, "step": 5264 }, { "epoch": 309.70588235294116, "grad_norm": 0.5085426568984985, "learning_rate": 2.555605284511109e-06, "loss": 0.003, "step": 5265 }, { "epoch": 309.7647058823529, "grad_norm": 0.5022804737091064, "learning_rate": 2.552426004919433e-06, "loss": 0.0041, "step": 5266 }, { "epoch": 309.8235294117647, "grad_norm": 0.5651147961616516, "learning_rate": 2.549248414808594e-06, "loss": 0.0046, "step": 5267 }, { "epoch": 309.88235294117646, "grad_norm": 0.2087274044752121, "learning_rate": 2.5460725148994292e-06, "loss": 0.0038, "step": 5268 }, { "epoch": 309.94117647058823, "grad_norm": 0.28819540143013, "learning_rate": 2.542898305912388e-06, "loss": 0.0064, "step": 5269 }, { "epoch": 310.0, "grad_norm": 0.23982146382331848, "learning_rate": 2.5397257885675396e-06, "loss": 0.0029, "step": 5270 }, { "epoch": 310.05882352941177, "grad_norm": 0.28828859329223633, "learning_rate": 2.5365549635845686e-06, "loss": 0.0056, "step": 5271 }, { "epoch": 310.11764705882354, "grad_norm": 0.463187038898468, "learning_rate": 2.533385831682782e-06, "loss": 0.0029, "step": 5272 }, { "epoch": 310.1764705882353, "grad_norm": 0.17037427425384521, "learning_rate": 2.5302183935810875e-06, "loss": 0.0024, "step": 5273 }, { "epoch": 310.2352941176471, "grad_norm": 0.0942346528172493, "learning_rate": 2.527052649998023e-06, "loss": 0.0014, "step": 5274 }, { "epoch": 310.29411764705884, "grad_norm": 0.4438548684120178, "learning_rate": 2.5238886016517395e-06, "loss": 0.0041, "step": 5275 }, { "epoch": 310.3529411764706, "grad_norm": 0.46890607476234436, "learning_rate": 2.5207262492599947e-06, "loss": 0.0026, "step": 5276 }, { "epoch": 310.4117647058824, "grad_norm": 0.3637562096118927, "learning_rate": 2.51756559354017e-06, "loss": 0.0061, "step": 5277 }, { "epoch": 310.47058823529414, "grad_norm": 0.34508925676345825, "learning_rate": 2.5144066352092612e-06, "loss": 0.0039, "step": 5278 }, { "epoch": 310.52941176470586, "grad_norm": 0.16676367819309235, "learning_rate": 2.5112493749838774e-06, "loss": 0.0021, "step": 5279 }, { "epoch": 310.5882352941176, "grad_norm": 0.31995639204978943, "learning_rate": 2.50809381358024e-06, "loss": 0.0039, "step": 5280 }, { "epoch": 310.6470588235294, "grad_norm": 0.13086190819740295, "learning_rate": 2.5049399517141882e-06, "loss": 0.0017, "step": 5281 }, { "epoch": 310.70588235294116, "grad_norm": 0.252900630235672, "learning_rate": 2.5017877901011767e-06, "loss": 0.0058, "step": 5282 }, { "epoch": 310.7647058823529, "grad_norm": 0.36878886818885803, "learning_rate": 2.4986373294562737e-06, "loss": 0.0063, "step": 5283 }, { "epoch": 310.8235294117647, "grad_norm": 0.5634629726409912, "learning_rate": 2.495488570494157e-06, "loss": 0.006, "step": 5284 }, { "epoch": 310.88235294117646, "grad_norm": 0.5303972363471985, "learning_rate": 2.492341513929123e-06, "loss": 0.0061, "step": 5285 }, { "epoch": 310.94117647058823, "grad_norm": 0.8723196387290955, "learning_rate": 2.489196160475086e-06, "loss": 0.0059, "step": 5286 }, { "epoch": 311.0, "grad_norm": 0.23384512960910797, "learning_rate": 2.48605251084556e-06, "loss": 0.0043, "step": 5287 }, { "epoch": 311.05882352941177, "grad_norm": 0.3828165531158447, "learning_rate": 2.4829105657536866e-06, "loss": 0.0089, "step": 5288 }, { "epoch": 311.11764705882354, "grad_norm": 0.3147643506526947, "learning_rate": 2.479770325912216e-06, "loss": 0.0048, "step": 5289 }, { "epoch": 311.1764705882353, "grad_norm": 0.4227308928966522, "learning_rate": 2.4766317920335114e-06, "loss": 0.0042, "step": 5290 }, { "epoch": 311.2352941176471, "grad_norm": 0.23997561633586884, "learning_rate": 2.473494964829546e-06, "loss": 0.0039, "step": 5291 }, { "epoch": 311.29411764705884, "grad_norm": 0.2150198072195053, "learning_rate": 2.470359845011908e-06, "loss": 0.0044, "step": 5292 }, { "epoch": 311.3529411764706, "grad_norm": 1.0310693979263306, "learning_rate": 2.4672264332918015e-06, "loss": 0.0073, "step": 5293 }, { "epoch": 311.4117647058824, "grad_norm": 0.27947667241096497, "learning_rate": 2.464094730380042e-06, "loss": 0.0026, "step": 5294 }, { "epoch": 311.47058823529414, "grad_norm": 0.6158035397529602, "learning_rate": 2.4609647369870494e-06, "loss": 0.0047, "step": 5295 }, { "epoch": 311.52941176470586, "grad_norm": 0.5446978211402893, "learning_rate": 2.457836453822867e-06, "loss": 0.0076, "step": 5296 }, { "epoch": 311.5882352941176, "grad_norm": 0.2948988974094391, "learning_rate": 2.4547098815971435e-06, "loss": 0.0048, "step": 5297 }, { "epoch": 311.6470588235294, "grad_norm": 0.5521571636199951, "learning_rate": 2.4515850210191448e-06, "loss": 0.003, "step": 5298 }, { "epoch": 311.70588235294116, "grad_norm": 0.6982037425041199, "learning_rate": 2.4484618727977396e-06, "loss": 0.0035, "step": 5299 }, { "epoch": 311.7647058823529, "grad_norm": 0.37375712394714355, "learning_rate": 2.445340437641416e-06, "loss": 0.0051, "step": 5300 }, { "epoch": 311.8235294117647, "grad_norm": 0.264909952878952, "learning_rate": 2.4422207162582733e-06, "loss": 0.0038, "step": 5301 }, { "epoch": 311.88235294117646, "grad_norm": 0.5827420353889465, "learning_rate": 2.4391027093560148e-06, "loss": 0.0036, "step": 5302 }, { "epoch": 311.94117647058823, "grad_norm": 0.4522259831428528, "learning_rate": 2.4359864176419623e-06, "loss": 0.0041, "step": 5303 }, { "epoch": 312.0, "grad_norm": 0.32752111554145813, "learning_rate": 2.432871841823047e-06, "loss": 0.0068, "step": 5304 }, { "epoch": 312.05882352941177, "grad_norm": 0.23520439863204956, "learning_rate": 2.429758982605811e-06, "loss": 0.0031, "step": 5305 }, { "epoch": 312.11764705882354, "grad_norm": 0.33280566334724426, "learning_rate": 2.426647840696401e-06, "loss": 0.0046, "step": 5306 }, { "epoch": 312.1764705882353, "grad_norm": 0.33857396245002747, "learning_rate": 2.4235384168005827e-06, "loss": 0.0033, "step": 5307 }, { "epoch": 312.2352941176471, "grad_norm": 0.7208428382873535, "learning_rate": 2.420430711623727e-06, "loss": 0.01, "step": 5308 }, { "epoch": 312.29411764705884, "grad_norm": 0.297274649143219, "learning_rate": 2.41732472587082e-06, "loss": 0.0065, "step": 5309 }, { "epoch": 312.3529411764706, "grad_norm": 0.24991822242736816, "learning_rate": 2.414220460246447e-06, "loss": 0.0045, "step": 5310 }, { "epoch": 312.4117647058824, "grad_norm": 0.24296991527080536, "learning_rate": 2.4111179154548147e-06, "loss": 0.0028, "step": 5311 }, { "epoch": 312.47058823529414, "grad_norm": 1.1294608116149902, "learning_rate": 2.408017092199735e-06, "loss": 0.0029, "step": 5312 }, { "epoch": 312.52941176470586, "grad_norm": 0.40245378017425537, "learning_rate": 2.404917991184629e-06, "loss": 0.004, "step": 5313 }, { "epoch": 312.5882352941176, "grad_norm": 0.2880787253379822, "learning_rate": 2.4018206131125242e-06, "loss": 0.0036, "step": 5314 }, { "epoch": 312.6470588235294, "grad_norm": 0.4787363111972809, "learning_rate": 2.398724958686063e-06, "loss": 0.0055, "step": 5315 }, { "epoch": 312.70588235294116, "grad_norm": 0.5714297294616699, "learning_rate": 2.395631028607497e-06, "loss": 0.0054, "step": 5316 }, { "epoch": 312.7647058823529, "grad_norm": 0.8474244475364685, "learning_rate": 2.3925388235786763e-06, "loss": 0.005, "step": 5317 }, { "epoch": 312.8235294117647, "grad_norm": 0.4188757836818695, "learning_rate": 2.3894483443010706e-06, "loss": 0.0053, "step": 5318 }, { "epoch": 312.88235294117646, "grad_norm": 0.33732160925865173, "learning_rate": 2.3863595914757553e-06, "loss": 0.0041, "step": 5319 }, { "epoch": 312.94117647058823, "grad_norm": 1.901012659072876, "learning_rate": 2.3832725658034163e-06, "loss": 0.0044, "step": 5320 }, { "epoch": 313.0, "grad_norm": 0.30865710973739624, "learning_rate": 2.3801872679843384e-06, "loss": 0.0037, "step": 5321 }, { "epoch": 313.05882352941177, "grad_norm": 0.19753049314022064, "learning_rate": 2.377103698718424e-06, "loss": 0.0026, "step": 5322 }, { "epoch": 313.11764705882354, "grad_norm": 0.20881333947181702, "learning_rate": 2.37402185870518e-06, "loss": 0.0041, "step": 5323 }, { "epoch": 313.1764705882353, "grad_norm": 0.3334757685661316, "learning_rate": 2.3709417486437246e-06, "loss": 0.0026, "step": 5324 }, { "epoch": 313.2352941176471, "grad_norm": 0.25241583585739136, "learning_rate": 2.3678633692327746e-06, "loss": 0.0043, "step": 5325 }, { "epoch": 313.29411764705884, "grad_norm": 0.3721078336238861, "learning_rate": 2.3647867211706634e-06, "loss": 0.0074, "step": 5326 }, { "epoch": 313.3529411764706, "grad_norm": 0.2098521590232849, "learning_rate": 2.3617118051553278e-06, "loss": 0.0028, "step": 5327 }, { "epoch": 313.4117647058824, "grad_norm": 0.19831685721874237, "learning_rate": 2.3586386218843096e-06, "loss": 0.0027, "step": 5328 }, { "epoch": 313.47058823529414, "grad_norm": 0.4191913604736328, "learning_rate": 2.3555671720547614e-06, "loss": 0.0071, "step": 5329 }, { "epoch": 313.52941176470586, "grad_norm": 0.7420517802238464, "learning_rate": 2.3524974563634407e-06, "loss": 0.0045, "step": 5330 }, { "epoch": 313.5882352941176, "grad_norm": 0.14252737164497375, "learning_rate": 2.349429475506716e-06, "loss": 0.0025, "step": 5331 }, { "epoch": 313.6470588235294, "grad_norm": 0.26871466636657715, "learning_rate": 2.3463632301805505e-06, "loss": 0.0056, "step": 5332 }, { "epoch": 313.70588235294116, "grad_norm": 0.11679290235042572, "learning_rate": 2.343298721080527e-06, "loss": 0.0018, "step": 5333 }, { "epoch": 313.7647058823529, "grad_norm": 0.4693509638309479, "learning_rate": 2.3402359489018254e-06, "loss": 0.0053, "step": 5334 }, { "epoch": 313.8235294117647, "grad_norm": 0.2930677831172943, "learning_rate": 2.337174914339242e-06, "loss": 0.0031, "step": 5335 }, { "epoch": 313.88235294117646, "grad_norm": 0.2952576279640198, "learning_rate": 2.3341156180871626e-06, "loss": 0.0053, "step": 5336 }, { "epoch": 313.94117647058823, "grad_norm": 0.28299492597579956, "learning_rate": 2.331058060839593e-06, "loss": 0.003, "step": 5337 }, { "epoch": 314.0, "grad_norm": 0.20735161006450653, "learning_rate": 2.328002243290138e-06, "loss": 0.0023, "step": 5338 }, { "epoch": 314.05882352941177, "grad_norm": 0.38191545009613037, "learning_rate": 2.324948166132014e-06, "loss": 0.0039, "step": 5339 }, { "epoch": 314.11764705882354, "grad_norm": 0.2435711920261383, "learning_rate": 2.3218958300580307e-06, "loss": 0.005, "step": 5340 }, { "epoch": 314.1764705882353, "grad_norm": 0.24130530655384064, "learning_rate": 2.3188452357606127e-06, "loss": 0.0036, "step": 5341 }, { "epoch": 314.2352941176471, "grad_norm": 0.20193997025489807, "learning_rate": 2.315796383931791e-06, "loss": 0.0022, "step": 5342 }, { "epoch": 314.29411764705884, "grad_norm": 0.7032435536384583, "learning_rate": 2.312749275263191e-06, "loss": 0.0045, "step": 5343 }, { "epoch": 314.3529411764706, "grad_norm": 0.3534391224384308, "learning_rate": 2.3097039104460506e-06, "loss": 0.0041, "step": 5344 }, { "epoch": 314.4117647058824, "grad_norm": 0.6855913996696472, "learning_rate": 2.306660290171211e-06, "loss": 0.0044, "step": 5345 }, { "epoch": 314.47058823529414, "grad_norm": 0.2544494569301605, "learning_rate": 2.3036184151291186e-06, "loss": 0.0042, "step": 5346 }, { "epoch": 314.52941176470586, "grad_norm": 0.17943310737609863, "learning_rate": 2.300578286009819e-06, "loss": 0.0031, "step": 5347 }, { "epoch": 314.5882352941176, "grad_norm": 0.5455299615859985, "learning_rate": 2.2975399035029657e-06, "loss": 0.0093, "step": 5348 }, { "epoch": 314.6470588235294, "grad_norm": 0.32670092582702637, "learning_rate": 2.294503268297816e-06, "loss": 0.0034, "step": 5349 }, { "epoch": 314.70588235294116, "grad_norm": 0.34445032477378845, "learning_rate": 2.291468381083233e-06, "loss": 0.0046, "step": 5350 }, { "epoch": 314.7647058823529, "grad_norm": 0.34532567858695984, "learning_rate": 2.2884352425476743e-06, "loss": 0.0032, "step": 5351 }, { "epoch": 314.8235294117647, "grad_norm": 0.3115244209766388, "learning_rate": 2.285403853379209e-06, "loss": 0.0041, "step": 5352 }, { "epoch": 314.88235294117646, "grad_norm": 0.4528082311153412, "learning_rate": 2.2823742142655116e-06, "loss": 0.0052, "step": 5353 }, { "epoch": 314.94117647058823, "grad_norm": 0.6873390674591064, "learning_rate": 2.2793463258938486e-06, "loss": 0.0051, "step": 5354 }, { "epoch": 315.0, "grad_norm": 0.4239301085472107, "learning_rate": 2.2763201889510987e-06, "loss": 0.0034, "step": 5355 }, { "epoch": 315.05882352941177, "grad_norm": 0.20275411009788513, "learning_rate": 2.2732958041237417e-06, "loss": 0.003, "step": 5356 }, { "epoch": 315.11764705882354, "grad_norm": 0.30899694561958313, "learning_rate": 2.27027317209786e-06, "loss": 0.0045, "step": 5357 }, { "epoch": 315.1764705882353, "grad_norm": 0.15857893228530884, "learning_rate": 2.267252293559131e-06, "loss": 0.0021, "step": 5358 }, { "epoch": 315.2352941176471, "grad_norm": 0.18374891579151154, "learning_rate": 2.2642331691928463e-06, "loss": 0.0033, "step": 5359 }, { "epoch": 315.29411764705884, "grad_norm": 0.5429837107658386, "learning_rate": 2.2612157996838913e-06, "loss": 0.005, "step": 5360 }, { "epoch": 315.3529411764706, "grad_norm": 0.31568488478660583, "learning_rate": 2.258200185716759e-06, "loss": 0.0046, "step": 5361 }, { "epoch": 315.4117647058824, "grad_norm": 0.37784743309020996, "learning_rate": 2.2551863279755347e-06, "loss": 0.0041, "step": 5362 }, { "epoch": 315.47058823529414, "grad_norm": 0.23775289952754974, "learning_rate": 2.252174227143915e-06, "loss": 0.0045, "step": 5363 }, { "epoch": 315.52941176470586, "grad_norm": 0.18961311876773834, "learning_rate": 2.2491638839051953e-06, "loss": 0.0026, "step": 5364 }, { "epoch": 315.5882352941176, "grad_norm": 0.37804970145225525, "learning_rate": 2.246155298942273e-06, "loss": 0.0061, "step": 5365 }, { "epoch": 315.6470588235294, "grad_norm": 0.2835599184036255, "learning_rate": 2.2431484729376395e-06, "loss": 0.0047, "step": 5366 }, { "epoch": 315.70588235294116, "grad_norm": 0.3164699375629425, "learning_rate": 2.240143406573396e-06, "loss": 0.0042, "step": 5367 }, { "epoch": 315.7647058823529, "grad_norm": 0.4821835160255432, "learning_rate": 2.237140100531244e-06, "loss": 0.0065, "step": 5368 }, { "epoch": 315.8235294117647, "grad_norm": 0.4759807586669922, "learning_rate": 2.2341385554924765e-06, "loss": 0.0047, "step": 5369 }, { "epoch": 315.88235294117646, "grad_norm": 0.19174104928970337, "learning_rate": 2.2311387721379983e-06, "loss": 0.002, "step": 5370 }, { "epoch": 315.94117647058823, "grad_norm": 0.23376256227493286, "learning_rate": 2.228140751148308e-06, "loss": 0.0025, "step": 5371 }, { "epoch": 316.0, "grad_norm": 0.1689656674861908, "learning_rate": 2.2251444932035094e-06, "loss": 0.0022, "step": 5372 }, { "epoch": 316.05882352941177, "grad_norm": 0.39646339416503906, "learning_rate": 2.222149998983296e-06, "loss": 0.0043, "step": 5373 }, { "epoch": 316.11764705882354, "grad_norm": 0.2603587210178375, "learning_rate": 2.2191572691669737e-06, "loss": 0.0031, "step": 5374 }, { "epoch": 316.1764705882353, "grad_norm": 0.13013491034507751, "learning_rate": 2.2161663044334427e-06, "loss": 0.0016, "step": 5375 }, { "epoch": 316.2352941176471, "grad_norm": 0.3577929735183716, "learning_rate": 2.2131771054612028e-06, "loss": 0.0057, "step": 5376 }, { "epoch": 316.29411764705884, "grad_norm": 0.1750059574842453, "learning_rate": 2.21018967292835e-06, "loss": 0.0033, "step": 5377 }, { "epoch": 316.3529411764706, "grad_norm": 0.4133460521697998, "learning_rate": 2.2072040075125846e-06, "loss": 0.0075, "step": 5378 }, { "epoch": 316.4117647058824, "grad_norm": 0.320113867521286, "learning_rate": 2.2042201098912074e-06, "loss": 0.0037, "step": 5379 }, { "epoch": 316.47058823529414, "grad_norm": 0.1161593496799469, "learning_rate": 2.20123798074111e-06, "loss": 0.0019, "step": 5380 }, { "epoch": 316.52941176470586, "grad_norm": 0.3388668894767761, "learning_rate": 2.1982576207387895e-06, "loss": 0.0048, "step": 5381 }, { "epoch": 316.5882352941176, "grad_norm": 0.32620659470558167, "learning_rate": 2.195279030560341e-06, "loss": 0.0042, "step": 5382 }, { "epoch": 316.6470588235294, "grad_norm": 0.5037583708763123, "learning_rate": 2.1923022108814597e-06, "loss": 0.0035, "step": 5383 }, { "epoch": 316.70588235294116, "grad_norm": 0.43369898200035095, "learning_rate": 2.1893271623774303e-06, "loss": 0.004, "step": 5384 }, { "epoch": 316.7647058823529, "grad_norm": 0.24424147605895996, "learning_rate": 2.186353885723147e-06, "loss": 0.0034, "step": 5385 }, { "epoch": 316.8235294117647, "grad_norm": 0.2659348249435425, "learning_rate": 2.1833823815930942e-06, "loss": 0.0038, "step": 5386 }, { "epoch": 316.88235294117646, "grad_norm": 0.2680765390396118, "learning_rate": 2.180412650661361e-06, "loss": 0.0042, "step": 5387 }, { "epoch": 316.94117647058823, "grad_norm": 0.21428737044334412, "learning_rate": 2.177444693601626e-06, "loss": 0.0026, "step": 5388 }, { "epoch": 317.0, "grad_norm": 0.2518102824687958, "learning_rate": 2.1744785110871713e-06, "loss": 0.0046, "step": 5389 }, { "epoch": 317.05882352941177, "grad_norm": 0.2741566002368927, "learning_rate": 2.171514103790875e-06, "loss": 0.005, "step": 5390 }, { "epoch": 317.11764705882354, "grad_norm": 0.16618545353412628, "learning_rate": 2.168551472385214e-06, "loss": 0.0024, "step": 5391 }, { "epoch": 317.1764705882353, "grad_norm": 0.23563450574874878, "learning_rate": 2.165590617542258e-06, "loss": 0.0022, "step": 5392 }, { "epoch": 317.2352941176471, "grad_norm": 0.19268473982810974, "learning_rate": 2.1626315399336772e-06, "loss": 0.0021, "step": 5393 }, { "epoch": 317.29411764705884, "grad_norm": 0.1635253131389618, "learning_rate": 2.1596742402307402e-06, "loss": 0.0025, "step": 5394 }, { "epoch": 317.3529411764706, "grad_norm": 0.31314969062805176, "learning_rate": 2.156718719104306e-06, "loss": 0.0045, "step": 5395 }, { "epoch": 317.4117647058824, "grad_norm": 0.48121002316474915, "learning_rate": 2.153764977224836e-06, "loss": 0.0062, "step": 5396 }, { "epoch": 317.47058823529414, "grad_norm": 0.4185941517353058, "learning_rate": 2.150813015262385e-06, "loss": 0.0037, "step": 5397 }, { "epoch": 317.52941176470586, "grad_norm": 0.6797035336494446, "learning_rate": 2.1478628338866093e-06, "loss": 0.0098, "step": 5398 }, { "epoch": 317.5882352941176, "grad_norm": 0.153807133436203, "learning_rate": 2.1449144337667505e-06, "loss": 0.002, "step": 5399 }, { "epoch": 317.6470588235294, "grad_norm": 0.29059311747550964, "learning_rate": 2.141967815571656e-06, "loss": 0.0054, "step": 5400 }, { "epoch": 317.70588235294116, "grad_norm": 0.3214621841907501, "learning_rate": 2.139022979969766e-06, "loss": 0.0044, "step": 5401 }, { "epoch": 317.7647058823529, "grad_norm": 0.15251456201076508, "learning_rate": 2.136079927629118e-06, "loss": 0.0023, "step": 5402 }, { "epoch": 317.8235294117647, "grad_norm": 0.37414827942848206, "learning_rate": 2.1331386592173373e-06, "loss": 0.0046, "step": 5403 }, { "epoch": 317.88235294117646, "grad_norm": 0.2790202498435974, "learning_rate": 2.1301991754016537e-06, "loss": 0.005, "step": 5404 }, { "epoch": 317.94117647058823, "grad_norm": 0.3218321204185486, "learning_rate": 2.1272614768488875e-06, "loss": 0.0047, "step": 5405 }, { "epoch": 318.0, "grad_norm": 0.31217968463897705, "learning_rate": 2.124325564225458e-06, "loss": 0.0066, "step": 5406 }, { "epoch": 318.05882352941177, "grad_norm": 0.3014160394668579, "learning_rate": 2.121391438197372e-06, "loss": 0.0032, "step": 5407 }, { "epoch": 318.11764705882354, "grad_norm": 0.24669653177261353, "learning_rate": 2.118459099430237e-06, "loss": 0.0031, "step": 5408 }, { "epoch": 318.1764705882353, "grad_norm": 0.29610323905944824, "learning_rate": 2.1155285485892572e-06, "loss": 0.0046, "step": 5409 }, { "epoch": 318.2352941176471, "grad_norm": 0.46476033329963684, "learning_rate": 2.112599786339221e-06, "loss": 0.0046, "step": 5410 }, { "epoch": 318.29411764705884, "grad_norm": 0.49941667914390564, "learning_rate": 2.109672813344521e-06, "loss": 0.0045, "step": 5411 }, { "epoch": 318.3529411764706, "grad_norm": 0.46087151765823364, "learning_rate": 2.1067476302691416e-06, "loss": 0.0063, "step": 5412 }, { "epoch": 318.4117647058824, "grad_norm": 0.19069020450115204, "learning_rate": 2.1038242377766604e-06, "loss": 0.0019, "step": 5413 }, { "epoch": 318.47058823529414, "grad_norm": 0.49781858921051025, "learning_rate": 2.1009026365302455e-06, "loss": 0.0036, "step": 5414 }, { "epoch": 318.52941176470586, "grad_norm": 0.20988190174102783, "learning_rate": 2.0979828271926616e-06, "loss": 0.0035, "step": 5415 }, { "epoch": 318.5882352941176, "grad_norm": 0.2834360897541046, "learning_rate": 2.0950648104262694e-06, "loss": 0.0044, "step": 5416 }, { "epoch": 318.6470588235294, "grad_norm": 0.25631797313690186, "learning_rate": 2.0921485868930223e-06, "loss": 0.0034, "step": 5417 }, { "epoch": 318.70588235294116, "grad_norm": 0.4950624704360962, "learning_rate": 2.0892341572544593e-06, "loss": 0.0031, "step": 5418 }, { "epoch": 318.7647058823529, "grad_norm": 0.29944491386413574, "learning_rate": 2.0863215221717226e-06, "loss": 0.0038, "step": 5419 }, { "epoch": 318.8235294117647, "grad_norm": 0.3949037194252014, "learning_rate": 2.0834106823055432e-06, "loss": 0.0063, "step": 5420 }, { "epoch": 318.88235294117646, "grad_norm": 0.5002959370613098, "learning_rate": 2.0805016383162402e-06, "loss": 0.0024, "step": 5421 }, { "epoch": 318.94117647058823, "grad_norm": 0.34936267137527466, "learning_rate": 2.077594390863733e-06, "loss": 0.0057, "step": 5422 }, { "epoch": 319.0, "grad_norm": 0.9020947217941284, "learning_rate": 2.074688940607529e-06, "loss": 0.0063, "step": 5423 }, { "epoch": 319.05882352941177, "grad_norm": 0.2909296452999115, "learning_rate": 2.0717852882067334e-06, "loss": 0.0049, "step": 5424 }, { "epoch": 319.11764705882354, "grad_norm": 0.5354630351066589, "learning_rate": 2.0688834343200324e-06, "loss": 0.0053, "step": 5425 }, { "epoch": 319.1764705882353, "grad_norm": 0.19986368715763092, "learning_rate": 2.065983379605714e-06, "loss": 0.0021, "step": 5426 }, { "epoch": 319.2352941176471, "grad_norm": 0.3400380313396454, "learning_rate": 2.0630851247216556e-06, "loss": 0.0031, "step": 5427 }, { "epoch": 319.29411764705884, "grad_norm": 0.23820090293884277, "learning_rate": 2.0601886703253284e-06, "loss": 0.0048, "step": 5428 }, { "epoch": 319.3529411764706, "grad_norm": 0.25252220034599304, "learning_rate": 2.0572940170737876e-06, "loss": 0.0025, "step": 5429 }, { "epoch": 319.4117647058824, "grad_norm": 0.741034746170044, "learning_rate": 2.054401165623686e-06, "loss": 0.0073, "step": 5430 }, { "epoch": 319.47058823529414, "grad_norm": 0.20883266627788544, "learning_rate": 2.051510116631268e-06, "loss": 0.0038, "step": 5431 }, { "epoch": 319.52941176470586, "grad_norm": 0.37592658400535583, "learning_rate": 2.0486208707523692e-06, "loss": 0.0053, "step": 5432 }, { "epoch": 319.5882352941176, "grad_norm": 0.2287086695432663, "learning_rate": 2.045733428642409e-06, "loss": 0.0037, "step": 5433 }, { "epoch": 319.6470588235294, "grad_norm": 0.3570252060890198, "learning_rate": 2.042847790956407e-06, "loss": 0.0046, "step": 5434 }, { "epoch": 319.70588235294116, "grad_norm": 0.24708791077136993, "learning_rate": 2.0399639583489715e-06, "loss": 0.0031, "step": 5435 }, { "epoch": 319.7647058823529, "grad_norm": 0.586861252784729, "learning_rate": 2.037081931474294e-06, "loss": 0.0024, "step": 5436 }, { "epoch": 319.8235294117647, "grad_norm": 0.29864075779914856, "learning_rate": 2.0342017109861645e-06, "loss": 0.0032, "step": 5437 }, { "epoch": 319.88235294117646, "grad_norm": 0.39490360021591187, "learning_rate": 2.0313232975379617e-06, "loss": 0.0061, "step": 5438 }, { "epoch": 319.94117647058823, "grad_norm": 0.3289036154747009, "learning_rate": 2.0284466917826538e-06, "loss": 0.0048, "step": 5439 }, { "epoch": 320.0, "grad_norm": 0.34585675597190857, "learning_rate": 2.025571894372794e-06, "loss": 0.0057, "step": 5440 }, { "epoch": 320.05882352941177, "grad_norm": 0.5739075541496277, "learning_rate": 2.022698905960532e-06, "loss": 0.0062, "step": 5441 }, { "epoch": 320.11764705882354, "grad_norm": 0.17841261625289917, "learning_rate": 2.019827727197605e-06, "loss": 0.0029, "step": 5442 }, { "epoch": 320.1764705882353, "grad_norm": 0.37761762738227844, "learning_rate": 2.0169583587353426e-06, "loss": 0.003, "step": 5443 }, { "epoch": 320.2352941176471, "grad_norm": 0.7866030335426331, "learning_rate": 2.014090801224654e-06, "loss": 0.0044, "step": 5444 }, { "epoch": 320.29411764705884, "grad_norm": 0.2398047000169754, "learning_rate": 2.0112250553160473e-06, "loss": 0.004, "step": 5445 }, { "epoch": 320.3529411764706, "grad_norm": 0.39553120732307434, "learning_rate": 2.00836112165962e-06, "loss": 0.0046, "step": 5446 }, { "epoch": 320.4117647058824, "grad_norm": 0.20031020045280457, "learning_rate": 2.005499000905048e-06, "loss": 0.0024, "step": 5447 }, { "epoch": 320.47058823529414, "grad_norm": 0.18211568892002106, "learning_rate": 2.002638693701606e-06, "loss": 0.0027, "step": 5448 }, { "epoch": 320.52941176470586, "grad_norm": 0.7692714333534241, "learning_rate": 1.9997802006981547e-06, "loss": 0.0044, "step": 5449 }, { "epoch": 320.5882352941176, "grad_norm": 0.20498578250408173, "learning_rate": 1.996923522543146e-06, "loss": 0.0023, "step": 5450 }, { "epoch": 320.6470588235294, "grad_norm": 0.14494630694389343, "learning_rate": 1.9940686598846103e-06, "loss": 0.0021, "step": 5451 }, { "epoch": 320.70588235294116, "grad_norm": 0.2764948606491089, "learning_rate": 1.9912156133701753e-06, "loss": 0.0047, "step": 5452 }, { "epoch": 320.7647058823529, "grad_norm": 0.5694862604141235, "learning_rate": 1.9883643836470544e-06, "loss": 0.0072, "step": 5453 }, { "epoch": 320.8235294117647, "grad_norm": 0.23745375871658325, "learning_rate": 1.9855149713620525e-06, "loss": 0.0033, "step": 5454 }, { "epoch": 320.88235294117646, "grad_norm": 0.4576890468597412, "learning_rate": 1.982667377161551e-06, "loss": 0.0065, "step": 5455 }, { "epoch": 320.94117647058823, "grad_norm": 0.6677110195159912, "learning_rate": 1.9798216016915295e-06, "loss": 0.0057, "step": 5456 }, { "epoch": 321.0, "grad_norm": 0.23378872871398926, "learning_rate": 1.976977645597552e-06, "loss": 0.005, "step": 5457 }, { "epoch": 321.05882352941177, "grad_norm": 0.30335864424705505, "learning_rate": 1.9741355095247715e-06, "loss": 0.0034, "step": 5458 }, { "epoch": 321.11764705882354, "grad_norm": 0.28805139660835266, "learning_rate": 1.9712951941179216e-06, "loss": 0.0029, "step": 5459 }, { "epoch": 321.1764705882353, "grad_norm": 0.3023480176925659, "learning_rate": 1.9684567000213295e-06, "loss": 0.0037, "step": 5460 }, { "epoch": 321.2352941176471, "grad_norm": 0.3860960006713867, "learning_rate": 1.965620027878906e-06, "loss": 0.0053, "step": 5461 }, { "epoch": 321.29411764705884, "grad_norm": 0.3261767029762268, "learning_rate": 1.9627851783341523e-06, "loss": 0.0032, "step": 5462 }, { "epoch": 321.3529411764706, "grad_norm": 0.3326229751110077, "learning_rate": 1.9599521520301514e-06, "loss": 0.0049, "step": 5463 }, { "epoch": 321.4117647058824, "grad_norm": 0.7355853915214539, "learning_rate": 1.957120949609578e-06, "loss": 0.0049, "step": 5464 }, { "epoch": 321.47058823529414, "grad_norm": 0.5249891877174377, "learning_rate": 1.954291571714685e-06, "loss": 0.0076, "step": 5465 }, { "epoch": 321.52941176470586, "grad_norm": 0.1081608235836029, "learning_rate": 1.951464018987317e-06, "loss": 0.0019, "step": 5466 }, { "epoch": 321.5882352941176, "grad_norm": 0.24297265708446503, "learning_rate": 1.9486382920689073e-06, "loss": 0.0023, "step": 5467 }, { "epoch": 321.6470588235294, "grad_norm": 0.1291845142841339, "learning_rate": 1.9458143916004713e-06, "loss": 0.0022, "step": 5468 }, { "epoch": 321.70588235294116, "grad_norm": 0.18385274708271027, "learning_rate": 1.942992318222605e-06, "loss": 0.0032, "step": 5469 }, { "epoch": 321.7647058823529, "grad_norm": 0.3871799111366272, "learning_rate": 1.9401720725755004e-06, "loss": 0.0042, "step": 5470 }, { "epoch": 321.8235294117647, "grad_norm": 0.5257057547569275, "learning_rate": 1.937353655298928e-06, "loss": 0.0034, "step": 5471 }, { "epoch": 321.88235294117646, "grad_norm": 0.4785424768924713, "learning_rate": 1.934537067032247e-06, "loss": 0.0053, "step": 5472 }, { "epoch": 321.94117647058823, "grad_norm": 0.3144516944885254, "learning_rate": 1.9317223084143966e-06, "loss": 0.0037, "step": 5473 }, { "epoch": 322.0, "grad_norm": 0.6158561110496521, "learning_rate": 1.9289093800839067e-06, "loss": 0.0085, "step": 5474 }, { "epoch": 322.05882352941177, "grad_norm": 0.3129814863204956, "learning_rate": 1.926098282678891e-06, "loss": 0.004, "step": 5475 }, { "epoch": 322.11764705882354, "grad_norm": 0.304482638835907, "learning_rate": 1.923289016837041e-06, "loss": 0.0028, "step": 5476 }, { "epoch": 322.1764705882353, "grad_norm": 0.9101208448410034, "learning_rate": 1.9204815831956425e-06, "loss": 0.0052, "step": 5477 }, { "epoch": 322.2352941176471, "grad_norm": 0.17602914571762085, "learning_rate": 1.9176759823915602e-06, "loss": 0.0026, "step": 5478 }, { "epoch": 322.29411764705884, "grad_norm": 0.23597504198551178, "learning_rate": 1.9148722150612475e-06, "loss": 0.0043, "step": 5479 }, { "epoch": 322.3529411764706, "grad_norm": 0.32907524704933167, "learning_rate": 1.9120702818407322e-06, "loss": 0.0026, "step": 5480 }, { "epoch": 322.4117647058824, "grad_norm": 0.25586068630218506, "learning_rate": 1.9092701833656358e-06, "loss": 0.004, "step": 5481 }, { "epoch": 322.47058823529414, "grad_norm": 0.34104394912719727, "learning_rate": 1.906471920271159e-06, "loss": 0.003, "step": 5482 }, { "epoch": 322.52941176470586, "grad_norm": 0.2533929944038391, "learning_rate": 1.903675493192092e-06, "loss": 0.005, "step": 5483 }, { "epoch": 322.5882352941176, "grad_norm": 0.22736325860023499, "learning_rate": 1.9008809027627962e-06, "loss": 0.0028, "step": 5484 }, { "epoch": 322.6470588235294, "grad_norm": 0.3842260241508484, "learning_rate": 1.8980881496172276e-06, "loss": 0.0034, "step": 5485 }, { "epoch": 322.70588235294116, "grad_norm": 0.4531804025173187, "learning_rate": 1.895297234388923e-06, "loss": 0.0029, "step": 5486 }, { "epoch": 322.7647058823529, "grad_norm": 0.22716568410396576, "learning_rate": 1.8925081577110015e-06, "loss": 0.004, "step": 5487 }, { "epoch": 322.8235294117647, "grad_norm": 0.43577027320861816, "learning_rate": 1.8897209202161615e-06, "loss": 0.0053, "step": 5488 }, { "epoch": 322.88235294117646, "grad_norm": 0.7538076043128967, "learning_rate": 1.8869355225366881e-06, "loss": 0.0086, "step": 5489 }, { "epoch": 322.94117647058823, "grad_norm": 0.6691140532493591, "learning_rate": 1.8841519653044516e-06, "loss": 0.0084, "step": 5490 }, { "epoch": 323.0, "grad_norm": 0.4211328625679016, "learning_rate": 1.8813702491508956e-06, "loss": 0.0035, "step": 5491 }, { "epoch": 323.05882352941177, "grad_norm": 0.3188662528991699, "learning_rate": 1.8785903747070566e-06, "loss": 0.0029, "step": 5492 }, { "epoch": 323.11764705882354, "grad_norm": 0.5228450894355774, "learning_rate": 1.8758123426035457e-06, "loss": 0.0061, "step": 5493 }, { "epoch": 323.1764705882353, "grad_norm": 0.18981117010116577, "learning_rate": 1.8730361534705644e-06, "loss": 0.003, "step": 5494 }, { "epoch": 323.2352941176471, "grad_norm": 1.0265129804611206, "learning_rate": 1.8702618079378831e-06, "loss": 0.008, "step": 5495 }, { "epoch": 323.29411764705884, "grad_norm": 0.3477150797843933, "learning_rate": 1.8674893066348665e-06, "loss": 0.0043, "step": 5496 }, { "epoch": 323.3529411764706, "grad_norm": 0.18103979527950287, "learning_rate": 1.8647186501904535e-06, "loss": 0.0038, "step": 5497 }, { "epoch": 323.4117647058824, "grad_norm": 0.1648349016904831, "learning_rate": 1.8619498392331725e-06, "loss": 0.0026, "step": 5498 }, { "epoch": 323.47058823529414, "grad_norm": 0.2530493438243866, "learning_rate": 1.85918287439112e-06, "loss": 0.0027, "step": 5499 }, { "epoch": 323.52941176470586, "grad_norm": 0.25747597217559814, "learning_rate": 1.856417756291985e-06, "loss": 0.0032, "step": 5500 }, { "epoch": 323.5882352941176, "grad_norm": 0.10919474810361862, "learning_rate": 1.8536544855630367e-06, "loss": 0.0016, "step": 5501 }, { "epoch": 323.6470588235294, "grad_norm": 0.5921597480773926, "learning_rate": 1.8508930628311184e-06, "loss": 0.0059, "step": 5502 }, { "epoch": 323.70588235294116, "grad_norm": 0.13875912129878998, "learning_rate": 1.8481334887226587e-06, "loss": 0.0017, "step": 5503 }, { "epoch": 323.7647058823529, "grad_norm": 0.20328716933727264, "learning_rate": 1.8453757638636683e-06, "loss": 0.003, "step": 5504 }, { "epoch": 323.8235294117647, "grad_norm": 0.567125141620636, "learning_rate": 1.8426198888797387e-06, "loss": 0.0067, "step": 5505 }, { "epoch": 323.88235294117646, "grad_norm": 0.44739219546318054, "learning_rate": 1.8398658643960343e-06, "loss": 0.005, "step": 5506 }, { "epoch": 323.94117647058823, "grad_norm": 0.18759572505950928, "learning_rate": 1.8371136910373088e-06, "loss": 0.0025, "step": 5507 }, { "epoch": 324.0, "grad_norm": 0.28215324878692627, "learning_rate": 1.8343633694278895e-06, "loss": 0.0056, "step": 5508 }, { "epoch": 324.05882352941177, "grad_norm": 0.2417069971561432, "learning_rate": 1.8316149001916916e-06, "loss": 0.0032, "step": 5509 }, { "epoch": 324.11764705882354, "grad_norm": 0.20133182406425476, "learning_rate": 1.8288682839521987e-06, "loss": 0.0024, "step": 5510 }, { "epoch": 324.1764705882353, "grad_norm": 0.1788306087255478, "learning_rate": 1.8261235213324836e-06, "loss": 0.0033, "step": 5511 }, { "epoch": 324.2352941176471, "grad_norm": 0.3597842752933502, "learning_rate": 1.8233806129551935e-06, "loss": 0.004, "step": 5512 }, { "epoch": 324.29411764705884, "grad_norm": 0.228279247879982, "learning_rate": 1.8206395594425597e-06, "loss": 0.0046, "step": 5513 }, { "epoch": 324.3529411764706, "grad_norm": 0.2177591472864151, "learning_rate": 1.8179003614163859e-06, "loss": 0.003, "step": 5514 }, { "epoch": 324.4117647058824, "grad_norm": 0.645947277545929, "learning_rate": 1.8151630194980607e-06, "loss": 0.0034, "step": 5515 }, { "epoch": 324.47058823529414, "grad_norm": 0.20446474850177765, "learning_rate": 1.8124275343085518e-06, "loss": 0.0034, "step": 5516 }, { "epoch": 324.52941176470586, "grad_norm": 0.35937994718551636, "learning_rate": 1.8096939064683983e-06, "loss": 0.0036, "step": 5517 }, { "epoch": 324.5882352941176, "grad_norm": 0.8910333514213562, "learning_rate": 1.8069621365977252e-06, "loss": 0.0046, "step": 5518 }, { "epoch": 324.6470588235294, "grad_norm": 0.292834609746933, "learning_rate": 1.8042322253162359e-06, "loss": 0.0039, "step": 5519 }, { "epoch": 324.70588235294116, "grad_norm": 0.4520329236984253, "learning_rate": 1.8015041732432116e-06, "loss": 0.0045, "step": 5520 }, { "epoch": 324.7647058823529, "grad_norm": 0.3365934491157532, "learning_rate": 1.7987779809975047e-06, "loss": 0.0035, "step": 5521 }, { "epoch": 324.8235294117647, "grad_norm": 0.5147008299827576, "learning_rate": 1.7960536491975556e-06, "loss": 0.008, "step": 5522 }, { "epoch": 324.88235294117646, "grad_norm": 0.3324652910232544, "learning_rate": 1.7933311784613772e-06, "loss": 0.0059, "step": 5523 }, { "epoch": 324.94117647058823, "grad_norm": 0.4606182277202606, "learning_rate": 1.7906105694065646e-06, "loss": 0.0077, "step": 5524 }, { "epoch": 325.0, "grad_norm": 0.6693243384361267, "learning_rate": 1.7878918226502816e-06, "loss": 0.0042, "step": 5525 }, { "epoch": 325.05882352941177, "grad_norm": 0.5910972356796265, "learning_rate": 1.7851749388092799e-06, "loss": 0.0098, "step": 5526 }, { "epoch": 325.11764705882354, "grad_norm": 0.48911675810813904, "learning_rate": 1.7824599184998847e-06, "loss": 0.0055, "step": 5527 }, { "epoch": 325.1764705882353, "grad_norm": 0.23755347728729248, "learning_rate": 1.7797467623379927e-06, "loss": 0.0038, "step": 5528 }, { "epoch": 325.2352941176471, "grad_norm": 0.3067331314086914, "learning_rate": 1.7770354709390868e-06, "loss": 0.0048, "step": 5529 }, { "epoch": 325.29411764705884, "grad_norm": 0.27167588472366333, "learning_rate": 1.7743260449182232e-06, "loss": 0.0034, "step": 5530 }, { "epoch": 325.3529411764706, "grad_norm": 0.27817219495773315, "learning_rate": 1.7716184848900354e-06, "loss": 0.0036, "step": 5531 }, { "epoch": 325.4117647058824, "grad_norm": 0.2560375928878784, "learning_rate": 1.7689127914687288e-06, "loss": 0.0047, "step": 5532 }, { "epoch": 325.47058823529414, "grad_norm": 0.3746802806854248, "learning_rate": 1.7662089652680924e-06, "loss": 0.0044, "step": 5533 }, { "epoch": 325.52941176470586, "grad_norm": 0.2493380755186081, "learning_rate": 1.7635070069014882e-06, "loss": 0.0045, "step": 5534 }, { "epoch": 325.5882352941176, "grad_norm": 0.3201165497303009, "learning_rate": 1.7608069169818586e-06, "loss": 0.0025, "step": 5535 }, { "epoch": 325.6470588235294, "grad_norm": 0.19136475026607513, "learning_rate": 1.758108696121711e-06, "loss": 0.0021, "step": 5536 }, { "epoch": 325.70588235294116, "grad_norm": 0.26264336705207825, "learning_rate": 1.7554123449331417e-06, "loss": 0.0039, "step": 5537 }, { "epoch": 325.7647058823529, "grad_norm": 0.4036567211151123, "learning_rate": 1.7527178640278153e-06, "loss": 0.0032, "step": 5538 }, { "epoch": 325.8235294117647, "grad_norm": 0.8427146077156067, "learning_rate": 1.7500252540169782e-06, "loss": 0.0052, "step": 5539 }, { "epoch": 325.88235294117646, "grad_norm": 0.236435666680336, "learning_rate": 1.7473345155114429e-06, "loss": 0.0037, "step": 5540 }, { "epoch": 325.94117647058823, "grad_norm": 0.42868849635124207, "learning_rate": 1.7446456491216046e-06, "loss": 0.0024, "step": 5541 }, { "epoch": 326.0, "grad_norm": 0.2339903712272644, "learning_rate": 1.7419586554574364e-06, "loss": 0.0056, "step": 5542 }, { "epoch": 326.05882352941177, "grad_norm": 0.16021370887756348, "learning_rate": 1.739273535128476e-06, "loss": 0.002, "step": 5543 }, { "epoch": 326.11764705882354, "grad_norm": 0.871828556060791, "learning_rate": 1.736590288743847e-06, "loss": 0.0038, "step": 5544 }, { "epoch": 326.1764705882353, "grad_norm": 0.3232831656932831, "learning_rate": 1.7339089169122403e-06, "loss": 0.0034, "step": 5545 }, { "epoch": 326.2352941176471, "grad_norm": 0.21249189972877502, "learning_rate": 1.731229420241929e-06, "loss": 0.0022, "step": 5546 }, { "epoch": 326.29411764705884, "grad_norm": 0.23539406061172485, "learning_rate": 1.7285517993407518e-06, "loss": 0.0031, "step": 5547 }, { "epoch": 326.3529411764706, "grad_norm": 0.32517728209495544, "learning_rate": 1.725876054816128e-06, "loss": 0.0058, "step": 5548 }, { "epoch": 326.4117647058824, "grad_norm": 0.1893109381198883, "learning_rate": 1.7232021872750492e-06, "loss": 0.0028, "step": 5549 }, { "epoch": 326.47058823529414, "grad_norm": 0.23558470606803894, "learning_rate": 1.7205301973240863e-06, "loss": 0.0033, "step": 5550 }, { "epoch": 326.52941176470586, "grad_norm": 0.22568713128566742, "learning_rate": 1.717860085569374e-06, "loss": 0.0036, "step": 5551 }, { "epoch": 326.5882352941176, "grad_norm": 0.2902212142944336, "learning_rate": 1.7151918526166277e-06, "loss": 0.004, "step": 5552 }, { "epoch": 326.6470588235294, "grad_norm": 1.316028356552124, "learning_rate": 1.712525499071137e-06, "loss": 0.0092, "step": 5553 }, { "epoch": 326.70588235294116, "grad_norm": 0.31477972865104675, "learning_rate": 1.7098610255377657e-06, "loss": 0.0039, "step": 5554 }, { "epoch": 326.7647058823529, "grad_norm": 0.2933547794818878, "learning_rate": 1.7071984326209446e-06, "loss": 0.0033, "step": 5555 }, { "epoch": 326.8235294117647, "grad_norm": 0.4793495237827301, "learning_rate": 1.7045377209246849e-06, "loss": 0.0053, "step": 5556 }, { "epoch": 326.88235294117646, "grad_norm": 0.176541268825531, "learning_rate": 1.7018788910525708e-06, "loss": 0.0039, "step": 5557 }, { "epoch": 326.94117647058823, "grad_norm": 0.42244601249694824, "learning_rate": 1.6992219436077528e-06, "loss": 0.0056, "step": 5558 }, { "epoch": 327.0, "grad_norm": 0.19119341671466827, "learning_rate": 1.69656687919296e-06, "loss": 0.0037, "step": 5559 }, { "epoch": 327.05882352941177, "grad_norm": 0.3594212830066681, "learning_rate": 1.6939136984104954e-06, "loss": 0.0038, "step": 5560 }, { "epoch": 327.11764705882354, "grad_norm": 0.40555062890052795, "learning_rate": 1.6912624018622347e-06, "loss": 0.0064, "step": 5561 }, { "epoch": 327.1764705882353, "grad_norm": 0.30444321036338806, "learning_rate": 1.6886129901496173e-06, "loss": 0.0046, "step": 5562 }, { "epoch": 327.2352941176471, "grad_norm": 0.4246242344379425, "learning_rate": 1.6859654638736656e-06, "loss": 0.0046, "step": 5563 }, { "epoch": 327.29411764705884, "grad_norm": 0.1904686689376831, "learning_rate": 1.68331982363497e-06, "loss": 0.0027, "step": 5564 }, { "epoch": 327.3529411764706, "grad_norm": 0.21811315417289734, "learning_rate": 1.6806760700336965e-06, "loss": 0.0025, "step": 5565 }, { "epoch": 327.4117647058824, "grad_norm": 0.6049008369445801, "learning_rate": 1.678034203669575e-06, "loss": 0.0031, "step": 5566 }, { "epoch": 327.47058823529414, "grad_norm": 0.17168600857257843, "learning_rate": 1.6753942251419153e-06, "loss": 0.0018, "step": 5567 }, { "epoch": 327.52941176470586, "grad_norm": 0.6664069294929504, "learning_rate": 1.672756135049598e-06, "loss": 0.0033, "step": 5568 }, { "epoch": 327.5882352941176, "grad_norm": 0.4061771333217621, "learning_rate": 1.6701199339910678e-06, "loss": 0.004, "step": 5569 }, { "epoch": 327.6470588235294, "grad_norm": 0.544872522354126, "learning_rate": 1.66748562256435e-06, "loss": 0.005, "step": 5570 }, { "epoch": 327.70588235294116, "grad_norm": 0.7766767740249634, "learning_rate": 1.664853201367037e-06, "loss": 0.0065, "step": 5571 }, { "epoch": 327.7647058823529, "grad_norm": 0.12787218391895294, "learning_rate": 1.6622226709962963e-06, "loss": 0.0017, "step": 5572 }, { "epoch": 327.8235294117647, "grad_norm": 0.7765607237815857, "learning_rate": 1.659594032048858e-06, "loss": 0.0044, "step": 5573 }, { "epoch": 327.88235294117646, "grad_norm": 0.2886303663253784, "learning_rate": 1.6569672851210306e-06, "loss": 0.0061, "step": 5574 }, { "epoch": 327.94117647058823, "grad_norm": 0.35788947343826294, "learning_rate": 1.6543424308086909e-06, "loss": 0.0057, "step": 5575 }, { "epoch": 328.0, "grad_norm": 0.4607323706150055, "learning_rate": 1.6517194697072903e-06, "loss": 0.0054, "step": 5576 }, { "epoch": 328.05882352941177, "grad_norm": 0.27529701590538025, "learning_rate": 1.6490984024118407e-06, "loss": 0.0047, "step": 5577 }, { "epoch": 328.11764705882354, "grad_norm": 0.16589830815792084, "learning_rate": 1.646479229516933e-06, "loss": 0.0024, "step": 5578 }, { "epoch": 328.1764705882353, "grad_norm": 0.1818128377199173, "learning_rate": 1.6438619516167276e-06, "loss": 0.0025, "step": 5579 }, { "epoch": 328.2352941176471, "grad_norm": 0.28966668248176575, "learning_rate": 1.6412465693049561e-06, "loss": 0.0028, "step": 5580 }, { "epoch": 328.29411764705884, "grad_norm": 0.5909058451652527, "learning_rate": 1.638633083174911e-06, "loss": 0.003, "step": 5581 }, { "epoch": 328.3529411764706, "grad_norm": 0.5686115622520447, "learning_rate": 1.6360214938194642e-06, "loss": 0.0081, "step": 5582 }, { "epoch": 328.4117647058824, "grad_norm": 0.14884598553180695, "learning_rate": 1.6334118018310573e-06, "loss": 0.0026, "step": 5583 }, { "epoch": 328.47058823529414, "grad_norm": 0.697854220867157, "learning_rate": 1.630804007801694e-06, "loss": 0.0054, "step": 5584 }, { "epoch": 328.52941176470586, "grad_norm": 0.39131879806518555, "learning_rate": 1.628198112322953e-06, "loss": 0.0069, "step": 5585 }, { "epoch": 328.5882352941176, "grad_norm": 0.456137090921402, "learning_rate": 1.6255941159859812e-06, "loss": 0.0054, "step": 5586 }, { "epoch": 328.6470588235294, "grad_norm": 0.19697219133377075, "learning_rate": 1.6229920193814985e-06, "loss": 0.0021, "step": 5587 }, { "epoch": 328.70588235294116, "grad_norm": 0.4497811198234558, "learning_rate": 1.620391823099784e-06, "loss": 0.0069, "step": 5588 }, { "epoch": 328.7647058823529, "grad_norm": 0.36984869837760925, "learning_rate": 1.6177935277306944e-06, "loss": 0.0055, "step": 5589 }, { "epoch": 328.8235294117647, "grad_norm": 0.21326178312301636, "learning_rate": 1.6151971338636529e-06, "loss": 0.0042, "step": 5590 }, { "epoch": 328.88235294117646, "grad_norm": 0.31466788053512573, "learning_rate": 1.612602642087654e-06, "loss": 0.0022, "step": 5591 }, { "epoch": 328.94117647058823, "grad_norm": 0.28709715604782104, "learning_rate": 1.610010052991251e-06, "loss": 0.0038, "step": 5592 }, { "epoch": 329.0, "grad_norm": 0.2762794494628906, "learning_rate": 1.607419367162577e-06, "loss": 0.003, "step": 5593 }, { "epoch": 329.05882352941177, "grad_norm": 0.6033587455749512, "learning_rate": 1.6048305851893288e-06, "loss": 0.006, "step": 5594 }, { "epoch": 329.11764705882354, "grad_norm": 0.3396777808666229, "learning_rate": 1.6022437076587682e-06, "loss": 0.003, "step": 5595 }, { "epoch": 329.1764705882353, "grad_norm": 0.22392065823078156, "learning_rate": 1.5996587351577296e-06, "loss": 0.0046, "step": 5596 }, { "epoch": 329.2352941176471, "grad_norm": 0.39556241035461426, "learning_rate": 1.5970756682726129e-06, "loss": 0.0028, "step": 5597 }, { "epoch": 329.29411764705884, "grad_norm": 0.24925541877746582, "learning_rate": 1.5944945075893893e-06, "loss": 0.0048, "step": 5598 }, { "epoch": 329.3529411764706, "grad_norm": 0.14282545447349548, "learning_rate": 1.59191525369359e-06, "loss": 0.002, "step": 5599 }, { "epoch": 329.4117647058824, "grad_norm": 0.5851587057113647, "learning_rate": 1.5893379071703207e-06, "loss": 0.0063, "step": 5600 }, { "epoch": 329.47058823529414, "grad_norm": 0.2754552364349365, "learning_rate": 1.5867624686042515e-06, "loss": 0.0047, "step": 5601 }, { "epoch": 329.52941176470586, "grad_norm": 0.26890829205513, "learning_rate": 1.5841889385796226e-06, "loss": 0.0067, "step": 5602 }, { "epoch": 329.5882352941176, "grad_norm": 0.4563203454017639, "learning_rate": 1.5816173176802353e-06, "loss": 0.0062, "step": 5603 }, { "epoch": 329.6470588235294, "grad_norm": 0.2599011957645416, "learning_rate": 1.5790476064894622e-06, "loss": 0.0026, "step": 5604 }, { "epoch": 329.70588235294116, "grad_norm": 0.1886763870716095, "learning_rate": 1.576479805590243e-06, "loss": 0.0034, "step": 5605 }, { "epoch": 329.7647058823529, "grad_norm": 0.47566336393356323, "learning_rate": 1.573913915565084e-06, "loss": 0.0041, "step": 5606 }, { "epoch": 329.8235294117647, "grad_norm": 0.2666691839694977, "learning_rate": 1.5713499369960527e-06, "loss": 0.0035, "step": 5607 }, { "epoch": 329.88235294117646, "grad_norm": 0.41259267926216125, "learning_rate": 1.5687878704647907e-06, "loss": 0.0029, "step": 5608 }, { "epoch": 329.94117647058823, "grad_norm": 0.39814895391464233, "learning_rate": 1.566227716552503e-06, "loss": 0.0034, "step": 5609 }, { "epoch": 330.0, "grad_norm": 0.3869372308254242, "learning_rate": 1.5636694758399563e-06, "loss": 0.0038, "step": 5610 }, { "epoch": 330.05882352941177, "grad_norm": 0.3904106318950653, "learning_rate": 1.5611131489074882e-06, "loss": 0.0028, "step": 5611 }, { "epoch": 330.11764705882354, "grad_norm": 0.5940049290657043, "learning_rate": 1.558558736335003e-06, "loss": 0.0074, "step": 5612 }, { "epoch": 330.1764705882353, "grad_norm": 0.40160009264945984, "learning_rate": 1.5560062387019692e-06, "loss": 0.008, "step": 5613 }, { "epoch": 330.2352941176471, "grad_norm": 0.2944648563861847, "learning_rate": 1.553455656587417e-06, "loss": 0.004, "step": 5614 }, { "epoch": 330.29411764705884, "grad_norm": 0.148264542222023, "learning_rate": 1.5509069905699481e-06, "loss": 0.0025, "step": 5615 }, { "epoch": 330.3529411764706, "grad_norm": 0.16447483003139496, "learning_rate": 1.5483602412277254e-06, "loss": 0.0021, "step": 5616 }, { "epoch": 330.4117647058824, "grad_norm": 0.2220257669687271, "learning_rate": 1.5458154091384814e-06, "loss": 0.0035, "step": 5617 }, { "epoch": 330.47058823529414, "grad_norm": 0.23815318942070007, "learning_rate": 1.5432724948795064e-06, "loss": 0.003, "step": 5618 }, { "epoch": 330.52941176470586, "grad_norm": 0.3913593888282776, "learning_rate": 1.5407314990276634e-06, "loss": 0.0033, "step": 5619 }, { "epoch": 330.5882352941176, "grad_norm": 0.2266910821199417, "learning_rate": 1.5381924221593781e-06, "loss": 0.0026, "step": 5620 }, { "epoch": 330.6470588235294, "grad_norm": 0.09450788795948029, "learning_rate": 1.535655264850634e-06, "loss": 0.0011, "step": 5621 }, { "epoch": 330.70588235294116, "grad_norm": 0.20038793981075287, "learning_rate": 1.5331200276769886e-06, "loss": 0.0046, "step": 5622 }, { "epoch": 330.7647058823529, "grad_norm": 0.47753673791885376, "learning_rate": 1.530586711213561e-06, "loss": 0.0028, "step": 5623 }, { "epoch": 330.8235294117647, "grad_norm": 0.4175841212272644, "learning_rate": 1.528055316035033e-06, "loss": 0.0034, "step": 5624 }, { "epoch": 330.88235294117646, "grad_norm": 0.47529125213623047, "learning_rate": 1.5255258427156484e-06, "loss": 0.0041, "step": 5625 }, { "epoch": 330.94117647058823, "grad_norm": 0.7399299740791321, "learning_rate": 1.5229982918292198e-06, "loss": 0.0071, "step": 5626 }, { "epoch": 331.0, "grad_norm": 0.4252120852470398, "learning_rate": 1.520472663949122e-06, "loss": 0.0036, "step": 5627 }, { "epoch": 331.05882352941177, "grad_norm": 0.39002037048339844, "learning_rate": 1.5179489596482944e-06, "loss": 0.0052, "step": 5628 }, { "epoch": 331.11764705882354, "grad_norm": 0.2408009022474289, "learning_rate": 1.5154271794992348e-06, "loss": 0.0025, "step": 5629 }, { "epoch": 331.1764705882353, "grad_norm": 0.45998430252075195, "learning_rate": 1.512907324074012e-06, "loss": 0.0054, "step": 5630 }, { "epoch": 331.2352941176471, "grad_norm": 0.29016271233558655, "learning_rate": 1.5103893939442526e-06, "loss": 0.0031, "step": 5631 }, { "epoch": 331.29411764705884, "grad_norm": 0.1133224368095398, "learning_rate": 1.5078733896811537e-06, "loss": 0.0018, "step": 5632 }, { "epoch": 331.3529411764706, "grad_norm": 0.12923705577850342, "learning_rate": 1.5053593118554632e-06, "loss": 0.0015, "step": 5633 }, { "epoch": 331.4117647058824, "grad_norm": 0.640906572341919, "learning_rate": 1.502847161037503e-06, "loss": 0.0086, "step": 5634 }, { "epoch": 331.47058823529414, "grad_norm": 0.6105640530586243, "learning_rate": 1.5003369377971555e-06, "loss": 0.0068, "step": 5635 }, { "epoch": 331.52941176470586, "grad_norm": 0.18979693949222565, "learning_rate": 1.4978286427038602e-06, "loss": 0.0027, "step": 5636 }, { "epoch": 331.5882352941176, "grad_norm": 0.23619599640369415, "learning_rate": 1.495322276326626e-06, "loss": 0.0046, "step": 5637 }, { "epoch": 331.6470588235294, "grad_norm": 0.3224242627620697, "learning_rate": 1.492817839234021e-06, "loss": 0.0058, "step": 5638 }, { "epoch": 331.70588235294116, "grad_norm": 0.4201768636703491, "learning_rate": 1.49031533199418e-06, "loss": 0.0044, "step": 5639 }, { "epoch": 331.7647058823529, "grad_norm": 0.09653197973966599, "learning_rate": 1.4878147551747901e-06, "loss": 0.0017, "step": 5640 }, { "epoch": 331.8235294117647, "grad_norm": 0.40556037425994873, "learning_rate": 1.485316109343109e-06, "loss": 0.003, "step": 5641 }, { "epoch": 331.88235294117646, "grad_norm": 0.5541224479675293, "learning_rate": 1.4828193950659552e-06, "loss": 0.0072, "step": 5642 }, { "epoch": 331.94117647058823, "grad_norm": 0.44344741106033325, "learning_rate": 1.4803246129097093e-06, "loss": 0.0036, "step": 5643 }, { "epoch": 332.0, "grad_norm": 0.3022312521934509, "learning_rate": 1.4778317634403082e-06, "loss": 0.0045, "step": 5644 }, { "epoch": 332.05882352941177, "grad_norm": 0.2729772925376892, "learning_rate": 1.4753408472232566e-06, "loss": 0.0056, "step": 5645 }, { "epoch": 332.11764705882354, "grad_norm": 0.31414058804512024, "learning_rate": 1.472851864823619e-06, "loss": 0.0037, "step": 5646 }, { "epoch": 332.1764705882353, "grad_norm": 0.3972974419593811, "learning_rate": 1.4703648168060213e-06, "loss": 0.004, "step": 5647 }, { "epoch": 332.2352941176471, "grad_norm": 0.20609383285045624, "learning_rate": 1.467879703734646e-06, "loss": 0.0029, "step": 5648 }, { "epoch": 332.29411764705884, "grad_norm": 0.2534846365451813, "learning_rate": 1.4653965261732429e-06, "loss": 0.0025, "step": 5649 }, { "epoch": 332.3529411764706, "grad_norm": 0.2674310803413391, "learning_rate": 1.4629152846851236e-06, "loss": 0.0021, "step": 5650 }, { "epoch": 332.4117647058824, "grad_norm": 0.11272544413805008, "learning_rate": 1.4604359798331513e-06, "loss": 0.0017, "step": 5651 }, { "epoch": 332.47058823529414, "grad_norm": 0.5952696204185486, "learning_rate": 1.4579586121797585e-06, "loss": 0.0098, "step": 5652 }, { "epoch": 332.52941176470586, "grad_norm": 0.20090347528457642, "learning_rate": 1.4554831822869365e-06, "loss": 0.0027, "step": 5653 }, { "epoch": 332.5882352941176, "grad_norm": 0.34896427392959595, "learning_rate": 1.4530096907162372e-06, "loss": 0.0036, "step": 5654 }, { "epoch": 332.6470588235294, "grad_norm": 0.23120279610157013, "learning_rate": 1.4505381380287676e-06, "loss": 0.0044, "step": 5655 }, { "epoch": 332.70588235294116, "grad_norm": 0.19987915456295013, "learning_rate": 1.4480685247852011e-06, "loss": 0.0024, "step": 5656 }, { "epoch": 332.7647058823529, "grad_norm": 0.36872920393943787, "learning_rate": 1.4456008515457708e-06, "loss": 0.0086, "step": 5657 }, { "epoch": 332.8235294117647, "grad_norm": 0.1490451842546463, "learning_rate": 1.443135118870267e-06, "loss": 0.0025, "step": 5658 }, { "epoch": 332.88235294117646, "grad_norm": 0.47326964139938354, "learning_rate": 1.4406713273180396e-06, "loss": 0.0053, "step": 5659 }, { "epoch": 332.94117647058823, "grad_norm": 0.3186322748661041, "learning_rate": 1.4382094774479983e-06, "loss": 0.0025, "step": 5660 }, { "epoch": 333.0, "grad_norm": 0.4251973330974579, "learning_rate": 1.4357495698186186e-06, "loss": 0.0054, "step": 5661 }, { "epoch": 333.05882352941177, "grad_norm": 0.40842416882514954, "learning_rate": 1.433291604987923e-06, "loss": 0.0064, "step": 5662 }, { "epoch": 333.11764705882354, "grad_norm": 0.5298284888267517, "learning_rate": 1.4308355835135035e-06, "loss": 0.0045, "step": 5663 }, { "epoch": 333.1764705882353, "grad_norm": 0.1793743073940277, "learning_rate": 1.4283815059525085e-06, "loss": 0.0023, "step": 5664 }, { "epoch": 333.2352941176471, "grad_norm": 0.2896369695663452, "learning_rate": 1.4259293728616485e-06, "loss": 0.0031, "step": 5665 }, { "epoch": 333.29411764705884, "grad_norm": 0.19156156480312347, "learning_rate": 1.423479184797183e-06, "loss": 0.003, "step": 5666 }, { "epoch": 333.3529411764706, "grad_norm": 0.25504326820373535, "learning_rate": 1.421030942314937e-06, "loss": 0.0038, "step": 5667 }, { "epoch": 333.4117647058824, "grad_norm": 0.9344311356544495, "learning_rate": 1.4185846459703012e-06, "loss": 0.0073, "step": 5668 }, { "epoch": 333.47058823529414, "grad_norm": 0.32469338178634644, "learning_rate": 1.4161402963182103e-06, "loss": 0.0026, "step": 5669 }, { "epoch": 333.52941176470586, "grad_norm": 0.3681284785270691, "learning_rate": 1.4136978939131674e-06, "loss": 0.0042, "step": 5670 }, { "epoch": 333.5882352941176, "grad_norm": 0.19094562530517578, "learning_rate": 1.4112574393092304e-06, "loss": 0.0022, "step": 5671 }, { "epoch": 333.6470588235294, "grad_norm": 0.2576889395713806, "learning_rate": 1.4088189330600177e-06, "loss": 0.004, "step": 5672 }, { "epoch": 333.70588235294116, "grad_norm": 0.25991517305374146, "learning_rate": 1.4063823757187023e-06, "loss": 0.0031, "step": 5673 }, { "epoch": 333.7647058823529, "grad_norm": 0.23338866233825684, "learning_rate": 1.4039477678380154e-06, "loss": 0.0042, "step": 5674 }, { "epoch": 333.8235294117647, "grad_norm": 0.25755980610847473, "learning_rate": 1.4015151099702529e-06, "loss": 0.004, "step": 5675 }, { "epoch": 333.88235294117646, "grad_norm": 0.45730191469192505, "learning_rate": 1.3990844026672557e-06, "loss": 0.0047, "step": 5676 }, { "epoch": 333.94117647058823, "grad_norm": 0.41809016466140747, "learning_rate": 1.3966556464804326e-06, "loss": 0.0027, "step": 5677 }, { "epoch": 334.0, "grad_norm": 0.4151668846607208, "learning_rate": 1.3942288419607476e-06, "loss": 0.0065, "step": 5678 }, { "epoch": 334.05882352941177, "grad_norm": 0.4768639802932739, "learning_rate": 1.391803989658721e-06, "loss": 0.0044, "step": 5679 }, { "epoch": 334.11764705882354, "grad_norm": 0.311985045671463, "learning_rate": 1.3893810901244264e-06, "loss": 0.0044, "step": 5680 }, { "epoch": 334.1764705882353, "grad_norm": 0.43071070313453674, "learning_rate": 1.386960143907501e-06, "loss": 0.0057, "step": 5681 }, { "epoch": 334.2352941176471, "grad_norm": 0.3494732975959778, "learning_rate": 1.384541151557136e-06, "loss": 0.0037, "step": 5682 }, { "epoch": 334.29411764705884, "grad_norm": 0.3278051018714905, "learning_rate": 1.3821241136220809e-06, "loss": 0.0029, "step": 5683 }, { "epoch": 334.3529411764706, "grad_norm": 0.2777585983276367, "learning_rate": 1.3797090306506368e-06, "loss": 0.0047, "step": 5684 }, { "epoch": 334.4117647058824, "grad_norm": 0.2550446689128876, "learning_rate": 1.3772959031906663e-06, "loss": 0.0041, "step": 5685 }, { "epoch": 334.47058823529414, "grad_norm": 0.4321936368942261, "learning_rate": 1.3748847317895863e-06, "loss": 0.0046, "step": 5686 }, { "epoch": 334.52941176470586, "grad_norm": 0.5035028457641602, "learning_rate": 1.3724755169943749e-06, "loss": 0.0051, "step": 5687 }, { "epoch": 334.5882352941176, "grad_norm": 0.32892340421676636, "learning_rate": 1.3700682593515557e-06, "loss": 0.006, "step": 5688 }, { "epoch": 334.6470588235294, "grad_norm": 0.328624963760376, "learning_rate": 1.3676629594072177e-06, "loss": 0.0056, "step": 5689 }, { "epoch": 334.70588235294116, "grad_norm": 0.2593592703342438, "learning_rate": 1.3652596177070042e-06, "loss": 0.0032, "step": 5690 }, { "epoch": 334.7647058823529, "grad_norm": 0.29429420828819275, "learning_rate": 1.3628582347961096e-06, "loss": 0.0046, "step": 5691 }, { "epoch": 334.8235294117647, "grad_norm": 0.27490895986557007, "learning_rate": 1.3604588112192895e-06, "loss": 0.0036, "step": 5692 }, { "epoch": 334.88235294117646, "grad_norm": 0.6355653405189514, "learning_rate": 1.3580613475208514e-06, "loss": 0.004, "step": 5693 }, { "epoch": 334.94117647058823, "grad_norm": 0.2952772080898285, "learning_rate": 1.3556658442446625e-06, "loss": 0.0042, "step": 5694 }, { "epoch": 335.0, "grad_norm": 0.34388184547424316, "learning_rate": 1.3532723019341376e-06, "loss": 0.0036, "step": 5695 }, { "epoch": 335.05882352941177, "grad_norm": 0.5071057081222534, "learning_rate": 1.350880721132254e-06, "loss": 0.0051, "step": 5696 }, { "epoch": 335.11764705882354, "grad_norm": 0.14007438719272614, "learning_rate": 1.3484911023815405e-06, "loss": 0.0018, "step": 5697 }, { "epoch": 335.1764705882353, "grad_norm": 0.16123338043689728, "learning_rate": 1.3461034462240852e-06, "loss": 0.0021, "step": 5698 }, { "epoch": 335.2352941176471, "grad_norm": 0.2123868465423584, "learning_rate": 1.3437177532015221e-06, "loss": 0.0028, "step": 5699 }, { "epoch": 335.29411764705884, "grad_norm": 0.30969399213790894, "learning_rate": 1.3413340238550476e-06, "loss": 0.005, "step": 5700 }, { "epoch": 335.3529411764706, "grad_norm": 0.9018517136573792, "learning_rate": 1.33895225872541e-06, "loss": 0.0049, "step": 5701 }, { "epoch": 335.4117647058824, "grad_norm": 0.23565663397312164, "learning_rate": 1.3365724583529149e-06, "loss": 0.0026, "step": 5702 }, { "epoch": 335.47058823529414, "grad_norm": 0.49097132682800293, "learning_rate": 1.3341946232774162e-06, "loss": 0.0038, "step": 5703 }, { "epoch": 335.52941176470586, "grad_norm": 0.24870149791240692, "learning_rate": 1.3318187540383254e-06, "loss": 0.0041, "step": 5704 }, { "epoch": 335.5882352941176, "grad_norm": 0.17035187780857086, "learning_rate": 1.3294448511746128e-06, "loss": 0.0024, "step": 5705 }, { "epoch": 335.6470588235294, "grad_norm": 0.31448599696159363, "learning_rate": 1.3270729152247908e-06, "loss": 0.0061, "step": 5706 }, { "epoch": 335.70588235294116, "grad_norm": 0.34192997217178345, "learning_rate": 1.3247029467269356e-06, "loss": 0.003, "step": 5707 }, { "epoch": 335.7647058823529, "grad_norm": 0.16904757916927338, "learning_rate": 1.3223349462186753e-06, "loss": 0.0019, "step": 5708 }, { "epoch": 335.8235294117647, "grad_norm": 0.29150861501693726, "learning_rate": 1.3199689142371918e-06, "loss": 0.0044, "step": 5709 }, { "epoch": 335.88235294117646, "grad_norm": 1.0283629894256592, "learning_rate": 1.3176048513192152e-06, "loss": 0.0067, "step": 5710 }, { "epoch": 335.94117647058823, "grad_norm": 0.38049593567848206, "learning_rate": 1.315242758001034e-06, "loss": 0.0055, "step": 5711 }, { "epoch": 336.0, "grad_norm": 0.8831846714019775, "learning_rate": 1.3128826348184886e-06, "loss": 0.0082, "step": 5712 }, { "epoch": 336.05882352941177, "grad_norm": 0.23215897381305695, "learning_rate": 1.3105244823069752e-06, "loss": 0.003, "step": 5713 }, { "epoch": 336.11764705882354, "grad_norm": 1.2970608472824097, "learning_rate": 1.3081683010014367e-06, "loss": 0.0045, "step": 5714 }, { "epoch": 336.1764705882353, "grad_norm": 0.3631274998188019, "learning_rate": 1.3058140914363736e-06, "loss": 0.0048, "step": 5715 }, { "epoch": 336.2352941176471, "grad_norm": 0.337687224149704, "learning_rate": 1.30346185414584e-06, "loss": 0.0057, "step": 5716 }, { "epoch": 336.29411764705884, "grad_norm": 0.5756163597106934, "learning_rate": 1.3011115896634364e-06, "loss": 0.0034, "step": 5717 }, { "epoch": 336.3529411764706, "grad_norm": 0.43899136781692505, "learning_rate": 1.298763298522323e-06, "loss": 0.004, "step": 5718 }, { "epoch": 336.4117647058824, "grad_norm": 0.7642011046409607, "learning_rate": 1.2964169812552075e-06, "loss": 0.0024, "step": 5719 }, { "epoch": 336.47058823529414, "grad_norm": 0.11379297822713852, "learning_rate": 1.2940726383943547e-06, "loss": 0.0017, "step": 5720 }, { "epoch": 336.52941176470586, "grad_norm": 0.2177378237247467, "learning_rate": 1.291730270471574e-06, "loss": 0.0044, "step": 5721 }, { "epoch": 336.5882352941176, "grad_norm": 0.4421326816082001, "learning_rate": 1.2893898780182335e-06, "loss": 0.0088, "step": 5722 }, { "epoch": 336.6470588235294, "grad_norm": 0.40442273020744324, "learning_rate": 1.2870514615652496e-06, "loss": 0.006, "step": 5723 }, { "epoch": 336.70588235294116, "grad_norm": 0.3162773847579956, "learning_rate": 1.284715021643096e-06, "loss": 0.0034, "step": 5724 }, { "epoch": 336.7647058823529, "grad_norm": 0.3073984384536743, "learning_rate": 1.2823805587817873e-06, "loss": 0.0035, "step": 5725 }, { "epoch": 336.8235294117647, "grad_norm": 0.295421302318573, "learning_rate": 1.280048073510899e-06, "loss": 0.0037, "step": 5726 }, { "epoch": 336.88235294117646, "grad_norm": 0.21136769652366638, "learning_rate": 1.2777175663595554e-06, "loss": 0.003, "step": 5727 }, { "epoch": 336.94117647058823, "grad_norm": 0.24019815027713776, "learning_rate": 1.2753890378564327e-06, "loss": 0.0026, "step": 5728 }, { "epoch": 337.0, "grad_norm": 0.5023453831672668, "learning_rate": 1.2730624885297537e-06, "loss": 0.0033, "step": 5729 }, { "epoch": 337.05882352941177, "grad_norm": 0.5304373502731323, "learning_rate": 1.2707379189072977e-06, "loss": 0.0044, "step": 5730 }, { "epoch": 337.11764705882354, "grad_norm": 0.2644783556461334, "learning_rate": 1.2684153295163947e-06, "loss": 0.0027, "step": 5731 }, { "epoch": 337.1764705882353, "grad_norm": 0.24073681235313416, "learning_rate": 1.2660947208839203e-06, "loss": 0.0035, "step": 5732 }, { "epoch": 337.2352941176471, "grad_norm": 0.20233292877674103, "learning_rate": 1.2637760935363053e-06, "loss": 0.0035, "step": 5733 }, { "epoch": 337.29411764705884, "grad_norm": 0.20066413283348083, "learning_rate": 1.26145944799953e-06, "loss": 0.003, "step": 5734 }, { "epoch": 337.3529411764706, "grad_norm": 0.3733576834201813, "learning_rate": 1.259144784799128e-06, "loss": 0.0061, "step": 5735 }, { "epoch": 337.4117647058824, "grad_norm": 0.22553962469100952, "learning_rate": 1.256832104460175e-06, "loss": 0.0035, "step": 5736 }, { "epoch": 337.47058823529414, "grad_norm": 0.2519952654838562, "learning_rate": 1.2545214075073042e-06, "loss": 0.0033, "step": 5737 }, { "epoch": 337.52941176470586, "grad_norm": 0.15363895893096924, "learning_rate": 1.2522126944646972e-06, "loss": 0.0022, "step": 5738 }, { "epoch": 337.5882352941176, "grad_norm": 0.28109654784202576, "learning_rate": 1.2499059658560886e-06, "loss": 0.006, "step": 5739 }, { "epoch": 337.6470588235294, "grad_norm": 0.29308682680130005, "learning_rate": 1.247601222204753e-06, "loss": 0.0045, "step": 5740 }, { "epoch": 337.70588235294116, "grad_norm": 0.1915905475616455, "learning_rate": 1.2452984640335242e-06, "loss": 0.0035, "step": 5741 }, { "epoch": 337.7647058823529, "grad_norm": 0.26571354269981384, "learning_rate": 1.2429976918647846e-06, "loss": 0.004, "step": 5742 }, { "epoch": 337.8235294117647, "grad_norm": 0.22773808240890503, "learning_rate": 1.24069890622046e-06, "loss": 0.0045, "step": 5743 }, { "epoch": 337.88235294117646, "grad_norm": 0.20994840562343597, "learning_rate": 1.238402107622031e-06, "loss": 0.0027, "step": 5744 }, { "epoch": 337.94117647058823, "grad_norm": 0.19237393140792847, "learning_rate": 1.2361072965905252e-06, "loss": 0.0028, "step": 5745 }, { "epoch": 338.0, "grad_norm": 0.3049565851688385, "learning_rate": 1.233814473646524e-06, "loss": 0.0028, "step": 5746 }, { "epoch": 338.05882352941177, "grad_norm": 0.21543216705322266, "learning_rate": 1.2315236393101492e-06, "loss": 0.0025, "step": 5747 }, { "epoch": 338.11764705882354, "grad_norm": 1.0706974267959595, "learning_rate": 1.2292347941010773e-06, "loss": 0.0052, "step": 5748 }, { "epoch": 338.1764705882353, "grad_norm": 0.21623091399669647, "learning_rate": 1.226947938538533e-06, "loss": 0.0024, "step": 5749 }, { "epoch": 338.2352941176471, "grad_norm": 0.20948781073093414, "learning_rate": 1.2246630731412911e-06, "loss": 0.0025, "step": 5750 }, { "epoch": 338.29411764705884, "grad_norm": 0.5884403586387634, "learning_rate": 1.2223801984276696e-06, "loss": 0.0059, "step": 5751 }, { "epoch": 338.3529411764706, "grad_norm": 0.4501076936721802, "learning_rate": 1.2200993149155382e-06, "loss": 0.0069, "step": 5752 }, { "epoch": 338.4117647058824, "grad_norm": 0.2829713821411133, "learning_rate": 1.2178204231223167e-06, "loss": 0.0034, "step": 5753 }, { "epoch": 338.47058823529414, "grad_norm": 0.6392129063606262, "learning_rate": 1.2155435235649738e-06, "loss": 0.0029, "step": 5754 }, { "epoch": 338.52941176470586, "grad_norm": 0.5900087952613831, "learning_rate": 1.2132686167600183e-06, "loss": 0.0042, "step": 5755 }, { "epoch": 338.5882352941176, "grad_norm": 0.131147563457489, "learning_rate": 1.2109957032235143e-06, "loss": 0.0023, "step": 5756 }, { "epoch": 338.6470588235294, "grad_norm": 0.25700071454048157, "learning_rate": 1.2087247834710746e-06, "loss": 0.0039, "step": 5757 }, { "epoch": 338.70588235294116, "grad_norm": 0.44685792922973633, "learning_rate": 1.206455858017853e-06, "loss": 0.0059, "step": 5758 }, { "epoch": 338.7647058823529, "grad_norm": 0.2291933000087738, "learning_rate": 1.2041889273785568e-06, "loss": 0.0049, "step": 5759 }, { "epoch": 338.8235294117647, "grad_norm": 0.5640701055526733, "learning_rate": 1.2019239920674385e-06, "loss": 0.0064, "step": 5760 }, { "epoch": 338.88235294117646, "grad_norm": 0.2541458308696747, "learning_rate": 1.199661052598301e-06, "loss": 0.0049, "step": 5761 }, { "epoch": 338.94117647058823, "grad_norm": 0.08630836755037308, "learning_rate": 1.1974001094844868e-06, "loss": 0.0014, "step": 5762 }, { "epoch": 339.0, "grad_norm": 0.39502429962158203, "learning_rate": 1.195141163238892e-06, "loss": 0.0031, "step": 5763 }, { "epoch": 339.05882352941177, "grad_norm": 0.3021468222141266, "learning_rate": 1.1928842143739595e-06, "loss": 0.0026, "step": 5764 }, { "epoch": 339.11764705882354, "grad_norm": 0.10719741135835648, "learning_rate": 1.1906292634016792e-06, "loss": 0.0018, "step": 5765 }, { "epoch": 339.1764705882353, "grad_norm": 0.4580807089805603, "learning_rate": 1.1883763108335822e-06, "loss": 0.0032, "step": 5766 }, { "epoch": 339.2352941176471, "grad_norm": 0.43788981437683105, "learning_rate": 1.1861253571807519e-06, "loss": 0.0046, "step": 5767 }, { "epoch": 339.29411764705884, "grad_norm": 0.46780574321746826, "learning_rate": 1.1838764029538207e-06, "loss": 0.0036, "step": 5768 }, { "epoch": 339.3529411764706, "grad_norm": 0.32502129673957825, "learning_rate": 1.1816294486629565e-06, "loss": 0.0025, "step": 5769 }, { "epoch": 339.4117647058824, "grad_norm": 0.29446548223495483, "learning_rate": 1.1793844948178857e-06, "loss": 0.0065, "step": 5770 }, { "epoch": 339.47058823529414, "grad_norm": 1.552993893623352, "learning_rate": 1.177141541927872e-06, "loss": 0.0061, "step": 5771 }, { "epoch": 339.52941176470586, "grad_norm": 0.31627288460731506, "learning_rate": 1.1749005905017353e-06, "loss": 0.0048, "step": 5772 }, { "epoch": 339.5882352941176, "grad_norm": 0.45242035388946533, "learning_rate": 1.1726616410478263e-06, "loss": 0.0071, "step": 5773 }, { "epoch": 339.6470588235294, "grad_norm": 0.4422461986541748, "learning_rate": 1.170424694074056e-06, "loss": 0.0048, "step": 5774 }, { "epoch": 339.70588235294116, "grad_norm": 0.36274072527885437, "learning_rate": 1.1681897500878737e-06, "loss": 0.0054, "step": 5775 }, { "epoch": 339.7647058823529, "grad_norm": 0.2986987829208374, "learning_rate": 1.1659568095962793e-06, "loss": 0.0044, "step": 5776 }, { "epoch": 339.8235294117647, "grad_norm": 0.33114302158355713, "learning_rate": 1.1637258731058087e-06, "loss": 0.0022, "step": 5777 }, { "epoch": 339.88235294117646, "grad_norm": 0.2755914628505707, "learning_rate": 1.161496941122553e-06, "loss": 0.0033, "step": 5778 }, { "epoch": 339.94117647058823, "grad_norm": 0.26576748490333557, "learning_rate": 1.159270014152145e-06, "loss": 0.0029, "step": 5779 }, { "epoch": 340.0, "grad_norm": 0.21308976411819458, "learning_rate": 1.1570450926997657e-06, "loss": 0.0035, "step": 5780 }, { "epoch": 340.05882352941177, "grad_norm": 0.3503769636154175, "learning_rate": 1.1548221772701317e-06, "loss": 0.0033, "step": 5781 }, { "epoch": 340.11764705882354, "grad_norm": 0.33494845032691956, "learning_rate": 1.1526012683675135e-06, "loss": 0.0022, "step": 5782 }, { "epoch": 340.1764705882353, "grad_norm": 0.4103022515773773, "learning_rate": 1.1503823664957282e-06, "loss": 0.0057, "step": 5783 }, { "epoch": 340.2352941176471, "grad_norm": 0.21111640334129333, "learning_rate": 1.148165472158127e-06, "loss": 0.0039, "step": 5784 }, { "epoch": 340.29411764705884, "grad_norm": 0.1390039473772049, "learning_rate": 1.1459505858576148e-06, "loss": 0.0023, "step": 5785 }, { "epoch": 340.3529411764706, "grad_norm": 0.20613819360733032, "learning_rate": 1.1437377080966384e-06, "loss": 0.0028, "step": 5786 }, { "epoch": 340.4117647058824, "grad_norm": 0.2640751004219055, "learning_rate": 1.1415268393771895e-06, "loss": 0.0023, "step": 5787 }, { "epoch": 340.47058823529414, "grad_norm": 0.20453359186649323, "learning_rate": 1.1393179802008002e-06, "loss": 0.0039, "step": 5788 }, { "epoch": 340.52941176470586, "grad_norm": 0.39712706208229065, "learning_rate": 1.1371111310685524e-06, "loss": 0.0053, "step": 5789 }, { "epoch": 340.5882352941176, "grad_norm": 0.17252053320407867, "learning_rate": 1.1349062924810682e-06, "loss": 0.0026, "step": 5790 }, { "epoch": 340.6470588235294, "grad_norm": 0.5858008861541748, "learning_rate": 1.1327034649385183e-06, "loss": 0.006, "step": 5791 }, { "epoch": 340.70588235294116, "grad_norm": 0.10867335647344589, "learning_rate": 1.130502648940608e-06, "loss": 0.0017, "step": 5792 }, { "epoch": 340.7647058823529, "grad_norm": 0.29771170020103455, "learning_rate": 1.1283038449865956e-06, "loss": 0.0036, "step": 5793 }, { "epoch": 340.8235294117647, "grad_norm": 0.2792544364929199, "learning_rate": 1.1261070535752783e-06, "loss": 0.0067, "step": 5794 }, { "epoch": 340.88235294117646, "grad_norm": 0.22578032314777374, "learning_rate": 1.1239122752050003e-06, "loss": 0.0046, "step": 5795 }, { "epoch": 340.94117647058823, "grad_norm": 0.375335156917572, "learning_rate": 1.1217195103736422e-06, "loss": 0.0038, "step": 5796 }, { "epoch": 341.0, "grad_norm": 0.2582966983318329, "learning_rate": 1.1195287595786352e-06, "loss": 0.0042, "step": 5797 }, { "epoch": 341.05882352941177, "grad_norm": 0.2511938512325287, "learning_rate": 1.1173400233169528e-06, "loss": 0.0042, "step": 5798 }, { "epoch": 341.11764705882354, "grad_norm": 0.29972296953201294, "learning_rate": 1.1151533020851035e-06, "loss": 0.0031, "step": 5799 }, { "epoch": 341.1764705882353, "grad_norm": 0.24830082058906555, "learning_rate": 1.1129685963791493e-06, "loss": 0.0043, "step": 5800 }, { "epoch": 341.2352941176471, "grad_norm": 0.28750646114349365, "learning_rate": 1.110785906694688e-06, "loss": 0.0044, "step": 5801 }, { "epoch": 341.29411764705884, "grad_norm": 0.5705018043518066, "learning_rate": 1.108605233526866e-06, "loss": 0.0056, "step": 5802 }, { "epoch": 341.3529411764706, "grad_norm": 0.45245736837387085, "learning_rate": 1.106426577370363e-06, "loss": 0.0045, "step": 5803 }, { "epoch": 341.4117647058824, "grad_norm": 0.3932887315750122, "learning_rate": 1.1042499387194105e-06, "loss": 0.0061, "step": 5804 }, { "epoch": 341.47058823529414, "grad_norm": 0.176783949136734, "learning_rate": 1.102075318067778e-06, "loss": 0.0022, "step": 5805 }, { "epoch": 341.52941176470586, "grad_norm": 0.17480416595935822, "learning_rate": 1.0999027159087794e-06, "loss": 0.0019, "step": 5806 }, { "epoch": 341.5882352941176, "grad_norm": 0.3892902433872223, "learning_rate": 1.0977321327352663e-06, "loss": 0.0037, "step": 5807 }, { "epoch": 341.6470588235294, "grad_norm": 0.22994831204414368, "learning_rate": 1.0955635690396348e-06, "loss": 0.0023, "step": 5808 }, { "epoch": 341.70588235294116, "grad_norm": 0.44189026951789856, "learning_rate": 1.093397025313827e-06, "loss": 0.0037, "step": 5809 }, { "epoch": 341.7647058823529, "grad_norm": 0.16884934902191162, "learning_rate": 1.0912325020493187e-06, "loss": 0.0023, "step": 5810 }, { "epoch": 341.8235294117647, "grad_norm": 0.49355190992355347, "learning_rate": 1.0890699997371334e-06, "loss": 0.0075, "step": 5811 }, { "epoch": 341.88235294117646, "grad_norm": 0.2993123233318329, "learning_rate": 1.0869095188678347e-06, "loss": 0.0031, "step": 5812 }, { "epoch": 341.94117647058823, "grad_norm": 0.31352052092552185, "learning_rate": 1.084751059931528e-06, "loss": 0.0048, "step": 5813 }, { "epoch": 342.0, "grad_norm": 0.487728089094162, "learning_rate": 1.0825946234178575e-06, "loss": 0.0059, "step": 5814 }, { "epoch": 342.05882352941177, "grad_norm": 0.31129035353660583, "learning_rate": 1.0804402098160105e-06, "loss": 0.0055, "step": 5815 }, { "epoch": 342.11764705882354, "grad_norm": 0.2927027940750122, "learning_rate": 1.0782878196147162e-06, "loss": 0.0029, "step": 5816 }, { "epoch": 342.1764705882353, "grad_norm": 0.16779020428657532, "learning_rate": 1.076137453302246e-06, "loss": 0.002, "step": 5817 }, { "epoch": 342.2352941176471, "grad_norm": 0.2977831959724426, "learning_rate": 1.073989111366406e-06, "loss": 0.0049, "step": 5818 }, { "epoch": 342.29411764705884, "grad_norm": 0.173441544175148, "learning_rate": 1.0718427942945486e-06, "loss": 0.0042, "step": 5819 }, { "epoch": 342.3529411764706, "grad_norm": 0.34520789980888367, "learning_rate": 1.0696985025735662e-06, "loss": 0.0047, "step": 5820 }, { "epoch": 342.4117647058824, "grad_norm": 0.19601166248321533, "learning_rate": 1.0675562366898929e-06, "loss": 0.0023, "step": 5821 }, { "epoch": 342.47058823529414, "grad_norm": 0.6577521562576294, "learning_rate": 1.0654159971294975e-06, "loss": 0.0061, "step": 5822 }, { "epoch": 342.52941176470586, "grad_norm": 0.2116324007511139, "learning_rate": 1.0632777843778952e-06, "loss": 0.0025, "step": 5823 }, { "epoch": 342.5882352941176, "grad_norm": 0.7755225300788879, "learning_rate": 1.061141598920139e-06, "loss": 0.0049, "step": 5824 }, { "epoch": 342.6470588235294, "grad_norm": 0.14897501468658447, "learning_rate": 1.0590074412408214e-06, "loss": 0.002, "step": 5825 }, { "epoch": 342.70588235294116, "grad_norm": 0.22595924139022827, "learning_rate": 1.056875311824076e-06, "loss": 0.0026, "step": 5826 }, { "epoch": 342.7647058823529, "grad_norm": 0.26851531863212585, "learning_rate": 1.054745211153575e-06, "loss": 0.0035, "step": 5827 }, { "epoch": 342.8235294117647, "grad_norm": 0.1766374409198761, "learning_rate": 1.052617139712535e-06, "loss": 0.0023, "step": 5828 }, { "epoch": 342.88235294117646, "grad_norm": 0.328343003988266, "learning_rate": 1.0504910979837046e-06, "loss": 0.0026, "step": 5829 }, { "epoch": 342.94117647058823, "grad_norm": 0.4411298632621765, "learning_rate": 1.0483670864493777e-06, "loss": 0.0054, "step": 5830 }, { "epoch": 343.0, "grad_norm": 0.46191585063934326, "learning_rate": 1.0462451055913847e-06, "loss": 0.0095, "step": 5831 }, { "epoch": 343.05882352941177, "grad_norm": 0.4164176881313324, "learning_rate": 1.0441251558911002e-06, "loss": 0.0067, "step": 5832 }, { "epoch": 343.11764705882354, "grad_norm": 0.1990179866552353, "learning_rate": 1.04200723782943e-06, "loss": 0.0031, "step": 5833 }, { "epoch": 343.1764705882353, "grad_norm": 0.2319086492061615, "learning_rate": 1.0398913518868247e-06, "loss": 0.0043, "step": 5834 }, { "epoch": 343.2352941176471, "grad_norm": 0.6197136640548706, "learning_rate": 1.0377774985432765e-06, "loss": 0.0024, "step": 5835 }, { "epoch": 343.29411764705884, "grad_norm": 0.4797496497631073, "learning_rate": 1.035665678278306e-06, "loss": 0.0058, "step": 5836 }, { "epoch": 343.3529411764706, "grad_norm": 0.246119424700737, "learning_rate": 1.0335558915709831e-06, "loss": 0.004, "step": 5837 }, { "epoch": 343.4117647058824, "grad_norm": 0.4419288635253906, "learning_rate": 1.031448138899912e-06, "loss": 0.0051, "step": 5838 }, { "epoch": 343.47058823529414, "grad_norm": 0.1358630210161209, "learning_rate": 1.029342420743238e-06, "loss": 0.0024, "step": 5839 }, { "epoch": 343.52941176470586, "grad_norm": 0.19147595763206482, "learning_rate": 1.0272387375786397e-06, "loss": 0.0024, "step": 5840 }, { "epoch": 343.5882352941176, "grad_norm": 0.32115113735198975, "learning_rate": 1.025137089883338e-06, "loss": 0.0065, "step": 5841 }, { "epoch": 343.6470588235294, "grad_norm": 0.25634872913360596, "learning_rate": 1.0230374781340912e-06, "loss": 0.0028, "step": 5842 }, { "epoch": 343.70588235294116, "grad_norm": 0.26211369037628174, "learning_rate": 1.0209399028071997e-06, "loss": 0.0032, "step": 5843 }, { "epoch": 343.7647058823529, "grad_norm": 0.5767489075660706, "learning_rate": 1.0188443643784917e-06, "loss": 0.0047, "step": 5844 }, { "epoch": 343.8235294117647, "grad_norm": 0.25814664363861084, "learning_rate": 1.0167508633233435e-06, "loss": 0.0033, "step": 5845 }, { "epoch": 343.88235294117646, "grad_norm": 0.3095361292362213, "learning_rate": 1.0146594001166653e-06, "loss": 0.0026, "step": 5846 }, { "epoch": 343.94117647058823, "grad_norm": 0.3371681869029999, "learning_rate": 1.0125699752329053e-06, "loss": 0.0042, "step": 5847 }, { "epoch": 344.0, "grad_norm": 0.17568281292915344, "learning_rate": 1.010482589146048e-06, "loss": 0.0023, "step": 5848 }, { "epoch": 344.05882352941177, "grad_norm": 0.18725794553756714, "learning_rate": 1.0083972423296162e-06, "loss": 0.0036, "step": 5849 }, { "epoch": 344.11764705882354, "grad_norm": 0.1901543289422989, "learning_rate": 1.0063139352566741e-06, "loss": 0.0036, "step": 5850 }, { "epoch": 344.1764705882353, "grad_norm": 0.4616171717643738, "learning_rate": 1.0042326683998137e-06, "loss": 0.0024, "step": 5851 }, { "epoch": 344.2352941176471, "grad_norm": 0.3010218143463135, "learning_rate": 1.0021534422311741e-06, "loss": 0.0031, "step": 5852 }, { "epoch": 344.29411764705884, "grad_norm": 0.16519492864608765, "learning_rate": 1.0000762572224265e-06, "loss": 0.0018, "step": 5853 }, { "epoch": 344.3529411764706, "grad_norm": 0.24176257848739624, "learning_rate": 9.98001113844781e-07, "loss": 0.0044, "step": 5854 }, { "epoch": 344.4117647058824, "grad_norm": 0.35566386580467224, "learning_rate": 9.95928012568982e-07, "loss": 0.0033, "step": 5855 }, { "epoch": 344.47058823529414, "grad_norm": 0.2939256727695465, "learning_rate": 9.938569538653108e-07, "loss": 0.005, "step": 5856 }, { "epoch": 344.52941176470586, "grad_norm": 0.3621697425842285, "learning_rate": 9.917879382035889e-07, "loss": 0.0052, "step": 5857 }, { "epoch": 344.5882352941176, "grad_norm": 0.3524491786956787, "learning_rate": 9.897209660531736e-07, "loss": 0.0029, "step": 5858 }, { "epoch": 344.6470588235294, "grad_norm": 0.41435879468917847, "learning_rate": 9.876560378829537e-07, "loss": 0.0061, "step": 5859 }, { "epoch": 344.70588235294116, "grad_norm": 0.6783703565597534, "learning_rate": 9.855931541613594e-07, "loss": 0.0058, "step": 5860 }, { "epoch": 344.7647058823529, "grad_norm": 0.28911861777305603, "learning_rate": 9.83532315356357e-07, "loss": 0.0021, "step": 5861 }, { "epoch": 344.8235294117647, "grad_norm": 0.20091882348060608, "learning_rate": 9.814735219354433e-07, "loss": 0.0026, "step": 5862 }, { "epoch": 344.88235294117646, "grad_norm": 0.21685895323753357, "learning_rate": 9.79416774365659e-07, "loss": 0.0032, "step": 5863 }, { "epoch": 344.94117647058823, "grad_norm": 0.5132777094841003, "learning_rate": 9.773620731135747e-07, "loss": 0.0051, "step": 5864 }, { "epoch": 345.0, "grad_norm": 0.16143734753131866, "learning_rate": 9.753094186453028e-07, "loss": 0.0022, "step": 5865 }, { "epoch": 345.05882352941177, "grad_norm": 0.8591729402542114, "learning_rate": 9.732588114264818e-07, "loss": 0.0045, "step": 5866 }, { "epoch": 345.11764705882354, "grad_norm": 0.33727917075157166, "learning_rate": 9.712102519222954e-07, "loss": 0.0063, "step": 5867 }, { "epoch": 345.1764705882353, "grad_norm": 0.39666956663131714, "learning_rate": 9.691637405974574e-07, "loss": 0.0042, "step": 5868 }, { "epoch": 345.2352941176471, "grad_norm": 0.7954006195068359, "learning_rate": 9.67119277916222e-07, "loss": 0.0057, "step": 5869 }, { "epoch": 345.29411764705884, "grad_norm": 0.39623838663101196, "learning_rate": 9.650768643423703e-07, "loss": 0.003, "step": 5870 }, { "epoch": 345.3529411764706, "grad_norm": 0.16682295501232147, "learning_rate": 9.63036500339225e-07, "loss": 0.0038, "step": 5871 }, { "epoch": 345.4117647058824, "grad_norm": 0.2760362923145294, "learning_rate": 9.609981863696439e-07, "loss": 0.0038, "step": 5872 }, { "epoch": 345.47058823529414, "grad_norm": 0.4405128061771393, "learning_rate": 9.589619228960169e-07, "loss": 0.0054, "step": 5873 }, { "epoch": 345.52941176470586, "grad_norm": 0.48845374584198, "learning_rate": 9.5692771038027e-07, "loss": 0.0028, "step": 5874 }, { "epoch": 345.5882352941176, "grad_norm": 0.21019183099269867, "learning_rate": 9.54895549283864e-07, "loss": 0.0021, "step": 5875 }, { "epoch": 345.6470588235294, "grad_norm": 0.261020690202713, "learning_rate": 9.528654400677973e-07, "loss": 0.0024, "step": 5876 }, { "epoch": 345.70588235294116, "grad_norm": 0.27910399436950684, "learning_rate": 9.508373831925944e-07, "loss": 0.0035, "step": 5877 }, { "epoch": 345.7647058823529, "grad_norm": 0.353290855884552, "learning_rate": 9.488113791183229e-07, "loss": 0.0048, "step": 5878 }, { "epoch": 345.8235294117647, "grad_norm": 0.19872507452964783, "learning_rate": 9.467874283045819e-07, "loss": 0.0027, "step": 5879 }, { "epoch": 345.88235294117646, "grad_norm": 0.19692109525203705, "learning_rate": 9.447655312105019e-07, "loss": 0.0024, "step": 5880 }, { "epoch": 345.94117647058823, "grad_norm": 0.8126667737960815, "learning_rate": 9.427456882947516e-07, "loss": 0.0055, "step": 5881 }, { "epoch": 346.0, "grad_norm": 0.2316451072692871, "learning_rate": 9.407279000155311e-07, "loss": 0.0039, "step": 5882 }, { "epoch": 346.05882352941177, "grad_norm": 0.5937872529029846, "learning_rate": 9.387121668305776e-07, "loss": 0.0054, "step": 5883 }, { "epoch": 346.11764705882354, "grad_norm": 0.3443146347999573, "learning_rate": 9.366984891971564e-07, "loss": 0.0041, "step": 5884 }, { "epoch": 346.1764705882353, "grad_norm": 0.26426228880882263, "learning_rate": 9.346868675720722e-07, "loss": 0.0027, "step": 5885 }, { "epoch": 346.2352941176471, "grad_norm": 2.5416510105133057, "learning_rate": 9.326773024116587e-07, "loss": 0.0065, "step": 5886 }, { "epoch": 346.29411764705884, "grad_norm": 0.30814382433891296, "learning_rate": 9.306697941717902e-07, "loss": 0.004, "step": 5887 }, { "epoch": 346.3529411764706, "grad_norm": 0.271373987197876, "learning_rate": 9.286643433078646e-07, "loss": 0.0033, "step": 5888 }, { "epoch": 346.4117647058824, "grad_norm": 0.3149663209915161, "learning_rate": 9.266609502748202e-07, "loss": 0.0036, "step": 5889 }, { "epoch": 346.47058823529414, "grad_norm": 0.22651918232440948, "learning_rate": 9.24659615527127e-07, "loss": 0.004, "step": 5890 }, { "epoch": 346.52941176470586, "grad_norm": 0.37404364347457886, "learning_rate": 9.22660339518785e-07, "loss": 0.0041, "step": 5891 }, { "epoch": 346.5882352941176, "grad_norm": 0.5281399488449097, "learning_rate": 9.206631227033314e-07, "loss": 0.0033, "step": 5892 }, { "epoch": 346.6470588235294, "grad_norm": 0.10698605328798294, "learning_rate": 9.186679655338338e-07, "loss": 0.0021, "step": 5893 }, { "epoch": 346.70588235294116, "grad_norm": 0.2931418716907501, "learning_rate": 9.166748684628968e-07, "loss": 0.0031, "step": 5894 }, { "epoch": 346.7647058823529, "grad_norm": 0.6918272376060486, "learning_rate": 9.146838319426476e-07, "loss": 0.0036, "step": 5895 }, { "epoch": 346.8235294117647, "grad_norm": 0.45403778553009033, "learning_rate": 9.126948564247561e-07, "loss": 0.008, "step": 5896 }, { "epoch": 346.88235294117646, "grad_norm": 0.3359440565109253, "learning_rate": 9.107079423604214e-07, "loss": 0.003, "step": 5897 }, { "epoch": 346.94117647058823, "grad_norm": 0.5693737268447876, "learning_rate": 9.087230902003752e-07, "loss": 0.005, "step": 5898 }, { "epoch": 347.0, "grad_norm": 0.41690701246261597, "learning_rate": 9.067403003948783e-07, "loss": 0.0047, "step": 5899 }, { "epoch": 347.05882352941177, "grad_norm": 0.1831592470407486, "learning_rate": 9.047595733937276e-07, "loss": 0.0042, "step": 5900 }, { "epoch": 347.11764705882354, "grad_norm": 0.5123358368873596, "learning_rate": 9.027809096462503e-07, "loss": 0.0047, "step": 5901 }, { "epoch": 347.1764705882353, "grad_norm": 0.4324650466442108, "learning_rate": 9.008043096013086e-07, "loss": 0.0059, "step": 5902 }, { "epoch": 347.2352941176471, "grad_norm": 0.3126361072063446, "learning_rate": 8.988297737072893e-07, "loss": 0.0044, "step": 5903 }, { "epoch": 347.29411764705884, "grad_norm": 0.13300906121730804, "learning_rate": 8.968573024121185e-07, "loss": 0.0015, "step": 5904 }, { "epoch": 347.3529411764706, "grad_norm": 0.1995469331741333, "learning_rate": 8.948868961632507e-07, "loss": 0.0033, "step": 5905 }, { "epoch": 347.4117647058824, "grad_norm": 0.16431158781051636, "learning_rate": 8.929185554076702e-07, "loss": 0.0034, "step": 5906 }, { "epoch": 347.47058823529414, "grad_norm": 0.5629825592041016, "learning_rate": 8.909522805918969e-07, "loss": 0.0034, "step": 5907 }, { "epoch": 347.52941176470586, "grad_norm": 0.1389555037021637, "learning_rate": 8.889880721619793e-07, "loss": 0.0022, "step": 5908 }, { "epoch": 347.5882352941176, "grad_norm": 0.6366961002349854, "learning_rate": 8.870259305634999e-07, "loss": 0.0067, "step": 5909 }, { "epoch": 347.6470588235294, "grad_norm": 0.3044132888317108, "learning_rate": 8.850658562415648e-07, "loss": 0.004, "step": 5910 }, { "epoch": 347.70588235294116, "grad_norm": 0.43926915526390076, "learning_rate": 8.831078496408207e-07, "loss": 0.003, "step": 5911 }, { "epoch": 347.7647058823529, "grad_norm": 0.28728339076042175, "learning_rate": 8.81151911205439e-07, "loss": 0.0052, "step": 5912 }, { "epoch": 347.8235294117647, "grad_norm": 0.6984697580337524, "learning_rate": 8.791980413791279e-07, "loss": 0.0063, "step": 5913 }, { "epoch": 347.88235294117646, "grad_norm": 0.157797709107399, "learning_rate": 8.772462406051174e-07, "loss": 0.0022, "step": 5914 }, { "epoch": 347.94117647058823, "grad_norm": 0.34479856491088867, "learning_rate": 8.752965093261756e-07, "loss": 0.0032, "step": 5915 }, { "epoch": 348.0, "grad_norm": 0.331734299659729, "learning_rate": 8.733488479845997e-07, "loss": 0.0038, "step": 5916 }, { "epoch": 348.05882352941177, "grad_norm": 0.15462914109230042, "learning_rate": 8.714032570222131e-07, "loss": 0.0021, "step": 5917 }, { "epoch": 348.11764705882354, "grad_norm": 0.5126804113388062, "learning_rate": 8.694597368803748e-07, "loss": 0.0068, "step": 5918 }, { "epoch": 348.1764705882353, "grad_norm": 0.18863485753536224, "learning_rate": 8.675182879999722e-07, "loss": 0.0019, "step": 5919 }, { "epoch": 348.2352941176471, "grad_norm": 0.27735036611557007, "learning_rate": 8.655789108214241e-07, "loss": 0.0024, "step": 5920 }, { "epoch": 348.29411764705884, "grad_norm": 0.3560091555118561, "learning_rate": 8.636416057846742e-07, "loss": 0.0069, "step": 5921 }, { "epoch": 348.3529411764706, "grad_norm": 0.23531709611415863, "learning_rate": 8.61706373329202e-07, "loss": 0.0041, "step": 5922 }, { "epoch": 348.4117647058824, "grad_norm": 0.18348123133182526, "learning_rate": 8.597732138940129e-07, "loss": 0.004, "step": 5923 }, { "epoch": 348.47058823529414, "grad_norm": 0.4171428084373474, "learning_rate": 8.578421279176485e-07, "loss": 0.0039, "step": 5924 }, { "epoch": 348.52941176470586, "grad_norm": 0.2411983609199524, "learning_rate": 8.559131158381695e-07, "loss": 0.003, "step": 5925 }, { "epoch": 348.5882352941176, "grad_norm": 0.34320205450057983, "learning_rate": 8.539861780931746e-07, "loss": 0.0034, "step": 5926 }, { "epoch": 348.6470588235294, "grad_norm": 0.4157249331474304, "learning_rate": 8.520613151197899e-07, "loss": 0.006, "step": 5927 }, { "epoch": 348.70588235294116, "grad_norm": 0.30883604288101196, "learning_rate": 8.501385273546703e-07, "loss": 0.0048, "step": 5928 }, { "epoch": 348.7647058823529, "grad_norm": 0.31299009919166565, "learning_rate": 8.482178152339982e-07, "loss": 0.002, "step": 5929 }, { "epoch": 348.8235294117647, "grad_norm": 0.4476401209831238, "learning_rate": 8.462991791934861e-07, "loss": 0.0039, "step": 5930 }, { "epoch": 348.88235294117646, "grad_norm": 0.10950156301259995, "learning_rate": 8.443826196683802e-07, "loss": 0.0018, "step": 5931 }, { "epoch": 348.94117647058823, "grad_norm": 0.2070738524198532, "learning_rate": 8.424681370934484e-07, "loss": 0.0025, "step": 5932 }, { "epoch": 349.0, "grad_norm": 0.26367154717445374, "learning_rate": 8.405557319029911e-07, "loss": 0.0024, "step": 5933 }, { "epoch": 349.05882352941177, "grad_norm": 0.2412245273590088, "learning_rate": 8.386454045308368e-07, "loss": 0.0044, "step": 5934 }, { "epoch": 349.11764705882354, "grad_norm": 0.34748637676239014, "learning_rate": 8.367371554103465e-07, "loss": 0.0054, "step": 5935 }, { "epoch": 349.1764705882353, "grad_norm": 0.13476242125034332, "learning_rate": 8.348309849744007e-07, "loss": 0.0016, "step": 5936 }, { "epoch": 349.2352941176471, "grad_norm": 0.43234843015670776, "learning_rate": 8.329268936554169e-07, "loss": 0.0029, "step": 5937 }, { "epoch": 349.29411764705884, "grad_norm": 0.3409227132797241, "learning_rate": 8.310248818853383e-07, "loss": 0.0023, "step": 5938 }, { "epoch": 349.3529411764706, "grad_norm": 0.282002717256546, "learning_rate": 8.291249500956355e-07, "loss": 0.0044, "step": 5939 }, { "epoch": 349.4117647058824, "grad_norm": 0.5949348211288452, "learning_rate": 8.272270987173059e-07, "loss": 0.0058, "step": 5940 }, { "epoch": 349.47058823529414, "grad_norm": 0.4174041152000427, "learning_rate": 8.253313281808784e-07, "loss": 0.005, "step": 5941 }, { "epoch": 349.52941176470586, "grad_norm": 0.20408901572227478, "learning_rate": 8.234376389164078e-07, "loss": 0.0034, "step": 5942 }, { "epoch": 349.5882352941176, "grad_norm": 0.27804258465766907, "learning_rate": 8.215460313534796e-07, "loss": 0.0051, "step": 5943 }, { "epoch": 349.6470588235294, "grad_norm": 0.1273326575756073, "learning_rate": 8.196565059212003e-07, "loss": 0.0024, "step": 5944 }, { "epoch": 349.70588235294116, "grad_norm": 0.4179855287075043, "learning_rate": 8.177690630482105e-07, "loss": 0.0065, "step": 5945 }, { "epoch": 349.7647058823529, "grad_norm": 0.2232399731874466, "learning_rate": 8.158837031626776e-07, "loss": 0.0024, "step": 5946 }, { "epoch": 349.8235294117647, "grad_norm": 0.22622184455394745, "learning_rate": 8.14000426692293e-07, "loss": 0.0026, "step": 5947 }, { "epoch": 349.88235294117646, "grad_norm": 1.281346082687378, "learning_rate": 8.121192340642769e-07, "loss": 0.0032, "step": 5948 }, { "epoch": 349.94117647058823, "grad_norm": 0.2796913981437683, "learning_rate": 8.102401257053805e-07, "loss": 0.004, "step": 5949 }, { "epoch": 350.0, "grad_norm": 0.2451666295528412, "learning_rate": 8.083631020418792e-07, "loss": 0.0045, "step": 5950 }, { "epoch": 350.05882352941177, "grad_norm": 0.24375467002391815, "learning_rate": 8.064881634995714e-07, "loss": 0.0052, "step": 5951 }, { "epoch": 350.11764705882354, "grad_norm": 0.18360517919063568, "learning_rate": 8.046153105037902e-07, "loss": 0.0026, "step": 5952 }, { "epoch": 350.1764705882353, "grad_norm": 0.16685298085212708, "learning_rate": 8.027445434793901e-07, "loss": 0.002, "step": 5953 }, { "epoch": 350.2352941176471, "grad_norm": 0.24061319231987, "learning_rate": 8.008758628507574e-07, "loss": 0.0039, "step": 5954 }, { "epoch": 350.29411764705884, "grad_norm": 0.15882501006126404, "learning_rate": 7.990092690417983e-07, "loss": 0.002, "step": 5955 }, { "epoch": 350.3529411764706, "grad_norm": 0.21433740854263306, "learning_rate": 7.971447624759498e-07, "loss": 0.0022, "step": 5956 }, { "epoch": 350.4117647058824, "grad_norm": 0.5373263359069824, "learning_rate": 7.952823435761792e-07, "loss": 0.0027, "step": 5957 }, { "epoch": 350.47058823529414, "grad_norm": 0.1878013014793396, "learning_rate": 7.934220127649694e-07, "loss": 0.0038, "step": 5958 }, { "epoch": 350.52941176470586, "grad_norm": 0.20951394736766815, "learning_rate": 7.915637704643386e-07, "loss": 0.0039, "step": 5959 }, { "epoch": 350.5882352941176, "grad_norm": 0.4808705151081085, "learning_rate": 7.897076170958307e-07, "loss": 0.0074, "step": 5960 }, { "epoch": 350.6470588235294, "grad_norm": 1.107116460800171, "learning_rate": 7.878535530805131e-07, "loss": 0.0052, "step": 5961 }, { "epoch": 350.70588235294116, "grad_norm": 0.2685695290565491, "learning_rate": 7.860015788389785e-07, "loss": 0.0026, "step": 5962 }, { "epoch": 350.7647058823529, "grad_norm": 0.19936303794384003, "learning_rate": 7.841516947913463e-07, "loss": 0.0035, "step": 5963 }, { "epoch": 350.8235294117647, "grad_norm": 0.3561164438724518, "learning_rate": 7.823039013572642e-07, "loss": 0.0057, "step": 5964 }, { "epoch": 350.88235294117646, "grad_norm": 0.3108116686344147, "learning_rate": 7.804581989559057e-07, "loss": 0.0034, "step": 5965 }, { "epoch": 350.94117647058823, "grad_norm": 0.22950394451618195, "learning_rate": 7.786145880059626e-07, "loss": 0.0032, "step": 5966 }, { "epoch": 351.0, "grad_norm": 0.37077581882476807, "learning_rate": 7.767730689256614e-07, "loss": 0.0041, "step": 5967 }, { "epoch": 351.05882352941177, "grad_norm": 0.29961809515953064, "learning_rate": 7.749336421327502e-07, "loss": 0.0023, "step": 5968 }, { "epoch": 351.11764705882354, "grad_norm": 0.3156924843788147, "learning_rate": 7.730963080445031e-07, "loss": 0.0026, "step": 5969 }, { "epoch": 351.1764705882353, "grad_norm": 0.1557232141494751, "learning_rate": 7.712610670777165e-07, "loss": 0.0027, "step": 5970 }, { "epoch": 351.2352941176471, "grad_norm": 0.36719346046447754, "learning_rate": 7.694279196487164e-07, "loss": 0.0054, "step": 5971 }, { "epoch": 351.29411764705884, "grad_norm": 0.223433256149292, "learning_rate": 7.675968661733535e-07, "loss": 0.0053, "step": 5972 }, { "epoch": 351.3529411764706, "grad_norm": 0.19602084159851074, "learning_rate": 7.657679070669988e-07, "loss": 0.0044, "step": 5973 }, { "epoch": 351.4117647058824, "grad_norm": 0.24389824271202087, "learning_rate": 7.639410427445515e-07, "loss": 0.0032, "step": 5974 }, { "epoch": 351.47058823529414, "grad_norm": 0.330963671207428, "learning_rate": 7.621162736204368e-07, "loss": 0.0034, "step": 5975 }, { "epoch": 351.52941176470586, "grad_norm": 0.43356379866600037, "learning_rate": 7.602936001086058e-07, "loss": 0.0049, "step": 5976 }, { "epoch": 351.5882352941176, "grad_norm": 0.8599340915679932, "learning_rate": 7.584730226225257e-07, "loss": 0.0051, "step": 5977 }, { "epoch": 351.6470588235294, "grad_norm": 0.8155368566513062, "learning_rate": 7.56654541575198e-07, "loss": 0.0066, "step": 5978 }, { "epoch": 351.70588235294116, "grad_norm": 0.3521867096424103, "learning_rate": 7.548381573791441e-07, "loss": 0.0041, "step": 5979 }, { "epoch": 351.7647058823529, "grad_norm": 0.19630637764930725, "learning_rate": 7.530238704464133e-07, "loss": 0.0031, "step": 5980 }, { "epoch": 351.8235294117647, "grad_norm": 0.39792293310165405, "learning_rate": 7.512116811885705e-07, "loss": 0.0032, "step": 5981 }, { "epoch": 351.88235294117646, "grad_norm": 0.30558428168296814, "learning_rate": 7.49401590016714e-07, "loss": 0.0039, "step": 5982 }, { "epoch": 351.94117647058823, "grad_norm": 0.22409534454345703, "learning_rate": 7.475935973414639e-07, "loss": 0.0019, "step": 5983 }, { "epoch": 352.0, "grad_norm": 0.8587005734443665, "learning_rate": 7.457877035729588e-07, "loss": 0.0023, "step": 5984 }, { "epoch": 352.05882352941177, "grad_norm": 0.43021467328071594, "learning_rate": 7.439839091208689e-07, "loss": 0.0056, "step": 5985 }, { "epoch": 352.11764705882354, "grad_norm": 0.33457526564598083, "learning_rate": 7.421822143943824e-07, "loss": 0.006, "step": 5986 }, { "epoch": 352.1764705882353, "grad_norm": 0.3396676182746887, "learning_rate": 7.403826198022179e-07, "loss": 0.0045, "step": 5987 }, { "epoch": 352.2352941176471, "grad_norm": 0.37106820940971375, "learning_rate": 7.385851257526066e-07, "loss": 0.0036, "step": 5988 }, { "epoch": 352.29411764705884, "grad_norm": 0.12385591119527817, "learning_rate": 7.367897326533136e-07, "loss": 0.0026, "step": 5989 }, { "epoch": 352.3529411764706, "grad_norm": 0.157868430018425, "learning_rate": 7.349964409116239e-07, "loss": 0.0016, "step": 5990 }, { "epoch": 352.4117647058824, "grad_norm": 0.26565784215927124, "learning_rate": 7.332052509343446e-07, "loss": 0.0034, "step": 5991 }, { "epoch": 352.47058823529414, "grad_norm": 0.3626171350479126, "learning_rate": 7.31416163127805e-07, "loss": 0.0026, "step": 5992 }, { "epoch": 352.52941176470586, "grad_norm": 0.2577395737171173, "learning_rate": 7.296291778978615e-07, "loss": 0.0036, "step": 5993 }, { "epoch": 352.5882352941176, "grad_norm": 0.2759290039539337, "learning_rate": 7.27844295649891e-07, "loss": 0.0055, "step": 5994 }, { "epoch": 352.6470588235294, "grad_norm": 0.41845616698265076, "learning_rate": 7.260615167887952e-07, "loss": 0.0055, "step": 5995 }, { "epoch": 352.70588235294116, "grad_norm": 0.24565882980823517, "learning_rate": 7.242808417189929e-07, "loss": 0.0034, "step": 5996 }, { "epoch": 352.7647058823529, "grad_norm": 0.21831540763378143, "learning_rate": 7.225022708444329e-07, "loss": 0.0026, "step": 5997 }, { "epoch": 352.8235294117647, "grad_norm": 0.2761405408382416, "learning_rate": 7.207258045685861e-07, "loss": 0.0044, "step": 5998 }, { "epoch": 352.88235294117646, "grad_norm": 0.16170461475849152, "learning_rate": 7.189514432944378e-07, "loss": 0.0018, "step": 5999 }, { "epoch": 352.94117647058823, "grad_norm": 0.3586805462837219, "learning_rate": 7.171791874245048e-07, "loss": 0.0022, "step": 6000 }, { "epoch": 353.0, "grad_norm": 0.42351773381233215, "learning_rate": 7.154090373608236e-07, "loss": 0.0052, "step": 6001 }, { "epoch": 353.05882352941177, "grad_norm": 0.3057617247104645, "learning_rate": 7.136409935049526e-07, "loss": 0.0034, "step": 6002 }, { "epoch": 353.11764705882354, "grad_norm": 0.5712677240371704, "learning_rate": 7.11875056257969e-07, "loss": 0.0025, "step": 6003 }, { "epoch": 353.1764705882353, "grad_norm": 0.3376525342464447, "learning_rate": 7.101112260204779e-07, "loss": 0.0059, "step": 6004 }, { "epoch": 353.2352941176471, "grad_norm": 0.28256842494010925, "learning_rate": 7.083495031926047e-07, "loss": 0.0054, "step": 6005 }, { "epoch": 353.29411764705884, "grad_norm": 0.29741188883781433, "learning_rate": 7.065898881739952e-07, "loss": 0.0059, "step": 6006 }, { "epoch": 353.3529411764706, "grad_norm": 0.22048871219158173, "learning_rate": 7.048323813638158e-07, "loss": 0.002, "step": 6007 }, { "epoch": 353.4117647058824, "grad_norm": 0.4865872859954834, "learning_rate": 7.030769831607576e-07, "loss": 0.0021, "step": 6008 }, { "epoch": 353.47058823529414, "grad_norm": 0.2922828197479248, "learning_rate": 7.013236939630352e-07, "loss": 0.0037, "step": 6009 }, { "epoch": 353.52941176470586, "grad_norm": 0.853110671043396, "learning_rate": 6.995725141683785e-07, "loss": 0.0079, "step": 6010 }, { "epoch": 353.5882352941176, "grad_norm": 0.2336549162864685, "learning_rate": 6.978234441740428e-07, "loss": 0.0041, "step": 6011 }, { "epoch": 353.6470588235294, "grad_norm": 0.3366740345954895, "learning_rate": 6.960764843768053e-07, "loss": 0.0026, "step": 6012 }, { "epoch": 353.70588235294116, "grad_norm": 0.3000068962574005, "learning_rate": 6.943316351729645e-07, "loss": 0.0027, "step": 6013 }, { "epoch": 353.7647058823529, "grad_norm": 0.18586091697216034, "learning_rate": 6.92588896958336e-07, "loss": 0.003, "step": 6014 }, { "epoch": 353.8235294117647, "grad_norm": 0.6071428060531616, "learning_rate": 6.908482701282615e-07, "loss": 0.0055, "step": 6015 }, { "epoch": 353.88235294117646, "grad_norm": 0.15500137209892273, "learning_rate": 6.891097550776027e-07, "loss": 0.0022, "step": 6016 }, { "epoch": 353.94117647058823, "grad_norm": 0.3057919442653656, "learning_rate": 6.873733522007409e-07, "loss": 0.0029, "step": 6017 }, { "epoch": 354.0, "grad_norm": 0.3241414725780487, "learning_rate": 6.856390618915775e-07, "loss": 0.0049, "step": 6018 }, { "epoch": 354.05882352941177, "grad_norm": 0.21616524457931519, "learning_rate": 6.83906884543537e-07, "loss": 0.0039, "step": 6019 }, { "epoch": 354.11764705882354, "grad_norm": 0.4469786584377289, "learning_rate": 6.821768205495638e-07, "loss": 0.0041, "step": 6020 }, { "epoch": 354.1764705882353, "grad_norm": 0.339287132024765, "learning_rate": 6.80448870302124e-07, "loss": 0.0028, "step": 6021 }, { "epoch": 354.2352941176471, "grad_norm": 2.6041765213012695, "learning_rate": 6.787230341931983e-07, "loss": 0.0059, "step": 6022 }, { "epoch": 354.29411764705884, "grad_norm": 0.22552116215229034, "learning_rate": 6.76999312614296e-07, "loss": 0.004, "step": 6023 }, { "epoch": 354.3529411764706, "grad_norm": 0.29934269189834595, "learning_rate": 6.752777059564431e-07, "loss": 0.0039, "step": 6024 }, { "epoch": 354.4117647058824, "grad_norm": 0.3057837188243866, "learning_rate": 6.735582146101838e-07, "loss": 0.0042, "step": 6025 }, { "epoch": 354.47058823529414, "grad_norm": 0.2760726809501648, "learning_rate": 6.71840838965585e-07, "loss": 0.0034, "step": 6026 }, { "epoch": 354.52941176470586, "grad_norm": 0.19606834650039673, "learning_rate": 6.701255794122341e-07, "loss": 0.0022, "step": 6027 }, { "epoch": 354.5882352941176, "grad_norm": 0.15902990102767944, "learning_rate": 6.684124363392375e-07, "loss": 0.0018, "step": 6028 }, { "epoch": 354.6470588235294, "grad_norm": 0.16742660105228424, "learning_rate": 6.667014101352199e-07, "loss": 0.0029, "step": 6029 }, { "epoch": 354.70588235294116, "grad_norm": 0.38626405596733093, "learning_rate": 6.649925011883274e-07, "loss": 0.0046, "step": 6030 }, { "epoch": 354.7647058823529, "grad_norm": 0.16867609322071075, "learning_rate": 6.632857098862256e-07, "loss": 0.0027, "step": 6031 }, { "epoch": 354.8235294117647, "grad_norm": 0.33465659618377686, "learning_rate": 6.615810366161035e-07, "loss": 0.0033, "step": 6032 }, { "epoch": 354.88235294117646, "grad_norm": 0.15905605256557465, "learning_rate": 6.598784817646609e-07, "loss": 0.0018, "step": 6033 }, { "epoch": 354.94117647058823, "grad_norm": 0.3576519191265106, "learning_rate": 6.581780457181242e-07, "loss": 0.0089, "step": 6034 }, { "epoch": 355.0, "grad_norm": 0.11893519014120102, "learning_rate": 6.564797288622371e-07, "loss": 0.0016, "step": 6035 }, { "epoch": 355.05882352941177, "grad_norm": 0.22980119287967682, "learning_rate": 6.547835315822637e-07, "loss": 0.0038, "step": 6036 }, { "epoch": 355.11764705882354, "grad_norm": 0.25627633929252625, "learning_rate": 6.530894542629829e-07, "loss": 0.006, "step": 6037 }, { "epoch": 355.1764705882353, "grad_norm": 0.24886098504066467, "learning_rate": 6.513974972886983e-07, "loss": 0.0031, "step": 6038 }, { "epoch": 355.2352941176471, "grad_norm": 0.2891530990600586, "learning_rate": 6.497076610432295e-07, "loss": 0.003, "step": 6039 }, { "epoch": 355.29411764705884, "grad_norm": 0.1687527596950531, "learning_rate": 6.480199459099157e-07, "loss": 0.0025, "step": 6040 }, { "epoch": 355.3529411764706, "grad_norm": 0.27532559633255005, "learning_rate": 6.463343522716137e-07, "loss": 0.0042, "step": 6041 }, { "epoch": 355.4117647058824, "grad_norm": 4.718780040740967, "learning_rate": 6.44650880510701e-07, "loss": 0.0094, "step": 6042 }, { "epoch": 355.47058823529414, "grad_norm": 0.17112691700458527, "learning_rate": 6.429695310090733e-07, "loss": 0.0026, "step": 6043 }, { "epoch": 355.52941176470586, "grad_norm": 0.35442182421684265, "learning_rate": 6.412903041481433e-07, "loss": 0.0043, "step": 6044 }, { "epoch": 355.5882352941176, "grad_norm": 0.13690999150276184, "learning_rate": 6.39613200308844e-07, "loss": 0.0022, "step": 6045 }, { "epoch": 355.6470588235294, "grad_norm": 0.19322489202022552, "learning_rate": 6.379382198716267e-07, "loss": 0.0026, "step": 6046 }, { "epoch": 355.70588235294116, "grad_norm": 0.5123170018196106, "learning_rate": 6.362653632164595e-07, "loss": 0.0038, "step": 6047 }, { "epoch": 355.7647058823529, "grad_norm": 0.33894556760787964, "learning_rate": 6.345946307228301e-07, "loss": 0.0043, "step": 6048 }, { "epoch": 355.8235294117647, "grad_norm": 0.12090180814266205, "learning_rate": 6.329260227697432e-07, "loss": 0.0022, "step": 6049 }, { "epoch": 355.88235294117646, "grad_norm": 0.343650221824646, "learning_rate": 6.312595397357235e-07, "loss": 0.0035, "step": 6050 }, { "epoch": 355.94117647058823, "grad_norm": 0.24473991990089417, "learning_rate": 6.2959518199881e-07, "loss": 0.0039, "step": 6051 }, { "epoch": 356.0, "grad_norm": 0.18794484436511993, "learning_rate": 6.279329499365649e-07, "loss": 0.0032, "step": 6052 }, { "epoch": 356.05882352941177, "grad_norm": 0.35735154151916504, "learning_rate": 6.262728439260624e-07, "loss": 0.0041, "step": 6053 }, { "epoch": 356.11764705882354, "grad_norm": 0.3478688895702362, "learning_rate": 6.246148643439009e-07, "loss": 0.0038, "step": 6054 }, { "epoch": 356.1764705882353, "grad_norm": 0.40130093693733215, "learning_rate": 6.229590115661888e-07, "loss": 0.0041, "step": 6055 }, { "epoch": 356.2352941176471, "grad_norm": 0.28409504890441895, "learning_rate": 6.213052859685575e-07, "loss": 0.0025, "step": 6056 }, { "epoch": 356.29411764705884, "grad_norm": 0.33010226488113403, "learning_rate": 6.196536879261561e-07, "loss": 0.005, "step": 6057 }, { "epoch": 356.3529411764706, "grad_norm": 0.3340829908847809, "learning_rate": 6.180042178136481e-07, "loss": 0.0038, "step": 6058 }, { "epoch": 356.4117647058824, "grad_norm": 0.2312031388282776, "learning_rate": 6.163568760052141e-07, "loss": 0.0035, "step": 6059 }, { "epoch": 356.47058823529414, "grad_norm": 0.12720784544944763, "learning_rate": 6.147116628745542e-07, "loss": 0.0021, "step": 6060 }, { "epoch": 356.52941176470586, "grad_norm": 0.2304617017507553, "learning_rate": 6.130685787948853e-07, "loss": 0.0034, "step": 6061 }, { "epoch": 356.5882352941176, "grad_norm": 0.27204394340515137, "learning_rate": 6.114276241389416e-07, "loss": 0.004, "step": 6062 }, { "epoch": 356.6470588235294, "grad_norm": 0.4328444302082062, "learning_rate": 6.097887992789708e-07, "loss": 0.0042, "step": 6063 }, { "epoch": 356.70588235294116, "grad_norm": 0.6803945899009705, "learning_rate": 6.081521045867422e-07, "loss": 0.0055, "step": 6064 }, { "epoch": 356.7647058823529, "grad_norm": 0.15773531794548035, "learning_rate": 6.065175404335388e-07, "loss": 0.003, "step": 6065 }, { "epoch": 356.8235294117647, "grad_norm": 0.60191410779953, "learning_rate": 6.048851071901607e-07, "loss": 0.0084, "step": 6066 }, { "epoch": 356.88235294117646, "grad_norm": 0.09236280620098114, "learning_rate": 6.03254805226926e-07, "loss": 0.0015, "step": 6067 }, { "epoch": 356.94117647058823, "grad_norm": 0.24075686931610107, "learning_rate": 6.016266349136679e-07, "loss": 0.0029, "step": 6068 }, { "epoch": 357.0, "grad_norm": 0.17053276300430298, "learning_rate": 6.000005966197387e-07, "loss": 0.0023, "step": 6069 }, { "epoch": 357.05882352941177, "grad_norm": 0.3926534950733185, "learning_rate": 5.983766907140009e-07, "loss": 0.0027, "step": 6070 }, { "epoch": 357.11764705882354, "grad_norm": 0.4060342013835907, "learning_rate": 5.96754917564839e-07, "loss": 0.0056, "step": 6071 }, { "epoch": 357.1764705882353, "grad_norm": 0.2823713421821594, "learning_rate": 5.951352775401519e-07, "loss": 0.0058, "step": 6072 }, { "epoch": 357.2352941176471, "grad_norm": 0.1371864229440689, "learning_rate": 5.935177710073581e-07, "loss": 0.0018, "step": 6073 }, { "epoch": 357.29411764705884, "grad_norm": 0.20967556536197662, "learning_rate": 5.919023983333827e-07, "loss": 0.0024, "step": 6074 }, { "epoch": 357.3529411764706, "grad_norm": 0.19916166365146637, "learning_rate": 5.902891598846761e-07, "loss": 0.0022, "step": 6075 }, { "epoch": 357.4117647058824, "grad_norm": 0.3333573341369629, "learning_rate": 5.88678056027202e-07, "loss": 0.0046, "step": 6076 }, { "epoch": 357.47058823529414, "grad_norm": 0.18270348012447357, "learning_rate": 5.870690871264361e-07, "loss": 0.0029, "step": 6077 }, { "epoch": 357.52941176470586, "grad_norm": 1.0303213596343994, "learning_rate": 5.854622535473753e-07, "loss": 0.0037, "step": 6078 }, { "epoch": 357.5882352941176, "grad_norm": 0.5896573662757874, "learning_rate": 5.838575556545267e-07, "loss": 0.0053, "step": 6079 }, { "epoch": 357.6470588235294, "grad_norm": 0.2035754919052124, "learning_rate": 5.822549938119193e-07, "loss": 0.0049, "step": 6080 }, { "epoch": 357.70588235294116, "grad_norm": 0.28803184628486633, "learning_rate": 5.806545683830911e-07, "loss": 0.0024, "step": 6081 }, { "epoch": 357.7647058823529, "grad_norm": 0.477108895778656, "learning_rate": 5.790562797310994e-07, "loss": 0.0044, "step": 6082 }, { "epoch": 357.8235294117647, "grad_norm": 0.19649621844291687, "learning_rate": 5.774601282185177e-07, "loss": 0.0023, "step": 6083 }, { "epoch": 357.88235294117646, "grad_norm": 0.2442621886730194, "learning_rate": 5.758661142074285e-07, "loss": 0.0051, "step": 6084 }, { "epoch": 357.94117647058823, "grad_norm": 0.4613816738128662, "learning_rate": 5.742742380594368e-07, "loss": 0.0048, "step": 6085 }, { "epoch": 358.0, "grad_norm": 0.29254889488220215, "learning_rate": 5.726845001356573e-07, "loss": 0.0041, "step": 6086 }, { "epoch": 358.05882352941177, "grad_norm": 0.1564466655254364, "learning_rate": 5.710969007967249e-07, "loss": 0.0018, "step": 6087 }, { "epoch": 358.11764705882354, "grad_norm": 0.4026077389717102, "learning_rate": 5.695114404027824e-07, "loss": 0.0046, "step": 6088 }, { "epoch": 358.1764705882353, "grad_norm": 0.24312958121299744, "learning_rate": 5.679281193134934e-07, "loss": 0.0031, "step": 6089 }, { "epoch": 358.2352941176471, "grad_norm": 0.26056817173957825, "learning_rate": 5.663469378880338e-07, "loss": 0.0032, "step": 6090 }, { "epoch": 358.29411764705884, "grad_norm": 0.5436758399009705, "learning_rate": 5.647678964850955e-07, "loss": 0.007, "step": 6091 }, { "epoch": 358.3529411764706, "grad_norm": 0.5408241152763367, "learning_rate": 5.631909954628812e-07, "loss": 0.0052, "step": 6092 }, { "epoch": 358.4117647058824, "grad_norm": 0.8909854292869568, "learning_rate": 5.61616235179111e-07, "loss": 0.0047, "step": 6093 }, { "epoch": 358.47058823529414, "grad_norm": 1.7933934926986694, "learning_rate": 5.600436159910216e-07, "loss": 0.0047, "step": 6094 }, { "epoch": 358.52941176470586, "grad_norm": 0.21277089416980743, "learning_rate": 5.584731382553587e-07, "loss": 0.0027, "step": 6095 }, { "epoch": 358.5882352941176, "grad_norm": 0.31084442138671875, "learning_rate": 5.569048023283852e-07, "loss": 0.005, "step": 6096 }, { "epoch": 358.6470588235294, "grad_norm": 0.23325200378894806, "learning_rate": 5.553386085658785e-07, "loss": 0.0045, "step": 6097 }, { "epoch": 358.70588235294116, "grad_norm": 0.17434190213680267, "learning_rate": 5.537745573231301e-07, "loss": 0.0024, "step": 6098 }, { "epoch": 358.7647058823529, "grad_norm": 0.3264266550540924, "learning_rate": 5.52212648954944e-07, "loss": 0.0029, "step": 6099 }, { "epoch": 358.8235294117647, "grad_norm": 0.11060740053653717, "learning_rate": 5.506528838156377e-07, "loss": 0.0019, "step": 6100 }, { "epoch": 358.88235294117646, "grad_norm": 0.3121032416820526, "learning_rate": 5.49095262259045e-07, "loss": 0.0029, "step": 6101 }, { "epoch": 358.94117647058823, "grad_norm": 0.41670164465904236, "learning_rate": 5.475397846385133e-07, "loss": 0.0064, "step": 6102 }, { "epoch": 359.0, "grad_norm": 0.23465614020824432, "learning_rate": 5.459864513068991e-07, "loss": 0.0017, "step": 6103 }, { "epoch": 359.05882352941177, "grad_norm": 4.140110969543457, "learning_rate": 5.444352626165783e-07, "loss": 0.0075, "step": 6104 }, { "epoch": 359.11764705882354, "grad_norm": 0.3932206630706787, "learning_rate": 5.428862189194372e-07, "loss": 0.0031, "step": 6105 }, { "epoch": 359.1764705882353, "grad_norm": 0.2731381952762604, "learning_rate": 5.41339320566876e-07, "loss": 0.0039, "step": 6106 }, { "epoch": 359.2352941176471, "grad_norm": 0.26986414194107056, "learning_rate": 5.397945679098071e-07, "loss": 0.0069, "step": 6107 }, { "epoch": 359.29411764705884, "grad_norm": 0.17487499117851257, "learning_rate": 5.382519612986592e-07, "loss": 0.0025, "step": 6108 }, { "epoch": 359.3529411764706, "grad_norm": 0.2947409749031067, "learning_rate": 5.367115010833734e-07, "loss": 0.0031, "step": 6109 }, { "epoch": 359.4117647058824, "grad_norm": 0.4433583617210388, "learning_rate": 5.351731876133993e-07, "loss": 0.0029, "step": 6110 }, { "epoch": 359.47058823529414, "grad_norm": 0.15555283427238464, "learning_rate": 5.336370212377051e-07, "loss": 0.0021, "step": 6111 }, { "epoch": 359.52941176470586, "grad_norm": 0.6558905243873596, "learning_rate": 5.321030023047691e-07, "loss": 0.0065, "step": 6112 }, { "epoch": 359.5882352941176, "grad_norm": 0.17515890300273895, "learning_rate": 5.305711311625861e-07, "loss": 0.0026, "step": 6113 }, { "epoch": 359.6470588235294, "grad_norm": 0.2523956298828125, "learning_rate": 5.290414081586559e-07, "loss": 0.0032, "step": 6114 }, { "epoch": 359.70588235294116, "grad_norm": 0.511224091053009, "learning_rate": 5.275138336399999e-07, "loss": 0.0037, "step": 6115 }, { "epoch": 359.7647058823529, "grad_norm": 0.15400263667106628, "learning_rate": 5.259884079531452e-07, "loss": 0.0022, "step": 6116 }, { "epoch": 359.8235294117647, "grad_norm": 0.20601259171962738, "learning_rate": 5.244651314441373e-07, "loss": 0.0025, "step": 6117 }, { "epoch": 359.88235294117646, "grad_norm": 0.4194299280643463, "learning_rate": 5.229440044585288e-07, "loss": 0.0054, "step": 6118 }, { "epoch": 359.94117647058823, "grad_norm": 0.3041326701641083, "learning_rate": 5.214250273413879e-07, "loss": 0.0052, "step": 6119 }, { "epoch": 360.0, "grad_norm": 0.49752601981163025, "learning_rate": 5.199082004372958e-07, "loss": 0.0021, "step": 6120 }, { "epoch": 360.05882352941177, "grad_norm": 0.23486340045928955, "learning_rate": 5.183935240903415e-07, "loss": 0.0055, "step": 6121 }, { "epoch": 360.11764705882354, "grad_norm": 0.21027708053588867, "learning_rate": 5.168809986441303e-07, "loss": 0.0022, "step": 6122 }, { "epoch": 360.1764705882353, "grad_norm": 0.1321486234664917, "learning_rate": 5.153706244417789e-07, "loss": 0.0019, "step": 6123 }, { "epoch": 360.2352941176471, "grad_norm": 0.1750880777835846, "learning_rate": 5.138624018259164e-07, "loss": 0.0024, "step": 6124 }, { "epoch": 360.29411764705884, "grad_norm": 0.1893538385629654, "learning_rate": 5.123563311386803e-07, "loss": 0.0038, "step": 6125 }, { "epoch": 360.3529411764706, "grad_norm": 0.20396623015403748, "learning_rate": 5.108524127217241e-07, "loss": 0.0039, "step": 6126 }, { "epoch": 360.4117647058824, "grad_norm": 0.26077553629875183, "learning_rate": 5.093506469162102e-07, "loss": 0.0029, "step": 6127 }, { "epoch": 360.47058823529414, "grad_norm": 0.3938789665699005, "learning_rate": 5.078510340628163e-07, "loss": 0.0035, "step": 6128 }, { "epoch": 360.52941176470586, "grad_norm": 0.28214699029922485, "learning_rate": 5.063535745017267e-07, "loss": 0.0056, "step": 6129 }, { "epoch": 360.5882352941176, "grad_norm": 0.3891405761241913, "learning_rate": 5.0485826857264e-07, "loss": 0.0046, "step": 6130 }, { "epoch": 360.6470588235294, "grad_norm": 0.17575690150260925, "learning_rate": 5.033651166147691e-07, "loss": 0.0025, "step": 6131 }, { "epoch": 360.70588235294116, "grad_norm": 0.3435424566268921, "learning_rate": 5.018741189668308e-07, "loss": 0.004, "step": 6132 }, { "epoch": 360.7647058823529, "grad_norm": 0.1561315953731537, "learning_rate": 5.003852759670613e-07, "loss": 0.0022, "step": 6133 }, { "epoch": 360.8235294117647, "grad_norm": 0.5238484740257263, "learning_rate": 4.988985879532027e-07, "loss": 0.0029, "step": 6134 }, { "epoch": 360.88235294117646, "grad_norm": 0.31924968957901, "learning_rate": 4.974140552625106e-07, "loss": 0.0052, "step": 6135 }, { "epoch": 360.94117647058823, "grad_norm": 0.24392487108707428, "learning_rate": 4.959316782317502e-07, "loss": 0.0031, "step": 6136 }, { "epoch": 361.0, "grad_norm": 0.1787535697221756, "learning_rate": 4.944514571971981e-07, "loss": 0.0023, "step": 6137 }, { "epoch": 361.05882352941177, "grad_norm": 0.14696262776851654, "learning_rate": 4.929733924946433e-07, "loss": 0.0021, "step": 6138 }, { "epoch": 361.11764705882354, "grad_norm": 0.10736668854951859, "learning_rate": 4.914974844593856e-07, "loss": 0.0018, "step": 6139 }, { "epoch": 361.1764705882353, "grad_norm": 0.8927687406539917, "learning_rate": 4.900237334262326e-07, "loss": 0.0072, "step": 6140 }, { "epoch": 361.2352941176471, "grad_norm": 0.3147423267364502, "learning_rate": 4.885521397295034e-07, "loss": 0.0036, "step": 6141 }, { "epoch": 361.29411764705884, "grad_norm": 0.1596369594335556, "learning_rate": 4.870827037030312e-07, "loss": 0.0022, "step": 6142 }, { "epoch": 361.3529411764706, "grad_norm": 0.34658685326576233, "learning_rate": 4.856154256801571e-07, "loss": 0.0056, "step": 6143 }, { "epoch": 361.4117647058824, "grad_norm": 0.2339644879102707, "learning_rate": 4.841503059937313e-07, "loss": 0.0038, "step": 6144 }, { "epoch": 361.47058823529414, "grad_norm": 0.11691217124462128, "learning_rate": 4.826873449761172e-07, "loss": 0.0021, "step": 6145 }, { "epoch": 361.52941176470586, "grad_norm": 0.1964091807603836, "learning_rate": 4.812265429591878e-07, "loss": 0.0027, "step": 6146 }, { "epoch": 361.5882352941176, "grad_norm": 0.24537642300128937, "learning_rate": 4.797679002743227e-07, "loss": 0.0059, "step": 6147 }, { "epoch": 361.6470588235294, "grad_norm": 0.2760416567325592, "learning_rate": 4.783114172524173e-07, "loss": 0.002, "step": 6148 }, { "epoch": 361.70588235294116, "grad_norm": 0.39162689447402954, "learning_rate": 4.768570942238749e-07, "loss": 0.0033, "step": 6149 }, { "epoch": 361.7647058823529, "grad_norm": 0.26636260747909546, "learning_rate": 4.754049315186071e-07, "loss": 0.0042, "step": 6150 }, { "epoch": 361.8235294117647, "grad_norm": 0.23400026559829712, "learning_rate": 4.739549294660373e-07, "loss": 0.0024, "step": 6151 }, { "epoch": 361.88235294117646, "grad_norm": 0.23828215897083282, "learning_rate": 4.7250708839509774e-07, "loss": 0.0058, "step": 6152 }, { "epoch": 361.94117647058823, "grad_norm": 0.18147727847099304, "learning_rate": 4.7106140863423133e-07, "loss": 0.0039, "step": 6153 }, { "epoch": 362.0, "grad_norm": 0.2648840844631195, "learning_rate": 4.696178905113913e-07, "loss": 0.0032, "step": 6154 }, { "epoch": 362.05882352941177, "grad_norm": 0.19908085465431213, "learning_rate": 4.681765343540379e-07, "loss": 0.0022, "step": 6155 }, { "epoch": 362.11764705882354, "grad_norm": 0.2788218557834625, "learning_rate": 4.667373404891429e-07, "loss": 0.0029, "step": 6156 }, { "epoch": 362.1764705882353, "grad_norm": 1.3878428936004639, "learning_rate": 4.6530030924318737e-07, "loss": 0.0057, "step": 6157 }, { "epoch": 362.2352941176471, "grad_norm": 0.4810515344142914, "learning_rate": 4.638654409421617e-07, "loss": 0.0055, "step": 6158 }, { "epoch": 362.29411764705884, "grad_norm": 0.26726529002189636, "learning_rate": 4.624327359115643e-07, "loss": 0.0049, "step": 6159 }, { "epoch": 362.3529411764706, "grad_norm": 0.2583976984024048, "learning_rate": 4.610021944764054e-07, "loss": 0.0027, "step": 6160 }, { "epoch": 362.4117647058824, "grad_norm": 0.5669659376144409, "learning_rate": 4.5957381696120405e-07, "loss": 0.0038, "step": 6161 }, { "epoch": 362.47058823529414, "grad_norm": 0.25623956322669983, "learning_rate": 4.5814760368998457e-07, "loss": 0.003, "step": 6162 }, { "epoch": 362.52941176470586, "grad_norm": 0.2388668954372406, "learning_rate": 4.5672355498628586e-07, "loss": 0.0043, "step": 6163 }, { "epoch": 362.5882352941176, "grad_norm": 0.15776097774505615, "learning_rate": 4.553016711731506e-07, "loss": 0.0022, "step": 6164 }, { "epoch": 362.6470588235294, "grad_norm": 0.39912575483322144, "learning_rate": 4.538819525731364e-07, "loss": 0.0034, "step": 6165 }, { "epoch": 362.70588235294116, "grad_norm": 0.24581065773963928, "learning_rate": 4.5246439950830335e-07, "loss": 0.0035, "step": 6166 }, { "epoch": 362.7647058823529, "grad_norm": 0.21996934711933136, "learning_rate": 4.510490123002231e-07, "loss": 0.0027, "step": 6167 }, { "epoch": 362.8235294117647, "grad_norm": 0.29947584867477417, "learning_rate": 4.496357912699767e-07, "loss": 0.005, "step": 6168 }, { "epoch": 362.88235294117646, "grad_norm": 0.44894516468048096, "learning_rate": 4.482247367381543e-07, "loss": 0.0063, "step": 6169 }, { "epoch": 362.94117647058823, "grad_norm": 0.22519733011722565, "learning_rate": 4.468158490248509e-07, "loss": 0.0029, "step": 6170 }, { "epoch": 363.0, "grad_norm": 0.13015593588352203, "learning_rate": 4.454091284496731e-07, "loss": 0.0021, "step": 6171 }, { "epoch": 363.05882352941177, "grad_norm": 0.1341591775417328, "learning_rate": 4.440045753317379e-07, "loss": 0.002, "step": 6172 }, { "epoch": 363.11764705882354, "grad_norm": 0.15499474108219147, "learning_rate": 4.426021899896649e-07, "loss": 0.0019, "step": 6173 }, { "epoch": 363.1764705882353, "grad_norm": 0.5796432495117188, "learning_rate": 4.4120197274158503e-07, "loss": 0.0037, "step": 6174 }, { "epoch": 363.2352941176471, "grad_norm": 0.8865479826927185, "learning_rate": 4.398039239051388e-07, "loss": 0.006, "step": 6175 }, { "epoch": 363.29411764705884, "grad_norm": 0.3178896903991699, "learning_rate": 4.384080437974736e-07, "loss": 0.0052, "step": 6176 }, { "epoch": 363.3529411764706, "grad_norm": 0.3583469092845917, "learning_rate": 4.370143327352427e-07, "loss": 0.0051, "step": 6177 }, { "epoch": 363.4117647058824, "grad_norm": 0.5847170948982239, "learning_rate": 4.3562279103461113e-07, "loss": 0.0043, "step": 6178 }, { "epoch": 363.47058823529414, "grad_norm": 0.24762459099292755, "learning_rate": 4.342334190112474e-07, "loss": 0.0024, "step": 6179 }, { "epoch": 363.52941176470586, "grad_norm": 0.32461243867874146, "learning_rate": 4.328462169803338e-07, "loss": 0.0066, "step": 6180 }, { "epoch": 363.5882352941176, "grad_norm": 0.241053968667984, "learning_rate": 4.3146118525655425e-07, "loss": 0.0021, "step": 6181 }, { "epoch": 363.6470588235294, "grad_norm": 0.2194766253232956, "learning_rate": 4.300783241541029e-07, "loss": 0.0024, "step": 6182 }, { "epoch": 363.70588235294116, "grad_norm": 0.31481876969337463, "learning_rate": 4.2869763398668217e-07, "loss": 0.0045, "step": 6183 }, { "epoch": 363.7647058823529, "grad_norm": 0.2110607922077179, "learning_rate": 4.2731911506750377e-07, "loss": 0.0043, "step": 6184 }, { "epoch": 363.8235294117647, "grad_norm": 0.14553458988666534, "learning_rate": 4.2594276770927976e-07, "loss": 0.0021, "step": 6185 }, { "epoch": 363.88235294117646, "grad_norm": 0.1824266016483307, "learning_rate": 4.2456859222423707e-07, "loss": 0.0033, "step": 6186 }, { "epoch": 363.94117647058823, "grad_norm": 0.2796285152435303, "learning_rate": 4.2319658892410854e-07, "loss": 0.0046, "step": 6187 }, { "epoch": 364.0, "grad_norm": 0.1120852530002594, "learning_rate": 4.218267581201296e-07, "loss": 0.0016, "step": 6188 }, { "epoch": 364.05882352941177, "grad_norm": 0.12120115011930466, "learning_rate": 4.204591001230474e-07, "loss": 0.0019, "step": 6189 }, { "epoch": 364.11764705882354, "grad_norm": 0.299081951379776, "learning_rate": 4.190936152431158e-07, "loss": 0.0037, "step": 6190 }, { "epoch": 364.1764705882353, "grad_norm": 0.36301389336586, "learning_rate": 4.177303037900948e-07, "loss": 0.0066, "step": 6191 }, { "epoch": 364.2352941176471, "grad_norm": 0.40694427490234375, "learning_rate": 4.163691660732505e-07, "loss": 0.0064, "step": 6192 }, { "epoch": 364.29411764705884, "grad_norm": 0.28017309308052063, "learning_rate": 4.150102024013558e-07, "loss": 0.0036, "step": 6193 }, { "epoch": 364.3529411764706, "grad_norm": 0.2977322041988373, "learning_rate": 4.13653413082693e-07, "loss": 0.0029, "step": 6194 }, { "epoch": 364.4117647058824, "grad_norm": 0.16949927806854248, "learning_rate": 4.122987984250515e-07, "loss": 0.0031, "step": 6195 }, { "epoch": 364.47058823529414, "grad_norm": 0.39734095335006714, "learning_rate": 4.109463587357199e-07, "loss": 0.0052, "step": 6196 }, { "epoch": 364.52941176470586, "grad_norm": 0.2117907851934433, "learning_rate": 4.0959609432150383e-07, "loss": 0.0036, "step": 6197 }, { "epoch": 364.5882352941176, "grad_norm": 0.2651618421077728, "learning_rate": 4.0824800548870835e-07, "loss": 0.0026, "step": 6198 }, { "epoch": 364.6470588235294, "grad_norm": 0.22280266880989075, "learning_rate": 4.0690209254314663e-07, "loss": 0.0017, "step": 6199 }, { "epoch": 364.70588235294116, "grad_norm": 0.9808104634284973, "learning_rate": 4.055583557901399e-07, "loss": 0.004, "step": 6200 }, { "epoch": 364.7647058823529, "grad_norm": 0.11792372167110443, "learning_rate": 4.0421679553451445e-07, "loss": 0.0019, "step": 6201 }, { "epoch": 364.8235294117647, "grad_norm": 0.34537824988365173, "learning_rate": 4.028774120806045e-07, "loss": 0.0059, "step": 6202 }, { "epoch": 364.88235294117646, "grad_norm": 0.28946736454963684, "learning_rate": 4.0154020573224594e-07, "loss": 0.0037, "step": 6203 }, { "epoch": 364.94117647058823, "grad_norm": 0.20828910171985626, "learning_rate": 4.00205176792785e-07, "loss": 0.0031, "step": 6204 }, { "epoch": 365.0, "grad_norm": 0.4089806079864502, "learning_rate": 3.988723255650728e-07, "loss": 0.0037, "step": 6205 }, { "epoch": 365.05882352941177, "grad_norm": 0.2886333763599396, "learning_rate": 3.975416523514686e-07, "loss": 0.0022, "step": 6206 }, { "epoch": 365.11764705882354, "grad_norm": 0.5758633017539978, "learning_rate": 3.962131574538308e-07, "loss": 0.0052, "step": 6207 }, { "epoch": 365.1764705882353, "grad_norm": 0.9038081169128418, "learning_rate": 3.9488684117353184e-07, "loss": 0.0081, "step": 6208 }, { "epoch": 365.2352941176471, "grad_norm": 0.22086893022060394, "learning_rate": 3.9356270381144533e-07, "loss": 0.0029, "step": 6209 }, { "epoch": 365.29411764705884, "grad_norm": 1.7652342319488525, "learning_rate": 3.922407456679522e-07, "loss": 0.0063, "step": 6210 }, { "epoch": 365.3529411764706, "grad_norm": 0.49273717403411865, "learning_rate": 3.90920967042937e-07, "loss": 0.0064, "step": 6211 }, { "epoch": 365.4117647058824, "grad_norm": 1.3788082599639893, "learning_rate": 3.8960336823579135e-07, "loss": 0.0089, "step": 6212 }, { "epoch": 365.47058823529414, "grad_norm": 0.16919414699077606, "learning_rate": 3.8828794954541507e-07, "loss": 0.0031, "step": 6213 }, { "epoch": 365.52941176470586, "grad_norm": 0.24891245365142822, "learning_rate": 3.8697471127020713e-07, "loss": 0.0025, "step": 6214 }, { "epoch": 365.5882352941176, "grad_norm": 0.4826388359069824, "learning_rate": 3.85663653708076e-07, "loss": 0.0023, "step": 6215 }, { "epoch": 365.6470588235294, "grad_norm": 0.2588071823120117, "learning_rate": 3.843547771564371e-07, "loss": 0.0019, "step": 6216 }, { "epoch": 365.70588235294116, "grad_norm": 0.18460705876350403, "learning_rate": 3.830480819122073e-07, "loss": 0.0033, "step": 6217 }, { "epoch": 365.7647058823529, "grad_norm": 0.37250736355781555, "learning_rate": 3.817435682718096e-07, "loss": 0.0047, "step": 6218 }, { "epoch": 365.8235294117647, "grad_norm": 0.34391283988952637, "learning_rate": 3.804412365311738e-07, "loss": 0.0039, "step": 6219 }, { "epoch": 365.88235294117646, "grad_norm": 0.4414571523666382, "learning_rate": 3.791410869857326e-07, "loss": 0.0025, "step": 6220 }, { "epoch": 365.94117647058823, "grad_norm": 0.2770179510116577, "learning_rate": 3.7784311993042777e-07, "loss": 0.0028, "step": 6221 }, { "epoch": 366.0, "grad_norm": 0.17198170721530914, "learning_rate": 3.7654733565969826e-07, "loss": 0.0022, "step": 6222 }, { "epoch": 366.05882352941177, "grad_norm": 0.46044814586639404, "learning_rate": 3.7525373446749445e-07, "loss": 0.0066, "step": 6223 }, { "epoch": 366.11764705882354, "grad_norm": 0.1334667205810547, "learning_rate": 3.739623166472717e-07, "loss": 0.0017, "step": 6224 }, { "epoch": 366.1764705882353, "grad_norm": 0.6029679179191589, "learning_rate": 3.726730824919844e-07, "loss": 0.0034, "step": 6225 }, { "epoch": 366.2352941176471, "grad_norm": 0.2910616993904114, "learning_rate": 3.713860322940976e-07, "loss": 0.0066, "step": 6226 }, { "epoch": 366.29411764705884, "grad_norm": 0.2559886574745178, "learning_rate": 3.701011663455767e-07, "loss": 0.0045, "step": 6227 }, { "epoch": 366.3529411764706, "grad_norm": 0.1927904486656189, "learning_rate": 3.6881848493789507e-07, "loss": 0.0036, "step": 6228 }, { "epoch": 366.4117647058824, "grad_norm": 0.29724156856536865, "learning_rate": 3.675379883620278e-07, "loss": 0.004, "step": 6229 }, { "epoch": 366.47058823529414, "grad_norm": 0.4607064127922058, "learning_rate": 3.662596769084537e-07, "loss": 0.0028, "step": 6230 }, { "epoch": 366.52941176470586, "grad_norm": 0.12842364609241486, "learning_rate": 3.6498355086716066e-07, "loss": 0.0017, "step": 6231 }, { "epoch": 366.5882352941176, "grad_norm": 0.23917268216609955, "learning_rate": 3.6370961052763607e-07, "loss": 0.0032, "step": 6232 }, { "epoch": 366.6470588235294, "grad_norm": 0.1651679426431656, "learning_rate": 3.624378561788722e-07, "loss": 0.0014, "step": 6233 }, { "epoch": 366.70588235294116, "grad_norm": 0.15492749214172363, "learning_rate": 3.611682881093681e-07, "loss": 0.0024, "step": 6234 }, { "epoch": 366.7647058823529, "grad_norm": 0.14239177107810974, "learning_rate": 3.599009066071235e-07, "loss": 0.0019, "step": 6235 }, { "epoch": 366.8235294117647, "grad_norm": 0.38415032625198364, "learning_rate": 3.5863571195964507e-07, "loss": 0.0054, "step": 6236 }, { "epoch": 366.88235294117646, "grad_norm": 0.24941211938858032, "learning_rate": 3.57372704453941e-07, "loss": 0.003, "step": 6237 }, { "epoch": 366.94117647058823, "grad_norm": 0.38949739933013916, "learning_rate": 3.5611188437652434e-07, "loss": 0.0063, "step": 6238 }, { "epoch": 367.0, "grad_norm": 0.5928941369056702, "learning_rate": 3.548532520134129e-07, "loss": 0.006, "step": 6239 }, { "epoch": 367.05882352941177, "grad_norm": 0.19978934526443481, "learning_rate": 3.5359680765012505e-07, "loss": 0.0024, "step": 6240 }, { "epoch": 367.11764705882354, "grad_norm": 0.421490877866745, "learning_rate": 3.523425515716872e-07, "loss": 0.0081, "step": 6241 }, { "epoch": 367.1764705882353, "grad_norm": 0.8503497242927551, "learning_rate": 3.5109048406262613e-07, "loss": 0.0041, "step": 6242 }, { "epoch": 367.2352941176471, "grad_norm": 0.13537369668483734, "learning_rate": 3.4984060540697364e-07, "loss": 0.002, "step": 6243 }, { "epoch": 367.29411764705884, "grad_norm": 0.4524504244327545, "learning_rate": 3.4859291588826393e-07, "loss": 0.004, "step": 6244 }, { "epoch": 367.3529411764706, "grad_norm": 0.3239549696445465, "learning_rate": 3.473474157895351e-07, "loss": 0.005, "step": 6245 }, { "epoch": 367.4117647058824, "grad_norm": 0.22979983687400818, "learning_rate": 3.461041053933278e-07, "loss": 0.0046, "step": 6246 }, { "epoch": 367.47058823529414, "grad_norm": 0.22682732343673706, "learning_rate": 3.448629849816898e-07, "loss": 0.0032, "step": 6247 }, { "epoch": 367.52941176470586, "grad_norm": 0.20383287966251373, "learning_rate": 3.436240548361669e-07, "loss": 0.0034, "step": 6248 }, { "epoch": 367.5882352941176, "grad_norm": 0.23736241459846497, "learning_rate": 3.4238731523780877e-07, "loss": 0.0025, "step": 6249 }, { "epoch": 367.6470588235294, "grad_norm": 0.5174427628517151, "learning_rate": 3.411527664671732e-07, "loss": 0.0054, "step": 6250 }, { "epoch": 367.70588235294116, "grad_norm": 0.26240062713623047, "learning_rate": 3.39920408804314e-07, "loss": 0.0034, "step": 6251 }, { "epoch": 367.7647058823529, "grad_norm": 0.2519940733909607, "learning_rate": 3.3869024252879304e-07, "loss": 0.0043, "step": 6252 }, { "epoch": 367.8235294117647, "grad_norm": 0.2471613883972168, "learning_rate": 3.374622679196726e-07, "loss": 0.004, "step": 6253 }, { "epoch": 367.88235294117646, "grad_norm": 0.33713409304618835, "learning_rate": 3.362364852555189e-07, "loss": 0.0027, "step": 6254 }, { "epoch": 367.94117647058823, "grad_norm": 0.2298421710729599, "learning_rate": 3.3501289481440047e-07, "loss": 0.0028, "step": 6255 }, { "epoch": 368.0, "grad_norm": 0.2306099236011505, "learning_rate": 3.3379149687388866e-07, "loss": 0.0024, "step": 6256 }, { "epoch": 368.05882352941177, "grad_norm": 0.2233920395374298, "learning_rate": 3.3257229171105633e-07, "loss": 0.0026, "step": 6257 }, { "epoch": 368.11764705882354, "grad_norm": 0.26431703567504883, "learning_rate": 3.3135527960248326e-07, "loss": 0.003, "step": 6258 }, { "epoch": 368.1764705882353, "grad_norm": 0.531643271446228, "learning_rate": 3.301404608242431e-07, "loss": 0.0082, "step": 6259 }, { "epoch": 368.2352941176471, "grad_norm": 0.24700391292572021, "learning_rate": 3.289278356519221e-07, "loss": 0.0028, "step": 6260 }, { "epoch": 368.29411764705884, "grad_norm": 0.23170311748981476, "learning_rate": 3.277174043606013e-07, "loss": 0.0032, "step": 6261 }, { "epoch": 368.3529411764706, "grad_norm": 0.31866660714149475, "learning_rate": 3.265091672248688e-07, "loss": 0.0034, "step": 6262 }, { "epoch": 368.4117647058824, "grad_norm": 0.31548789143562317, "learning_rate": 3.2530312451881095e-07, "loss": 0.0028, "step": 6263 }, { "epoch": 368.47058823529414, "grad_norm": 1.2147258520126343, "learning_rate": 3.240992765160189e-07, "loss": 0.0025, "step": 6264 }, { "epoch": 368.52941176470586, "grad_norm": 0.2968190908432007, "learning_rate": 3.2289762348958644e-07, "loss": 0.0039, "step": 6265 }, { "epoch": 368.5882352941176, "grad_norm": 0.4682371914386749, "learning_rate": 3.216981657121077e-07, "loss": 0.0038, "step": 6266 }, { "epoch": 368.6470588235294, "grad_norm": 0.261669784784317, "learning_rate": 3.205009034556783e-07, "loss": 0.0055, "step": 6267 }, { "epoch": 368.70588235294116, "grad_norm": 0.3874601423740387, "learning_rate": 3.193058369918989e-07, "loss": 0.0049, "step": 6268 }, { "epoch": 368.7647058823529, "grad_norm": 0.28426942229270935, "learning_rate": 3.181129665918692e-07, "loss": 0.0045, "step": 6269 }, { "epoch": 368.8235294117647, "grad_norm": 0.6026321053504944, "learning_rate": 3.1692229252619165e-07, "loss": 0.0036, "step": 6270 }, { "epoch": 368.88235294117646, "grad_norm": 0.2279290109872818, "learning_rate": 3.1573381506497024e-07, "loss": 0.0039, "step": 6271 }, { "epoch": 368.94117647058823, "grad_norm": 0.29033875465393066, "learning_rate": 3.145475344778115e-07, "loss": 0.0037, "step": 6272 }, { "epoch": 369.0, "grad_norm": 0.38070327043533325, "learning_rate": 3.133634510338235e-07, "loss": 0.0042, "step": 6273 }, { "epoch": 369.05882352941177, "grad_norm": 0.33244767785072327, "learning_rate": 3.121815650016124e-07, "loss": 0.006, "step": 6274 }, { "epoch": 369.11764705882354, "grad_norm": 0.43753716349601746, "learning_rate": 3.110018766492917e-07, "loss": 0.005, "step": 6275 }, { "epoch": 369.1764705882353, "grad_norm": 0.18144311010837555, "learning_rate": 3.0982438624447275e-07, "loss": 0.0023, "step": 6276 }, { "epoch": 369.2352941176471, "grad_norm": 0.3550466299057007, "learning_rate": 3.0864909405426966e-07, "loss": 0.0055, "step": 6277 }, { "epoch": 369.29411764705884, "grad_norm": 0.22832456231117249, "learning_rate": 3.074760003452959e-07, "loss": 0.0034, "step": 6278 }, { "epoch": 369.3529411764706, "grad_norm": 0.4689403772354126, "learning_rate": 3.063051053836685e-07, "loss": 0.0042, "step": 6279 }, { "epoch": 369.4117647058824, "grad_norm": 0.26433175802230835, "learning_rate": 3.051364094350051e-07, "loss": 0.0043, "step": 6280 }, { "epoch": 369.47058823529414, "grad_norm": 0.2936456501483917, "learning_rate": 3.0396991276442245e-07, "loss": 0.0043, "step": 6281 }, { "epoch": 369.52941176470586, "grad_norm": 0.1737799346446991, "learning_rate": 3.028056156365422e-07, "loss": 0.0029, "step": 6282 }, { "epoch": 369.5882352941176, "grad_norm": 0.27987349033355713, "learning_rate": 3.016435183154831e-07, "loss": 0.0037, "step": 6283 }, { "epoch": 369.6470588235294, "grad_norm": 0.3998394012451172, "learning_rate": 3.004836210648687e-07, "loss": 0.0026, "step": 6284 }, { "epoch": 369.70588235294116, "grad_norm": 0.27273374795913696, "learning_rate": 2.993259241478186e-07, "loss": 0.0029, "step": 6285 }, { "epoch": 369.7647058823529, "grad_norm": 0.3308209478855133, "learning_rate": 2.9817042782695815e-07, "loss": 0.0036, "step": 6286 }, { "epoch": 369.8235294117647, "grad_norm": 0.12147852033376694, "learning_rate": 2.9701713236441334e-07, "loss": 0.0016, "step": 6287 }, { "epoch": 369.88235294117646, "grad_norm": 0.2422657161951065, "learning_rate": 2.958660380218048e-07, "loss": 0.0045, "step": 6288 }, { "epoch": 369.94117647058823, "grad_norm": 0.1930873543024063, "learning_rate": 2.947171450602615e-07, "loss": 0.0024, "step": 6289 }, { "epoch": 370.0, "grad_norm": 0.3659920394420624, "learning_rate": 2.935704537404083e-07, "loss": 0.0059, "step": 6290 }, { "epoch": 370.05882352941177, "grad_norm": 0.1880984902381897, "learning_rate": 2.924259643223726e-07, "loss": 0.0024, "step": 6291 }, { "epoch": 370.11764705882354, "grad_norm": 0.17552249133586884, "learning_rate": 2.91283677065779e-07, "loss": 0.0038, "step": 6292 }, { "epoch": 370.1764705882353, "grad_norm": 0.30324867367744446, "learning_rate": 2.90143592229758e-07, "loss": 0.0031, "step": 6293 }, { "epoch": 370.2352941176471, "grad_norm": 0.21974647045135498, "learning_rate": 2.890057100729382e-07, "loss": 0.0031, "step": 6294 }, { "epoch": 370.29411764705884, "grad_norm": 0.1913926601409912, "learning_rate": 2.8787003085344546e-07, "loss": 0.0028, "step": 6295 }, { "epoch": 370.3529411764706, "grad_norm": 0.22390642762184143, "learning_rate": 2.867365548289092e-07, "loss": 0.0033, "step": 6296 }, { "epoch": 370.4117647058824, "grad_norm": 0.3114457130432129, "learning_rate": 2.8560528225645925e-07, "loss": 0.0023, "step": 6297 }, { "epoch": 370.47058823529414, "grad_norm": 0.47883522510528564, "learning_rate": 2.844762133927237e-07, "loss": 0.0056, "step": 6298 }, { "epoch": 370.52941176470586, "grad_norm": 0.5227228403091431, "learning_rate": 2.833493484938321e-07, "loss": 0.0044, "step": 6299 }, { "epoch": 370.5882352941176, "grad_norm": 0.35648372769355774, "learning_rate": 2.8222468781541226e-07, "loss": 0.0052, "step": 6300 }, { "epoch": 370.6470588235294, "grad_norm": 0.5055397152900696, "learning_rate": 2.8110223161259444e-07, "loss": 0.0054, "step": 6301 }, { "epoch": 370.70588235294116, "grad_norm": 0.18618498742580414, "learning_rate": 2.7998198014000944e-07, "loss": 0.0027, "step": 6302 }, { "epoch": 370.7647058823529, "grad_norm": 0.4707157015800476, "learning_rate": 2.7886393365178176e-07, "loss": 0.0043, "step": 6303 }, { "epoch": 370.8235294117647, "grad_norm": 0.367958664894104, "learning_rate": 2.77748092401543e-07, "loss": 0.005, "step": 6304 }, { "epoch": 370.88235294117646, "grad_norm": 0.21630249917507172, "learning_rate": 2.7663445664242174e-07, "loss": 0.0033, "step": 6305 }, { "epoch": 370.94117647058823, "grad_norm": 0.3255026340484619, "learning_rate": 2.7552302662704477e-07, "loss": 0.0037, "step": 6306 }, { "epoch": 371.0, "grad_norm": 0.23727478086948395, "learning_rate": 2.744138026075405e-07, "loss": 0.0044, "step": 6307 }, { "epoch": 371.05882352941177, "grad_norm": 0.31627386808395386, "learning_rate": 2.733067848355353e-07, "loss": 0.0061, "step": 6308 }, { "epoch": 371.11764705882354, "grad_norm": 0.31957265734672546, "learning_rate": 2.722019735621584e-07, "loss": 0.0029, "step": 6309 }, { "epoch": 371.1764705882353, "grad_norm": 0.22565925121307373, "learning_rate": 2.710993690380326e-07, "loss": 0.0033, "step": 6310 }, { "epoch": 371.2352941176471, "grad_norm": 0.12188756465911865, "learning_rate": 2.699989715132856e-07, "loss": 0.0016, "step": 6311 }, { "epoch": 371.29411764705884, "grad_norm": 0.24705907702445984, "learning_rate": 2.6890078123754214e-07, "loss": 0.0033, "step": 6312 }, { "epoch": 371.3529411764706, "grad_norm": 0.3490754961967468, "learning_rate": 2.6780479845992745e-07, "loss": 0.0037, "step": 6313 }, { "epoch": 371.4117647058824, "grad_norm": 0.2542828917503357, "learning_rate": 2.667110234290626e-07, "loss": 0.0048, "step": 6314 }, { "epoch": 371.47058823529414, "grad_norm": 0.24691352248191833, "learning_rate": 2.656194563930714e-07, "loss": 0.0039, "step": 6315 }, { "epoch": 371.52941176470586, "grad_norm": 0.12235899269580841, "learning_rate": 2.645300975995768e-07, "loss": 0.0022, "step": 6316 }, { "epoch": 371.5882352941176, "grad_norm": 0.12833520770072937, "learning_rate": 2.6344294729569897e-07, "loss": 0.0019, "step": 6317 }, { "epoch": 371.6470588235294, "grad_norm": 0.21912860870361328, "learning_rate": 2.6235800572805615e-07, "loss": 0.0031, "step": 6318 }, { "epoch": 371.70588235294116, "grad_norm": 0.23467065393924713, "learning_rate": 2.6127527314276923e-07, "loss": 0.0023, "step": 6319 }, { "epoch": 371.7647058823529, "grad_norm": 0.2761695086956024, "learning_rate": 2.601947497854562e-07, "loss": 0.0061, "step": 6320 }, { "epoch": 371.8235294117647, "grad_norm": 0.19274327158927917, "learning_rate": 2.5911643590123103e-07, "loss": 0.0027, "step": 6321 }, { "epoch": 371.88235294117646, "grad_norm": 0.9221513271331787, "learning_rate": 2.580403317347102e-07, "loss": 0.0038, "step": 6322 }, { "epoch": 371.94117647058823, "grad_norm": 0.27883008122444153, "learning_rate": 2.569664375300096e-07, "loss": 0.003, "step": 6323 }, { "epoch": 372.0, "grad_norm": 0.17923596501350403, "learning_rate": 2.5589475353073987e-07, "loss": 0.0041, "step": 6324 }, { "epoch": 372.05882352941177, "grad_norm": 0.24196408689022064, "learning_rate": 2.548252799800122e-07, "loss": 0.0031, "step": 6325 }, { "epoch": 372.11764705882354, "grad_norm": 0.2632855474948883, "learning_rate": 2.537580171204379e-07, "loss": 0.0044, "step": 6326 }, { "epoch": 372.1764705882353, "grad_norm": 0.21665923297405243, "learning_rate": 2.526929651941246e-07, "loss": 0.0022, "step": 6327 }, { "epoch": 372.2352941176471, "grad_norm": 0.27820631861686707, "learning_rate": 2.5163012444267997e-07, "loss": 0.0024, "step": 6328 }, { "epoch": 372.29411764705884, "grad_norm": 0.2082872837781906, "learning_rate": 2.5056949510720686e-07, "loss": 0.0024, "step": 6329 }, { "epoch": 372.3529411764706, "grad_norm": 0.32147833704948425, "learning_rate": 2.495110774283116e-07, "loss": 0.0066, "step": 6330 }, { "epoch": 372.4117647058824, "grad_norm": 0.33882245421409607, "learning_rate": 2.484548716460944e-07, "loss": 0.0054, "step": 6331 }, { "epoch": 372.47058823529414, "grad_norm": 0.9809854030609131, "learning_rate": 2.4740087800015576e-07, "loss": 0.0069, "step": 6332 }, { "epoch": 372.52941176470586, "grad_norm": 0.3708801567554474, "learning_rate": 2.463490967295945e-07, "loss": 0.0049, "step": 6333 }, { "epoch": 372.5882352941176, "grad_norm": 0.2769564986228943, "learning_rate": 2.452995280730064e-07, "loss": 0.0029, "step": 6334 }, { "epoch": 372.6470588235294, "grad_norm": 0.16023491322994232, "learning_rate": 2.4425217226848655e-07, "loss": 0.003, "step": 6335 }, { "epoch": 372.70588235294116, "grad_norm": 0.8069300055503845, "learning_rate": 2.4320702955362596e-07, "loss": 0.0065, "step": 6336 }, { "epoch": 372.7647058823529, "grad_norm": 0.156234472990036, "learning_rate": 2.4216410016551616e-07, "loss": 0.0018, "step": 6337 }, { "epoch": 372.8235294117647, "grad_norm": 0.385423481464386, "learning_rate": 2.4112338434074566e-07, "loss": 0.004, "step": 6338 }, { "epoch": 372.88235294117646, "grad_norm": 0.18044239282608032, "learning_rate": 2.400848823154012e-07, "loss": 0.0022, "step": 6339 }, { "epoch": 372.94117647058823, "grad_norm": 0.323233962059021, "learning_rate": 2.3904859432506535e-07, "loss": 0.0052, "step": 6340 }, { "epoch": 373.0, "grad_norm": 0.6503177285194397, "learning_rate": 2.380145206048201e-07, "loss": 0.0046, "step": 6341 }, { "epoch": 373.05882352941177, "grad_norm": 0.09636493027210236, "learning_rate": 2.3698266138924564e-07, "loss": 0.0015, "step": 6342 }, { "epoch": 373.11764705882354, "grad_norm": 0.4308410882949829, "learning_rate": 2.3595301691241913e-07, "loss": 0.0045, "step": 6343 }, { "epoch": 373.1764705882353, "grad_norm": 0.41080471873283386, "learning_rate": 2.349255874079137e-07, "loss": 0.0043, "step": 6344 }, { "epoch": 373.2352941176471, "grad_norm": 2.0426833629608154, "learning_rate": 2.3390037310880297e-07, "loss": 0.0049, "step": 6345 }, { "epoch": 373.29411764705884, "grad_norm": 0.483460932970047, "learning_rate": 2.3287737424765754e-07, "loss": 0.0058, "step": 6346 }, { "epoch": 373.3529411764706, "grad_norm": 0.7542008757591248, "learning_rate": 2.3185659105654179e-07, "loss": 0.0083, "step": 6347 }, { "epoch": 373.4117647058824, "grad_norm": 0.38162165880203247, "learning_rate": 2.308380237670227e-07, "loss": 0.0025, "step": 6348 }, { "epoch": 373.47058823529414, "grad_norm": 0.3958933353424072, "learning_rate": 2.2982167261015987e-07, "loss": 0.0048, "step": 6349 }, { "epoch": 373.52941176470586, "grad_norm": 0.14370059967041016, "learning_rate": 2.2880753781651555e-07, "loss": 0.0023, "step": 6350 }, { "epoch": 373.5882352941176, "grad_norm": 0.2866837680339813, "learning_rate": 2.2779561961614238e-07, "loss": 0.0048, "step": 6351 }, { "epoch": 373.6470588235294, "grad_norm": 0.056993577629327774, "learning_rate": 2.2678591823859563e-07, "loss": 0.0013, "step": 6352 }, { "epoch": 373.70588235294116, "grad_norm": 0.1898386925458908, "learning_rate": 2.2577843391292653e-07, "loss": 0.003, "step": 6353 }, { "epoch": 373.7647058823529, "grad_norm": 0.2895577847957611, "learning_rate": 2.2477316686768225e-07, "loss": 0.0032, "step": 6354 }, { "epoch": 373.8235294117647, "grad_norm": 0.18981704115867615, "learning_rate": 2.2377011733090591e-07, "loss": 0.0047, "step": 6355 }, { "epoch": 373.88235294117646, "grad_norm": 0.2746979296207428, "learning_rate": 2.2276928553013998e-07, "loss": 0.0042, "step": 6356 }, { "epoch": 373.94117647058823, "grad_norm": 0.21321691572666168, "learning_rate": 2.2177067169242395e-07, "loss": 0.002, "step": 6357 }, { "epoch": 374.0, "grad_norm": 0.569826602935791, "learning_rate": 2.2077427604429435e-07, "loss": 0.0046, "step": 6358 }, { "epoch": 374.05882352941177, "grad_norm": 0.33751800656318665, "learning_rate": 2.1978009881177932e-07, "loss": 0.0068, "step": 6359 }, { "epoch": 374.11764705882354, "grad_norm": 0.40104320645332336, "learning_rate": 2.1878814022041062e-07, "loss": 0.0047, "step": 6360 }, { "epoch": 374.1764705882353, "grad_norm": 0.37158986926078796, "learning_rate": 2.1779840049521495e-07, "loss": 0.0075, "step": 6361 }, { "epoch": 374.2352941176471, "grad_norm": 0.10908827185630798, "learning_rate": 2.1681087986071158e-07, "loss": 0.0019, "step": 6362 }, { "epoch": 374.29411764705884, "grad_norm": 0.23271682858467102, "learning_rate": 2.1582557854092024e-07, "loss": 0.0023, "step": 6363 }, { "epoch": 374.3529411764706, "grad_norm": 0.20850171148777008, "learning_rate": 2.1484249675935653e-07, "loss": 0.0029, "step": 6364 }, { "epoch": 374.4117647058824, "grad_norm": 0.18306878209114075, "learning_rate": 2.1386163473903432e-07, "loss": 0.0023, "step": 6365 }, { "epoch": 374.47058823529414, "grad_norm": 1.1201077699661255, "learning_rate": 2.1288299270246005e-07, "loss": 0.0056, "step": 6366 }, { "epoch": 374.52941176470586, "grad_norm": 0.17418891191482544, "learning_rate": 2.1190657087163835e-07, "loss": 0.0018, "step": 6367 }, { "epoch": 374.5882352941176, "grad_norm": 0.5374042391777039, "learning_rate": 2.1093236946807095e-07, "loss": 0.005, "step": 6368 }, { "epoch": 374.6470588235294, "grad_norm": 0.2761567533016205, "learning_rate": 2.0996038871275547e-07, "loss": 0.0047, "step": 6369 }, { "epoch": 374.70588235294116, "grad_norm": 0.20152810215950012, "learning_rate": 2.0899062882618447e-07, "loss": 0.002, "step": 6370 }, { "epoch": 374.7647058823529, "grad_norm": 0.41643962264060974, "learning_rate": 2.0802309002834976e-07, "loss": 0.0042, "step": 6371 }, { "epoch": 374.8235294117647, "grad_norm": 0.22505778074264526, "learning_rate": 2.070577725387357e-07, "loss": 0.002, "step": 6372 }, { "epoch": 374.88235294117646, "grad_norm": 0.22699826955795288, "learning_rate": 2.0609467657632499e-07, "loss": 0.0026, "step": 6373 }, { "epoch": 374.94117647058823, "grad_norm": 0.31299564242362976, "learning_rate": 2.05133802359595e-07, "loss": 0.0026, "step": 6374 }, { "epoch": 375.0, "grad_norm": 0.21598701179027557, "learning_rate": 2.0417515010652032e-07, "loss": 0.004, "step": 6375 }, { "epoch": 375.05882352941177, "grad_norm": 0.165667325258255, "learning_rate": 2.0321872003457255e-07, "loss": 0.0037, "step": 6376 }, { "epoch": 375.11764705882354, "grad_norm": 0.23466408252716064, "learning_rate": 2.0226451236071586e-07, "loss": 0.0027, "step": 6377 }, { "epoch": 375.1764705882353, "grad_norm": 0.3046574890613556, "learning_rate": 2.013125273014116e-07, "loss": 0.003, "step": 6378 }, { "epoch": 375.2352941176471, "grad_norm": 0.23816345632076263, "learning_rate": 2.003627650726192e-07, "loss": 0.0038, "step": 6379 }, { "epoch": 375.29411764705884, "grad_norm": 0.09759575873613358, "learning_rate": 1.994152258897919e-07, "loss": 0.0015, "step": 6380 }, { "epoch": 375.3529411764706, "grad_norm": 0.28993070125579834, "learning_rate": 1.9846990996787773e-07, "loss": 0.003, "step": 6381 }, { "epoch": 375.4117647058824, "grad_norm": 0.23883703351020813, "learning_rate": 1.9752681752132074e-07, "loss": 0.0028, "step": 6382 }, { "epoch": 375.47058823529414, "grad_norm": 0.4108799993991852, "learning_rate": 1.9658594876406422e-07, "loss": 0.0059, "step": 6383 }, { "epoch": 375.52941176470586, "grad_norm": 0.31588512659072876, "learning_rate": 1.956473039095419e-07, "loss": 0.0047, "step": 6384 }, { "epoch": 375.5882352941176, "grad_norm": 0.16939692199230194, "learning_rate": 1.9471088317068455e-07, "loss": 0.003, "step": 6385 }, { "epoch": 375.6470588235294, "grad_norm": 0.16930681467056274, "learning_rate": 1.9377668675992112e-07, "loss": 0.003, "step": 6386 }, { "epoch": 375.70588235294116, "grad_norm": 0.14499837160110474, "learning_rate": 1.9284471488917323e-07, "loss": 0.0019, "step": 6387 }, { "epoch": 375.7647058823529, "grad_norm": 0.14413733780384064, "learning_rate": 1.919149677698562e-07, "loss": 0.0021, "step": 6388 }, { "epoch": 375.8235294117647, "grad_norm": 0.4269481301307678, "learning_rate": 1.9098744561288685e-07, "loss": 0.0058, "step": 6389 }, { "epoch": 375.88235294117646, "grad_norm": 0.18746572732925415, "learning_rate": 1.9006214862867024e-07, "loss": 0.0018, "step": 6390 }, { "epoch": 375.94117647058823, "grad_norm": 0.3133069574832916, "learning_rate": 1.8913907702711286e-07, "loss": 0.0058, "step": 6391 }, { "epoch": 376.0, "grad_norm": 0.3216845393180847, "learning_rate": 1.8821823101760949e-07, "loss": 0.0049, "step": 6392 }, { "epoch": 376.05882352941177, "grad_norm": 0.3053034842014313, "learning_rate": 1.8729961080905745e-07, "loss": 0.0058, "step": 6393 }, { "epoch": 376.11764705882354, "grad_norm": 0.37559303641319275, "learning_rate": 1.8638321660984336e-07, "loss": 0.0042, "step": 6394 }, { "epoch": 376.1764705882353, "grad_norm": 0.24035602807998657, "learning_rate": 1.854690486278521e-07, "loss": 0.0051, "step": 6395 }, { "epoch": 376.2352941176471, "grad_norm": 0.15957267582416534, "learning_rate": 1.8455710707046215e-07, "loss": 0.0026, "step": 6396 }, { "epoch": 376.29411764705884, "grad_norm": 0.21935990452766418, "learning_rate": 1.8364739214454696e-07, "loss": 0.0039, "step": 6397 }, { "epoch": 376.3529411764706, "grad_norm": 0.254019170999527, "learning_rate": 1.82739904056477e-07, "loss": 0.0021, "step": 6398 }, { "epoch": 376.4117647058824, "grad_norm": 0.4702586531639099, "learning_rate": 1.8183464301211317e-07, "loss": 0.0023, "step": 6399 }, { "epoch": 376.47058823529414, "grad_norm": 0.16003289818763733, "learning_rate": 1.8093160921681564e-07, "loss": 0.0025, "step": 6400 }, { "epoch": 376.52941176470586, "grad_norm": 0.17109856009483337, "learning_rate": 1.8003080287543606e-07, "loss": 0.0024, "step": 6401 }, { "epoch": 376.5882352941176, "grad_norm": 0.24230079352855682, "learning_rate": 1.7913222419232435e-07, "loss": 0.0018, "step": 6402 }, { "epoch": 376.6470588235294, "grad_norm": 1.1338462829589844, "learning_rate": 1.7823587337132077e-07, "loss": 0.0046, "step": 6403 }, { "epoch": 376.70588235294116, "grad_norm": 0.15141059458255768, "learning_rate": 1.7734175061576264e-07, "loss": 0.0021, "step": 6404 }, { "epoch": 376.7647058823529, "grad_norm": 0.805122971534729, "learning_rate": 1.764498561284822e-07, "loss": 0.0056, "step": 6405 }, { "epoch": 376.8235294117647, "grad_norm": 0.28025516867637634, "learning_rate": 1.7556019011180646e-07, "loss": 0.004, "step": 6406 }, { "epoch": 376.88235294117646, "grad_norm": 0.3125111758708954, "learning_rate": 1.74672752767554e-07, "loss": 0.0054, "step": 6407 }, { "epoch": 376.94117647058823, "grad_norm": 0.3144741952419281, "learning_rate": 1.7378754429704049e-07, "loss": 0.0028, "step": 6408 }, { "epoch": 377.0, "grad_norm": 0.2451142817735672, "learning_rate": 1.7290456490107522e-07, "loss": 0.003, "step": 6409 }, { "epoch": 377.05882352941177, "grad_norm": 0.1877753585577011, "learning_rate": 1.7202381477996355e-07, "loss": 0.0025, "step": 6410 }, { "epoch": 377.11764705882354, "grad_norm": 0.26807722449302673, "learning_rate": 1.7114529413350234e-07, "loss": 0.0038, "step": 6411 }, { "epoch": 377.1764705882353, "grad_norm": 0.3639312982559204, "learning_rate": 1.7026900316098217e-07, "loss": 0.0068, "step": 6412 }, { "epoch": 377.2352941176471, "grad_norm": 0.19223172962665558, "learning_rate": 1.6939494206119288e-07, "loss": 0.0028, "step": 6413 }, { "epoch": 377.29411764705884, "grad_norm": 0.44858622550964355, "learning_rate": 1.685231110324115e-07, "loss": 0.0039, "step": 6414 }, { "epoch": 377.3529411764706, "grad_norm": 0.1485140323638916, "learning_rate": 1.6765351027241527e-07, "loss": 0.002, "step": 6415 }, { "epoch": 377.4117647058824, "grad_norm": 0.27977657318115234, "learning_rate": 1.6678613997847094e-07, "loss": 0.0043, "step": 6416 }, { "epoch": 377.47058823529414, "grad_norm": 0.41490018367767334, "learning_rate": 1.6592100034734438e-07, "loss": 0.0081, "step": 6417 }, { "epoch": 377.52941176470586, "grad_norm": 0.1642128974199295, "learning_rate": 1.6505809157528972e-07, "loss": 0.0033, "step": 6418 }, { "epoch": 377.5882352941176, "grad_norm": 0.4867697060108185, "learning_rate": 1.641974138580582e-07, "loss": 0.0054, "step": 6419 }, { "epoch": 377.6470588235294, "grad_norm": 0.32787275314331055, "learning_rate": 1.633389673908947e-07, "loss": 0.0047, "step": 6420 }, { "epoch": 377.70588235294116, "grad_norm": 0.3389671742916107, "learning_rate": 1.6248275236853793e-07, "loss": 0.0029, "step": 6421 }, { "epoch": 377.7647058823529, "grad_norm": 0.14866310358047485, "learning_rate": 1.616287689852203e-07, "loss": 0.0021, "step": 6422 }, { "epoch": 377.8235294117647, "grad_norm": 0.4218626320362091, "learning_rate": 1.6077701743466567e-07, "loss": 0.0036, "step": 6423 }, { "epoch": 377.88235294117646, "grad_norm": 0.1078873798251152, "learning_rate": 1.5992749791009622e-07, "loss": 0.002, "step": 6424 }, { "epoch": 377.94117647058823, "grad_norm": 0.19498929381370544, "learning_rate": 1.590802106042255e-07, "loss": 0.0022, "step": 6425 }, { "epoch": 378.0, "grad_norm": 0.14537811279296875, "learning_rate": 1.5823515570925763e-07, "loss": 0.0023, "step": 6426 }, { "epoch": 378.05882352941177, "grad_norm": 0.15750911831855774, "learning_rate": 1.5739233341689585e-07, "loss": 0.0023, "step": 6427 }, { "epoch": 378.11764705882354, "grad_norm": 0.22897401452064514, "learning_rate": 1.5655174391833394e-07, "loss": 0.0033, "step": 6428 }, { "epoch": 378.1764705882353, "grad_norm": 0.17268040776252747, "learning_rate": 1.5571338740425713e-07, "loss": 0.0031, "step": 6429 }, { "epoch": 378.2352941176471, "grad_norm": 0.11928306519985199, "learning_rate": 1.548772640648488e-07, "loss": 0.0014, "step": 6430 }, { "epoch": 378.29411764705884, "grad_norm": 0.252431184053421, "learning_rate": 1.5404337408978288e-07, "loss": 0.0049, "step": 6431 }, { "epoch": 378.3529411764706, "grad_norm": 0.4509327709674835, "learning_rate": 1.5321171766822796e-07, "loss": 0.0051, "step": 6432 }, { "epoch": 378.4117647058824, "grad_norm": 0.3502081632614136, "learning_rate": 1.5238229498884426e-07, "loss": 0.006, "step": 6433 }, { "epoch": 378.47058823529414, "grad_norm": 0.46326348185539246, "learning_rate": 1.5155510623978465e-07, "loss": 0.0042, "step": 6434 }, { "epoch": 378.52941176470586, "grad_norm": 0.49316591024398804, "learning_rate": 1.5073015160869896e-07, "loss": 0.0028, "step": 6435 }, { "epoch": 378.5882352941176, "grad_norm": 0.2094322293996811, "learning_rate": 1.4990743128272755e-07, "loss": 0.0034, "step": 6436 }, { "epoch": 378.6470588235294, "grad_norm": 0.45295315980911255, "learning_rate": 1.4908694544850445e-07, "loss": 0.0026, "step": 6437 }, { "epoch": 378.70588235294116, "grad_norm": 2.505059003829956, "learning_rate": 1.4826869429215518e-07, "loss": 0.009, "step": 6438 }, { "epoch": 378.7647058823529, "grad_norm": 0.19454346597194672, "learning_rate": 1.474526779993024e-07, "loss": 0.0027, "step": 6439 }, { "epoch": 378.8235294117647, "grad_norm": 0.23353882133960724, "learning_rate": 1.4663889675505694e-07, "loss": 0.0035, "step": 6440 }, { "epoch": 378.88235294117646, "grad_norm": 0.3100133538246155, "learning_rate": 1.4582735074402665e-07, "loss": 0.0055, "step": 6441 }, { "epoch": 378.94117647058823, "grad_norm": 0.3851488530635834, "learning_rate": 1.4501804015030874e-07, "loss": 0.0033, "step": 6442 }, { "epoch": 379.0, "grad_norm": 0.13340288400650024, "learning_rate": 1.4421096515749855e-07, "loss": 0.0021, "step": 6443 }, { "epoch": 379.05882352941177, "grad_norm": 0.4309251010417938, "learning_rate": 1.434061259486763e-07, "loss": 0.0077, "step": 6444 }, { "epoch": 379.11764705882354, "grad_norm": 0.3262579143047333, "learning_rate": 1.4260352270642263e-07, "loss": 0.0064, "step": 6445 }, { "epoch": 379.1764705882353, "grad_norm": 0.1405198872089386, "learning_rate": 1.4180315561280745e-07, "loss": 0.0019, "step": 6446 }, { "epoch": 379.2352941176471, "grad_norm": 0.24755169451236725, "learning_rate": 1.410050248493944e-07, "loss": 0.0041, "step": 6447 }, { "epoch": 379.29411764705884, "grad_norm": 0.2414361834526062, "learning_rate": 1.4020913059723862e-07, "loss": 0.0043, "step": 6448 }, { "epoch": 379.3529411764706, "grad_norm": 0.1710044890642166, "learning_rate": 1.3941547303688797e-07, "loss": 0.0026, "step": 6449 }, { "epoch": 379.4117647058824, "grad_norm": 0.32571983337402344, "learning_rate": 1.3862405234838395e-07, "loss": 0.0028, "step": 6450 }, { "epoch": 379.47058823529414, "grad_norm": 0.15460002422332764, "learning_rate": 1.3783486871126073e-07, "loss": 0.002, "step": 6451 }, { "epoch": 379.52941176470586, "grad_norm": 0.15931445360183716, "learning_rate": 1.37047922304544e-07, "loss": 0.0021, "step": 6452 }, { "epoch": 379.5882352941176, "grad_norm": 0.24090775847434998, "learning_rate": 1.3626321330675318e-07, "loss": 0.0029, "step": 6453 }, { "epoch": 379.6470588235294, "grad_norm": 0.5434537529945374, "learning_rate": 1.354807418958992e-07, "loss": 0.0029, "step": 6454 }, { "epoch": 379.70588235294116, "grad_norm": 0.30508771538734436, "learning_rate": 1.3470050824948344e-07, "loss": 0.0055, "step": 6455 }, { "epoch": 379.7647058823529, "grad_norm": 0.28205254673957825, "learning_rate": 1.3392251254450428e-07, "loss": 0.0039, "step": 6456 }, { "epoch": 379.8235294117647, "grad_norm": 0.4228779673576355, "learning_rate": 1.3314675495744944e-07, "loss": 0.0048, "step": 6457 }, { "epoch": 379.88235294117646, "grad_norm": 0.17814040184020996, "learning_rate": 1.3237323566429927e-07, "loss": 0.0022, "step": 6458 }, { "epoch": 379.94117647058823, "grad_norm": 0.4493367373943329, "learning_rate": 1.3160195484052562e-07, "loss": 0.002, "step": 6459 }, { "epoch": 380.0, "grad_norm": 0.3149834871292114, "learning_rate": 1.30832912661093e-07, "loss": 0.0048, "step": 6460 }, { "epoch": 380.05882352941177, "grad_norm": 0.47996073961257935, "learning_rate": 1.300661093004607e-07, "loss": 0.0039, "step": 6461 }, { "epoch": 380.11764705882354, "grad_norm": 0.2342631220817566, "learning_rate": 1.2930154493257741e-07, "loss": 0.0025, "step": 6462 }, { "epoch": 380.1764705882353, "grad_norm": 0.23329873383045197, "learning_rate": 1.2853921973088214e-07, "loss": 0.0034, "step": 6463 }, { "epoch": 380.2352941176471, "grad_norm": 0.3058229684829712, "learning_rate": 1.2777913386831097e-07, "loss": 0.0029, "step": 6464 }, { "epoch": 380.29411764705884, "grad_norm": 0.2255399525165558, "learning_rate": 1.2702128751728825e-07, "loss": 0.0024, "step": 6465 }, { "epoch": 380.3529411764706, "grad_norm": 0.27825358510017395, "learning_rate": 1.262656808497309e-07, "loss": 0.0034, "step": 6466 }, { "epoch": 380.4117647058824, "grad_norm": 0.21721291542053223, "learning_rate": 1.2551231403704734e-07, "loss": 0.0039, "step": 6467 }, { "epoch": 380.47058823529414, "grad_norm": 0.15465609729290009, "learning_rate": 1.2476118725014085e-07, "loss": 0.0021, "step": 6468 }, { "epoch": 380.52941176470586, "grad_norm": 0.6479480266571045, "learning_rate": 1.2401230065940407e-07, "loss": 0.0079, "step": 6469 }, { "epoch": 380.5882352941176, "grad_norm": 0.20689204335212708, "learning_rate": 1.2326565443471994e-07, "loss": 0.0045, "step": 6470 }, { "epoch": 380.6470588235294, "grad_norm": 0.17568853497505188, "learning_rate": 1.2252124874546522e-07, "loss": 0.0025, "step": 6471 }, { "epoch": 380.70588235294116, "grad_norm": 0.21567822992801666, "learning_rate": 1.217790837605104e-07, "loss": 0.0032, "step": 6472 }, { "epoch": 380.7647058823529, "grad_norm": 0.18304647505283356, "learning_rate": 1.2103915964821412e-07, "loss": 0.0023, "step": 6473 }, { "epoch": 380.8235294117647, "grad_norm": 0.33521905541419983, "learning_rate": 1.2030147657642876e-07, "loss": 0.0064, "step": 6474 }, { "epoch": 380.88235294117646, "grad_norm": 0.33405429124832153, "learning_rate": 1.1956603471249494e-07, "loss": 0.0032, "step": 6475 }, { "epoch": 380.94117647058823, "grad_norm": 0.15391986072063446, "learning_rate": 1.1883283422325142e-07, "loss": 0.0026, "step": 6476 }, { "epoch": 381.0, "grad_norm": 0.15807589888572693, "learning_rate": 1.1810187527502182e-07, "loss": 0.0021, "step": 6477 }, { "epoch": 381.05882352941177, "grad_norm": 0.6058407425880432, "learning_rate": 1.173731580336257e-07, "loss": 0.0058, "step": 6478 }, { "epoch": 381.11764705882354, "grad_norm": 0.16389711201190948, "learning_rate": 1.1664668266437084e-07, "loss": 0.0027, "step": 6479 }, { "epoch": 381.1764705882353, "grad_norm": 0.13757048547267914, "learning_rate": 1.1592244933205988e-07, "loss": 0.002, "step": 6480 }, { "epoch": 381.2352941176471, "grad_norm": 0.5461962223052979, "learning_rate": 1.1520045820098246e-07, "loss": 0.0068, "step": 6481 }, { "epoch": 381.29411764705884, "grad_norm": 0.21608223021030426, "learning_rate": 1.1448070943492428e-07, "loss": 0.003, "step": 6482 }, { "epoch": 381.3529411764706, "grad_norm": 0.778340756893158, "learning_rate": 1.1376320319716028e-07, "loss": 0.0039, "step": 6483 }, { "epoch": 381.4117647058824, "grad_norm": 0.4250411093235016, "learning_rate": 1.130479396504558e-07, "loss": 0.0048, "step": 6484 }, { "epoch": 381.47058823529414, "grad_norm": 0.3058023750782013, "learning_rate": 1.1233491895706772e-07, "loss": 0.005, "step": 6485 }, { "epoch": 381.52941176470586, "grad_norm": 0.13731884956359863, "learning_rate": 1.1162414127874555e-07, "loss": 0.0028, "step": 6486 }, { "epoch": 381.5882352941176, "grad_norm": 0.20270493626594543, "learning_rate": 1.1091560677672919e-07, "loss": 0.0037, "step": 6487 }, { "epoch": 381.6470588235294, "grad_norm": 0.22017982602119446, "learning_rate": 1.1020931561174897e-07, "loss": 0.003, "step": 6488 }, { "epoch": 381.70588235294116, "grad_norm": 0.2673662602901459, "learning_rate": 1.0950526794402783e-07, "loss": 0.0026, "step": 6489 }, { "epoch": 381.7647058823529, "grad_norm": 0.7276962399482727, "learning_rate": 1.0880346393327801e-07, "loss": 0.0042, "step": 6490 }, { "epoch": 381.8235294117647, "grad_norm": 0.12482193112373352, "learning_rate": 1.0810390373870328e-07, "loss": 0.0021, "step": 6491 }, { "epoch": 381.88235294117646, "grad_norm": 0.11063823848962784, "learning_rate": 1.0740658751899891e-07, "loss": 0.0018, "step": 6492 }, { "epoch": 381.94117647058823, "grad_norm": 0.2486935704946518, "learning_rate": 1.0671151543235281e-07, "loss": 0.0024, "step": 6493 }, { "epoch": 382.0, "grad_norm": 0.3496301770210266, "learning_rate": 1.0601868763643997e-07, "loss": 0.0036, "step": 6494 }, { "epoch": 382.05882352941177, "grad_norm": 0.1427466869354248, "learning_rate": 1.053281042884291e-07, "loss": 0.0019, "step": 6495 }, { "epoch": 382.11764705882354, "grad_norm": 0.2540222108364105, "learning_rate": 1.0463976554497823e-07, "loss": 0.0037, "step": 6496 }, { "epoch": 382.1764705882353, "grad_norm": 0.2652595341205597, "learning_rate": 1.039536715622369e-07, "loss": 0.0052, "step": 6497 }, { "epoch": 382.2352941176471, "grad_norm": 0.27415546774864197, "learning_rate": 1.0326982249584617e-07, "loss": 0.0056, "step": 6498 }, { "epoch": 382.29411764705884, "grad_norm": 0.27131885290145874, "learning_rate": 1.0258821850093636e-07, "loss": 0.0024, "step": 6499 }, { "epoch": 382.3529411764706, "grad_norm": 0.2477731853723526, "learning_rate": 1.019088597321305e-07, "loss": 0.0026, "step": 6500 }, { "epoch": 382.4117647058824, "grad_norm": 0.320145845413208, "learning_rate": 1.0123174634353861e-07, "loss": 0.0047, "step": 6501 }, { "epoch": 382.47058823529414, "grad_norm": 0.2811915874481201, "learning_rate": 1.0055687848876672e-07, "loss": 0.0023, "step": 6502 }, { "epoch": 382.52941176470586, "grad_norm": 0.3446402847766876, "learning_rate": 9.988425632090571e-08, "loss": 0.0035, "step": 6503 }, { "epoch": 382.5882352941176, "grad_norm": 0.23271416127681732, "learning_rate": 9.921387999254128e-08, "loss": 0.0032, "step": 6504 }, { "epoch": 382.6470588235294, "grad_norm": 0.2976870834827423, "learning_rate": 9.854574965574848e-08, "loss": 0.0048, "step": 6505 }, { "epoch": 382.70588235294116, "grad_norm": 0.21933609247207642, "learning_rate": 9.787986546209161e-08, "loss": 0.0021, "step": 6506 }, { "epoch": 382.7647058823529, "grad_norm": 0.5263001918792725, "learning_rate": 9.72162275626265e-08, "loss": 0.0029, "step": 6507 }, { "epoch": 382.8235294117647, "grad_norm": 0.38101866841316223, "learning_rate": 9.65548361079005e-08, "loss": 0.0058, "step": 6508 }, { "epoch": 382.88235294117646, "grad_norm": 0.2176773101091385, "learning_rate": 9.589569124794918e-08, "loss": 0.0045, "step": 6509 }, { "epoch": 382.94117647058823, "grad_norm": 0.25646117329597473, "learning_rate": 9.523879313229845e-08, "loss": 0.0031, "step": 6510 }, { "epoch": 383.0, "grad_norm": 0.6563746333122253, "learning_rate": 9.45841419099669e-08, "loss": 0.0033, "step": 6511 }, { "epoch": 383.05882352941177, "grad_norm": 0.2122146636247635, "learning_rate": 9.393173772946129e-08, "loss": 0.0032, "step": 6512 }, { "epoch": 383.11764705882354, "grad_norm": 0.23111893236637115, "learning_rate": 9.328158073878102e-08, "loss": 0.003, "step": 6513 }, { "epoch": 383.1764705882353, "grad_norm": 0.37241238355636597, "learning_rate": 9.263367108541143e-08, "loss": 0.0052, "step": 6514 }, { "epoch": 383.2352941176471, "grad_norm": 0.207525372505188, "learning_rate": 9.198800891633275e-08, "loss": 0.0027, "step": 6515 }, { "epoch": 383.29411764705884, "grad_norm": 0.3108038604259491, "learning_rate": 9.134459437801225e-08, "loss": 0.003, "step": 6516 }, { "epoch": 383.3529411764706, "grad_norm": 0.24058324098587036, "learning_rate": 9.070342761640982e-08, "loss": 0.003, "step": 6517 }, { "epoch": 383.4117647058824, "grad_norm": 0.1756620705127716, "learning_rate": 9.006450877697248e-08, "loss": 0.0017, "step": 6518 }, { "epoch": 383.47058823529414, "grad_norm": 0.39686357975006104, "learning_rate": 8.942783800463983e-08, "loss": 0.0034, "step": 6519 }, { "epoch": 383.52941176470586, "grad_norm": 0.30985310673713684, "learning_rate": 8.87934154438408e-08, "loss": 0.0056, "step": 6520 }, { "epoch": 383.5882352941176, "grad_norm": 0.5275324583053589, "learning_rate": 8.816124123849245e-08, "loss": 0.0047, "step": 6521 }, { "epoch": 383.6470588235294, "grad_norm": 0.32034996151924133, "learning_rate": 8.753131553200344e-08, "loss": 0.0023, "step": 6522 }, { "epoch": 383.70588235294116, "grad_norm": 0.20111723244190216, "learning_rate": 8.69036384672739e-08, "loss": 0.0018, "step": 6523 }, { "epoch": 383.7647058823529, "grad_norm": 0.3233012557029724, "learning_rate": 8.627821018669213e-08, "loss": 0.0031, "step": 6524 }, { "epoch": 383.8235294117647, "grad_norm": 0.23857425153255463, "learning_rate": 8.565503083213578e-08, "loss": 0.0032, "step": 6525 }, { "epoch": 383.88235294117646, "grad_norm": 0.3029640018939972, "learning_rate": 8.503410054497286e-08, "loss": 0.0041, "step": 6526 }, { "epoch": 383.94117647058823, "grad_norm": 0.28894278407096863, "learning_rate": 8.441541946606069e-08, "loss": 0.0022, "step": 6527 }, { "epoch": 384.0, "grad_norm": 0.2405901700258255, "learning_rate": 8.379898773574924e-08, "loss": 0.0055, "step": 6528 }, { "epoch": 384.05882352941177, "grad_norm": 0.12158321589231491, "learning_rate": 8.318480549387331e-08, "loss": 0.0019, "step": 6529 }, { "epoch": 384.11764705882354, "grad_norm": 0.1726987063884735, "learning_rate": 8.257287287976146e-08, "loss": 0.0026, "step": 6530 }, { "epoch": 384.1764705882353, "grad_norm": 0.214524045586586, "learning_rate": 8.19631900322293e-08, "loss": 0.0031, "step": 6531 }, { "epoch": 384.2352941176471, "grad_norm": 0.2413085252046585, "learning_rate": 8.135575708958621e-08, "loss": 0.0031, "step": 6532 }, { "epoch": 384.29411764705884, "grad_norm": 0.30965402722358704, "learning_rate": 8.075057418962418e-08, "loss": 0.0037, "step": 6533 }, { "epoch": 384.3529411764706, "grad_norm": 0.3746213912963867, "learning_rate": 8.014764146963116e-08, "loss": 0.0053, "step": 6534 }, { "epoch": 384.4117647058824, "grad_norm": 0.3098808825016022, "learning_rate": 7.954695906638332e-08, "loss": 0.0076, "step": 6535 }, { "epoch": 384.47058823529414, "grad_norm": 0.24782855808734894, "learning_rate": 7.894852711614275e-08, "loss": 0.0028, "step": 6536 }, { "epoch": 384.52941176470586, "grad_norm": 0.11522267013788223, "learning_rate": 7.835234575466643e-08, "loss": 0.0017, "step": 6537 }, { "epoch": 384.5882352941176, "grad_norm": 0.34919244050979614, "learning_rate": 7.775841511719507e-08, "loss": 0.0048, "step": 6538 }, { "epoch": 384.6470588235294, "grad_norm": 0.27058306336402893, "learning_rate": 7.71667353384642e-08, "loss": 0.0038, "step": 6539 }, { "epoch": 384.70588235294116, "grad_norm": 0.3775753378868103, "learning_rate": 7.657730655269424e-08, "loss": 0.0048, "step": 6540 }, { "epoch": 384.7647058823529, "grad_norm": 3.7373390197753906, "learning_rate": 7.599012889359825e-08, "loss": 0.0069, "step": 6541 }, { "epoch": 384.8235294117647, "grad_norm": 0.4875966012477875, "learning_rate": 7.540520249437743e-08, "loss": 0.0064, "step": 6542 }, { "epoch": 384.88235294117646, "grad_norm": 0.2627912163734436, "learning_rate": 7.482252748772234e-08, "loss": 0.0019, "step": 6543 }, { "epoch": 384.94117647058823, "grad_norm": 0.16226841509342194, "learning_rate": 7.424210400581167e-08, "loss": 0.0022, "step": 6544 }, { "epoch": 385.0, "grad_norm": 0.16228324174880981, "learning_rate": 7.366393218031564e-08, "loss": 0.002, "step": 6545 }, { "epoch": 385.05882352941177, "grad_norm": 0.20531724393367767, "learning_rate": 7.308801214239159e-08, "loss": 0.0028, "step": 6546 }, { "epoch": 385.11764705882354, "grad_norm": 0.2869636118412018, "learning_rate": 7.25143440226872e-08, "loss": 0.0039, "step": 6547 }, { "epoch": 385.1764705882353, "grad_norm": 0.33193016052246094, "learning_rate": 7.19429279513384e-08, "loss": 0.0034, "step": 6548 }, { "epoch": 385.2352941176471, "grad_norm": 0.14534445106983185, "learning_rate": 7.137376405797147e-08, "loss": 0.0022, "step": 6549 }, { "epoch": 385.29411764705884, "grad_norm": 0.2726057469844818, "learning_rate": 7.080685247170093e-08, "loss": 0.0043, "step": 6550 }, { "epoch": 385.3529411764706, "grad_norm": 0.22378452122211456, "learning_rate": 7.024219332113059e-08, "loss": 0.0036, "step": 6551 }, { "epoch": 385.4117647058824, "grad_norm": 0.16403713822364807, "learning_rate": 6.96797867343535e-08, "loss": 0.0016, "step": 6552 }, { "epoch": 385.47058823529414, "grad_norm": 0.1325124204158783, "learning_rate": 6.911963283895096e-08, "loss": 0.0022, "step": 6553 }, { "epoch": 385.52941176470586, "grad_norm": 0.2819448709487915, "learning_rate": 6.856173176199466e-08, "loss": 0.0071, "step": 6554 }, { "epoch": 385.5882352941176, "grad_norm": 0.3384711742401123, "learning_rate": 6.800608363004335e-08, "loss": 0.0059, "step": 6555 }, { "epoch": 385.6470588235294, "grad_norm": 0.19213518500328064, "learning_rate": 6.745268856914622e-08, "loss": 0.0027, "step": 6556 }, { "epoch": 385.70588235294116, "grad_norm": 0.19334352016448975, "learning_rate": 6.690154670484062e-08, "loss": 0.002, "step": 6557 }, { "epoch": 385.7647058823529, "grad_norm": 0.32753273844718933, "learning_rate": 6.63526581621532e-08, "loss": 0.0035, "step": 6558 }, { "epoch": 385.8235294117647, "grad_norm": 0.19373644888401031, "learning_rate": 6.580602306559991e-08, "loss": 0.0018, "step": 6559 }, { "epoch": 385.88235294117646, "grad_norm": 0.5667992234230042, "learning_rate": 6.526164153918269e-08, "loss": 0.0072, "step": 6560 }, { "epoch": 385.94117647058823, "grad_norm": 0.24855834245681763, "learning_rate": 6.471951370639718e-08, "loss": 0.0044, "step": 6561 }, { "epoch": 386.0, "grad_norm": 1.0238171815872192, "learning_rate": 6.417963969022389e-08, "loss": 0.0042, "step": 6562 }, { "epoch": 386.05882352941177, "grad_norm": 0.1319715976715088, "learning_rate": 6.364201961313155e-08, "loss": 0.0019, "step": 6563 }, { "epoch": 386.11764705882354, "grad_norm": 0.2544462978839874, "learning_rate": 6.310665359708035e-08, "loss": 0.0046, "step": 6564 }, { "epoch": 386.1764705882353, "grad_norm": 0.33473965525627136, "learning_rate": 6.257354176351982e-08, "loss": 0.0029, "step": 6565 }, { "epoch": 386.2352941176471, "grad_norm": 0.8254721164703369, "learning_rate": 6.204268423338323e-08, "loss": 0.0045, "step": 6566 }, { "epoch": 386.29411764705884, "grad_norm": 0.3078450560569763, "learning_rate": 6.151408112709755e-08, "loss": 0.0038, "step": 6567 }, { "epoch": 386.3529411764706, "grad_norm": 0.19548293948173523, "learning_rate": 6.098773256457468e-08, "loss": 0.0034, "step": 6568 }, { "epoch": 386.4117647058824, "grad_norm": 0.5808397531509399, "learning_rate": 6.046363866521909e-08, "loss": 0.0064, "step": 6569 }, { "epoch": 386.47058823529414, "grad_norm": 0.35367846488952637, "learning_rate": 5.9941799547919e-08, "loss": 0.0065, "step": 6570 }, { "epoch": 386.52941176470586, "grad_norm": 0.18332640826702118, "learning_rate": 5.942221533105419e-08, "loss": 0.0041, "step": 6571 }, { "epoch": 386.5882352941176, "grad_norm": 0.41235068440437317, "learning_rate": 5.890488613249257e-08, "loss": 0.0046, "step": 6572 }, { "epoch": 386.6470588235294, "grad_norm": 0.14230352640151978, "learning_rate": 5.838981206959027e-08, "loss": 0.0022, "step": 6573 }, { "epoch": 386.70588235294116, "grad_norm": 0.24585308134555817, "learning_rate": 5.78769932591916e-08, "loss": 0.0032, "step": 6574 }, { "epoch": 386.7647058823529, "grad_norm": 0.14780229330062866, "learning_rate": 5.7366429817630184e-08, "loss": 0.0022, "step": 6575 }, { "epoch": 386.8235294117647, "grad_norm": 0.5093813538551331, "learning_rate": 5.685812186072559e-08, "loss": 0.0072, "step": 6576 }, { "epoch": 386.88235294117646, "grad_norm": 0.1954745352268219, "learning_rate": 5.635206950378891e-08, "loss": 0.0022, "step": 6577 }, { "epoch": 386.94117647058823, "grad_norm": 0.10785586386919022, "learning_rate": 5.5848272861616094e-08, "loss": 0.0018, "step": 6578 }, { "epoch": 387.0, "grad_norm": 0.2583400309085846, "learning_rate": 5.534673204849572e-08, "loss": 0.0023, "step": 6579 }, { "epoch": 387.05882352941177, "grad_norm": 0.29883766174316406, "learning_rate": 5.484744717820123e-08, "loss": 0.0031, "step": 6580 }, { "epoch": 387.11764705882354, "grad_norm": 0.21984022855758667, "learning_rate": 5.435041836399535e-08, "loss": 0.0036, "step": 6581 }, { "epoch": 387.1764705882353, "grad_norm": 0.5874029397964478, "learning_rate": 5.385564571862789e-08, "loss": 0.0041, "step": 6582 }, { "epoch": 387.2352941176471, "grad_norm": 0.4036409556865692, "learning_rate": 5.336312935433907e-08, "loss": 0.0045, "step": 6583 }, { "epoch": 387.29411764705884, "grad_norm": 0.2767336666584015, "learning_rate": 5.287286938285729e-08, "loss": 0.0037, "step": 6584 }, { "epoch": 387.3529411764706, "grad_norm": 0.14905482530593872, "learning_rate": 5.238486591539471e-08, "loss": 0.0018, "step": 6585 }, { "epoch": 387.4117647058824, "grad_norm": 0.4045553207397461, "learning_rate": 5.189911906265832e-08, "loss": 0.0036, "step": 6586 }, { "epoch": 387.47058823529414, "grad_norm": 0.24051249027252197, "learning_rate": 5.1415628934837755e-08, "loss": 0.0032, "step": 6587 }, { "epoch": 387.52941176470586, "grad_norm": 0.3113757371902466, "learning_rate": 5.0934395641611954e-08, "loss": 0.0066, "step": 6588 }, { "epoch": 387.5882352941176, "grad_norm": 0.1755220741033554, "learning_rate": 5.0455419292150255e-08, "loss": 0.0029, "step": 6589 }, { "epoch": 387.6470588235294, "grad_norm": 0.16344702243804932, "learning_rate": 4.997869999510796e-08, "loss": 0.0033, "step": 6590 }, { "epoch": 387.70588235294116, "grad_norm": 0.37032562494277954, "learning_rate": 4.950423785862857e-08, "loss": 0.0061, "step": 6591 }, { "epoch": 387.7647058823529, "grad_norm": 0.08859598636627197, "learning_rate": 4.903203299034376e-08, "loss": 0.0012, "step": 6592 }, { "epoch": 387.8235294117647, "grad_norm": 0.2563002407550812, "learning_rate": 4.8562085497373405e-08, "loss": 0.0036, "step": 6593 }, { "epoch": 387.88235294117646, "grad_norm": 0.14916886389255524, "learning_rate": 4.809439548632333e-08, "loss": 0.0026, "step": 6594 }, { "epoch": 387.94117647058823, "grad_norm": 0.47875359654426575, "learning_rate": 4.762896306329201e-08, "loss": 0.0057, "step": 6595 }, { "epoch": 388.0, "grad_norm": 0.6434005498886108, "learning_rate": 4.716578833386054e-08, "loss": 0.0028, "step": 6596 }, { "epoch": 388.05882352941177, "grad_norm": 0.3577626049518585, "learning_rate": 4.6704871403099316e-08, "loss": 0.0041, "step": 6597 }, { "epoch": 388.11764705882354, "grad_norm": 0.30477213859558105, "learning_rate": 4.624621237557026e-08, "loss": 0.0055, "step": 6598 }, { "epoch": 388.1764705882353, "grad_norm": 0.22676950693130493, "learning_rate": 4.578981135531679e-08, "loss": 0.0034, "step": 6599 }, { "epoch": 388.2352941176471, "grad_norm": 0.7765980362892151, "learning_rate": 4.5335668445876115e-08, "loss": 0.0034, "step": 6600 }, { "epoch": 388.29411764705884, "grad_norm": 0.25459522008895874, "learning_rate": 4.488378375026803e-08, "loss": 0.0023, "step": 6601 }, { "epoch": 388.3529411764706, "grad_norm": 0.23207752406597137, "learning_rate": 4.4434157371004983e-08, "loss": 0.0034, "step": 6602 }, { "epoch": 388.4117647058824, "grad_norm": 0.7636719942092896, "learning_rate": 4.398678941008205e-08, "loss": 0.0049, "step": 6603 }, { "epoch": 388.47058823529414, "grad_norm": 0.1676653027534485, "learning_rate": 4.354167996898584e-08, "loss": 0.0019, "step": 6604 }, { "epoch": 388.52941176470586, "grad_norm": 0.3176555335521698, "learning_rate": 4.309882914869001e-08, "loss": 0.0053, "step": 6605 }, { "epoch": 388.5882352941176, "grad_norm": 0.46698370575904846, "learning_rate": 4.2658237049655325e-08, "loss": 0.0057, "step": 6606 }, { "epoch": 388.6470588235294, "grad_norm": 0.21387679874897003, "learning_rate": 4.2219903771828494e-08, "loss": 0.0024, "step": 6607 }, { "epoch": 388.70588235294116, "grad_norm": 0.12456576526165009, "learning_rate": 4.178382941464554e-08, "loss": 0.002, "step": 6608 }, { "epoch": 388.7647058823529, "grad_norm": 0.4762243628501892, "learning_rate": 4.135001407703065e-08, "loss": 0.0052, "step": 6609 }, { "epoch": 388.8235294117647, "grad_norm": 0.22715339064598083, "learning_rate": 4.0918457857395124e-08, "loss": 0.0037, "step": 6610 }, { "epoch": 388.88235294117646, "grad_norm": 0.45582497119903564, "learning_rate": 4.048916085363619e-08, "loss": 0.0032, "step": 6611 }, { "epoch": 388.94117647058823, "grad_norm": 0.2718805968761444, "learning_rate": 4.006212316314151e-08, "loss": 0.0045, "step": 6612 }, { "epoch": 389.0, "grad_norm": 0.16197039186954498, "learning_rate": 3.963734488278248e-08, "loss": 0.0025, "step": 6613 }, { "epoch": 389.05882352941177, "grad_norm": 0.21799814701080322, "learning_rate": 3.9214826108920915e-08, "loss": 0.0025, "step": 6614 }, { "epoch": 389.11764705882354, "grad_norm": 0.26386964321136475, "learning_rate": 3.879456693740569e-08, "loss": 0.0022, "step": 6615 }, { "epoch": 389.1764705882353, "grad_norm": 0.21807633340358734, "learning_rate": 3.8376567463571654e-08, "loss": 0.0046, "step": 6616 }, { "epoch": 389.2352941176471, "grad_norm": 0.24579133093357086, "learning_rate": 3.796082778224186e-08, "loss": 0.0049, "step": 6617 }, { "epoch": 389.29411764705884, "grad_norm": 0.30034881830215454, "learning_rate": 3.754734798772752e-08, "loss": 0.0027, "step": 6618 }, { "epoch": 389.3529411764706, "grad_norm": 0.26156529784202576, "learning_rate": 3.7136128173825835e-08, "loss": 0.0031, "step": 6619 }, { "epoch": 389.4117647058824, "grad_norm": 0.24100762605667114, "learning_rate": 3.672716843382218e-08, "loss": 0.0028, "step": 6620 }, { "epoch": 389.47058823529414, "grad_norm": 0.3517746925354004, "learning_rate": 3.6320468860490124e-08, "loss": 0.0059, "step": 6621 }, { "epoch": 389.52941176470586, "grad_norm": 0.20724648237228394, "learning_rate": 3.591602954608697e-08, "loss": 0.0032, "step": 6622 }, { "epoch": 389.5882352941176, "grad_norm": 0.14616946876049042, "learning_rate": 3.551385058236156e-08, "loss": 0.0023, "step": 6623 }, { "epoch": 389.6470588235294, "grad_norm": 0.26472577452659607, "learning_rate": 3.511393206054759e-08, "loss": 0.0032, "step": 6624 }, { "epoch": 389.70588235294116, "grad_norm": 0.2831006348133087, "learning_rate": 3.4716274071365794e-08, "loss": 0.0053, "step": 6625 }, { "epoch": 389.7647058823529, "grad_norm": 0.32689228653907776, "learning_rate": 3.4320876705026266e-08, "loss": 0.0025, "step": 6626 }, { "epoch": 389.8235294117647, "grad_norm": 0.20634938776493073, "learning_rate": 3.39277400512239e-08, "loss": 0.0026, "step": 6627 }, { "epoch": 389.88235294117646, "grad_norm": 0.14961227774620056, "learning_rate": 3.3536864199141814e-08, "loss": 0.0023, "step": 6628 }, { "epoch": 389.94117647058823, "grad_norm": 0.13271024823188782, "learning_rate": 3.314824923745019e-08, "loss": 0.002, "step": 6629 }, { "epoch": 390.0, "grad_norm": 0.36403533816337585, "learning_rate": 3.2761895254306285e-08, "loss": 0.0052, "step": 6630 }, { "epoch": 390.05882352941177, "grad_norm": 0.2420634627342224, "learning_rate": 3.237780233735443e-08, "loss": 0.0033, "step": 6631 }, { "epoch": 390.11764705882354, "grad_norm": 0.415082722902298, "learning_rate": 3.1995970573726055e-08, "loss": 0.0075, "step": 6632 }, { "epoch": 390.1764705882353, "grad_norm": 0.2370632141828537, "learning_rate": 3.161640005003852e-08, "loss": 0.0026, "step": 6633 }, { "epoch": 390.2352941176471, "grad_norm": 0.5742368102073669, "learning_rate": 3.1239090852399624e-08, "loss": 0.0056, "step": 6634 }, { "epoch": 390.29411764705884, "grad_norm": 0.334983229637146, "learning_rate": 3.086404306639979e-08, "loss": 0.0039, "step": 6635 }, { "epoch": 390.3529411764706, "grad_norm": 0.3011443316936493, "learning_rate": 3.049125677711984e-08, "loss": 0.0037, "step": 6636 }, { "epoch": 390.4117647058824, "grad_norm": 0.22156015038490295, "learning_rate": 3.012073206912658e-08, "loss": 0.0032, "step": 6637 }, { "epoch": 390.47058823529414, "grad_norm": 0.10626644641160965, "learning_rate": 2.9752469026471664e-08, "loss": 0.0014, "step": 6638 }, { "epoch": 390.52941176470586, "grad_norm": 0.3877543807029724, "learning_rate": 2.9386467732697154e-08, "loss": 0.0029, "step": 6639 }, { "epoch": 390.5882352941176, "grad_norm": 0.4597057104110718, "learning_rate": 2.902272827082886e-08, "loss": 0.0055, "step": 6640 }, { "epoch": 390.6470588235294, "grad_norm": 0.15954828262329102, "learning_rate": 2.8661250723382993e-08, "loss": 0.0037, "step": 6641 }, { "epoch": 390.70588235294116, "grad_norm": 0.24625030159950256, "learning_rate": 2.8302035172358412e-08, "loss": 0.0037, "step": 6642 }, { "epoch": 390.7647058823529, "grad_norm": 0.21632049977779388, "learning_rate": 2.794508169924548e-08, "loss": 0.0021, "step": 6643 }, { "epoch": 390.8235294117647, "grad_norm": 0.18843716382980347, "learning_rate": 2.7590390385017208e-08, "loss": 0.0031, "step": 6644 }, { "epoch": 390.88235294117646, "grad_norm": 0.23149161040782928, "learning_rate": 2.7237961310135895e-08, "loss": 0.003, "step": 6645 }, { "epoch": 390.94117647058823, "grad_norm": 0.326143354177475, "learning_rate": 2.6887794554549816e-08, "loss": 0.0035, "step": 6646 }, { "epoch": 391.0, "grad_norm": 0.28393444418907166, "learning_rate": 2.6539890197695428e-08, "loss": 0.0034, "step": 6647 }, { "epoch": 391.05882352941177, "grad_norm": 0.13046112656593323, "learning_rate": 2.619424831849293e-08, "loss": 0.0016, "step": 6648 }, { "epoch": 391.11764705882354, "grad_norm": 0.26603254675865173, "learning_rate": 2.585086899535183e-08, "loss": 0.0041, "step": 6649 }, { "epoch": 391.1764705882353, "grad_norm": 0.2998616695404053, "learning_rate": 2.55097523061687e-08, "loss": 0.004, "step": 6650 }, { "epoch": 391.2352941176471, "grad_norm": 0.4149789810180664, "learning_rate": 2.5170898328324977e-08, "loss": 0.0025, "step": 6651 }, { "epoch": 391.29411764705884, "grad_norm": 0.25973403453826904, "learning_rate": 2.4834307138688064e-08, "loss": 0.0036, "step": 6652 }, { "epoch": 391.3529411764706, "grad_norm": 0.43776923418045044, "learning_rate": 2.4499978813616876e-08, "loss": 0.006, "step": 6653 }, { "epoch": 391.4117647058824, "grad_norm": 0.21941067278385162, "learning_rate": 2.416791342895075e-08, "loss": 0.0036, "step": 6654 }, { "epoch": 391.47058823529414, "grad_norm": 0.3336864709854126, "learning_rate": 2.383811106002054e-08, "loss": 0.0043, "step": 6655 }, { "epoch": 391.52941176470586, "grad_norm": 0.17124561965465546, "learning_rate": 2.3510571781640846e-08, "loss": 0.002, "step": 6656 }, { "epoch": 391.5882352941176, "grad_norm": 0.18231970071792603, "learning_rate": 2.3185295668113338e-08, "loss": 0.0032, "step": 6657 }, { "epoch": 391.6470588235294, "grad_norm": 0.278217077255249, "learning_rate": 2.2862282793228995e-08, "loss": 0.0041, "step": 6658 }, { "epoch": 391.70588235294116, "grad_norm": 0.4596138894557953, "learning_rate": 2.2541533230262536e-08, "loss": 0.0051, "step": 6659 }, { "epoch": 391.7647058823529, "grad_norm": 0.16931100189685822, "learning_rate": 2.2223047051973533e-08, "loss": 0.0035, "step": 6660 }, { "epoch": 391.8235294117647, "grad_norm": 0.24038180708885193, "learning_rate": 2.1906824330613085e-08, "loss": 0.0028, "step": 6661 }, { "epoch": 391.88235294117646, "grad_norm": 0.17248840630054474, "learning_rate": 2.159286513791603e-08, "loss": 0.0026, "step": 6662 }, { "epoch": 391.94117647058823, "grad_norm": 0.23532627522945404, "learning_rate": 2.1281169545103174e-08, "loss": 0.0022, "step": 6663 }, { "epoch": 392.0, "grad_norm": 0.2923179268836975, "learning_rate": 2.0971737622883515e-08, "loss": 0.0036, "step": 6664 }, { "epoch": 392.05882352941177, "grad_norm": 0.38638433814048767, "learning_rate": 2.0664569441449787e-08, "loss": 0.0046, "step": 6665 }, { "epoch": 392.11764705882354, "grad_norm": 0.29097431898117065, "learning_rate": 2.0359665070486258e-08, "loss": 0.0034, "step": 6666 }, { "epoch": 392.1764705882353, "grad_norm": 0.38852155208587646, "learning_rate": 2.005702457915648e-08, "loss": 0.0038, "step": 6667 }, { "epoch": 392.2352941176471, "grad_norm": 0.16653668880462646, "learning_rate": 1.9756648036116654e-08, "loss": 0.0027, "step": 6668 }, { "epoch": 392.29411764705884, "grad_norm": 0.21363218128681183, "learning_rate": 1.9458535509507824e-08, "loss": 0.0025, "step": 6669 }, { "epoch": 392.3529411764706, "grad_norm": 0.46291446685791016, "learning_rate": 1.916268706695479e-08, "loss": 0.0046, "step": 6670 }, { "epoch": 392.4117647058824, "grad_norm": 0.4243676960468292, "learning_rate": 1.8869102775572746e-08, "loss": 0.0046, "step": 6671 }, { "epoch": 392.47058823529414, "grad_norm": 0.3417537808418274, "learning_rate": 1.8577782701959536e-08, "loss": 0.0051, "step": 6672 }, { "epoch": 392.52941176470586, "grad_norm": 0.2602797746658325, "learning_rate": 1.8288726912201182e-08, "loss": 0.0035, "step": 6673 }, { "epoch": 392.5882352941176, "grad_norm": 0.3108898997306824, "learning_rate": 1.800193547187079e-08, "loss": 0.0041, "step": 6674 }, { "epoch": 392.6470588235294, "grad_norm": 0.2853691279888153, "learning_rate": 1.771740844602743e-08, "loss": 0.0031, "step": 6675 }, { "epoch": 392.70588235294116, "grad_norm": 0.28331199288368225, "learning_rate": 1.743514589921502e-08, "loss": 0.0042, "step": 6676 }, { "epoch": 392.7647058823529, "grad_norm": 0.18021585047245026, "learning_rate": 1.7155147895464575e-08, "loss": 0.0039, "step": 6677 }, { "epoch": 392.8235294117647, "grad_norm": 0.24885514378547668, "learning_rate": 1.6877414498294166e-08, "loss": 0.0025, "step": 6678 }, { "epoch": 392.88235294117646, "grad_norm": 0.23901033401489258, "learning_rate": 1.660194577070673e-08, "loss": 0.0029, "step": 6679 }, { "epoch": 392.94117647058823, "grad_norm": 0.17458376288414001, "learning_rate": 1.6328741775194502e-08, "loss": 0.003, "step": 6680 }, { "epoch": 393.0, "grad_norm": 0.2322518676519394, "learning_rate": 1.605780257373124e-08, "loss": 0.0031, "step": 6681 }, { "epoch": 393.05882352941177, "grad_norm": 0.2164183408021927, "learning_rate": 1.5789128227780005e-08, "loss": 0.0042, "step": 6682 }, { "epoch": 393.11764705882354, "grad_norm": 0.26939550042152405, "learning_rate": 1.5522718798290925e-08, "loss": 0.003, "step": 6683 }, { "epoch": 393.1764705882353, "grad_norm": 0.23573823273181915, "learning_rate": 1.5258574345696775e-08, "loss": 0.0021, "step": 6684 }, { "epoch": 393.2352941176471, "grad_norm": 0.3712824881076813, "learning_rate": 1.499669492992073e-08, "loss": 0.0033, "step": 6685 }, { "epoch": 393.29411764705884, "grad_norm": 0.29028207063674927, "learning_rate": 1.4737080610368604e-08, "loss": 0.003, "step": 6686 }, { "epoch": 393.3529411764706, "grad_norm": 0.1042868047952652, "learning_rate": 1.4479731445935508e-08, "loss": 0.0016, "step": 6687 }, { "epoch": 393.4117647058824, "grad_norm": 0.3367651402950287, "learning_rate": 1.4224647494999189e-08, "loss": 0.0028, "step": 6688 }, { "epoch": 393.47058823529414, "grad_norm": 0.4716337025165558, "learning_rate": 1.3971828815426691e-08, "loss": 0.0073, "step": 6689 }, { "epoch": 393.52941176470586, "grad_norm": 0.3491489291191101, "learning_rate": 1.3721275464568806e-08, "loss": 0.0057, "step": 6690 }, { "epoch": 393.5882352941176, "grad_norm": 0.3798167109489441, "learning_rate": 1.3472987499264511e-08, "loss": 0.0045, "step": 6691 }, { "epoch": 393.6470588235294, "grad_norm": 0.24511422216892242, "learning_rate": 1.3226964975837641e-08, "loss": 0.0037, "step": 6692 }, { "epoch": 393.70588235294116, "grad_norm": 0.2853400409221649, "learning_rate": 1.2983207950097998e-08, "loss": 0.0029, "step": 6693 }, { "epoch": 393.7647058823529, "grad_norm": 0.2696162760257721, "learning_rate": 1.2741716477342459e-08, "loss": 0.0041, "step": 6694 }, { "epoch": 393.8235294117647, "grad_norm": 0.3156551420688629, "learning_rate": 1.2502490612352757e-08, "loss": 0.0038, "step": 6695 }, { "epoch": 393.88235294117646, "grad_norm": 0.5733904838562012, "learning_rate": 1.2265530409397708e-08, "loss": 0.002, "step": 6696 }, { "epoch": 393.94117647058823, "grad_norm": 0.15032097697257996, "learning_rate": 1.2030835922229866e-08, "loss": 0.0021, "step": 6697 }, { "epoch": 394.0, "grad_norm": 0.42292889952659607, "learning_rate": 1.179840720409331e-08, "loss": 0.0061, "step": 6698 }, { "epoch": 394.05882352941177, "grad_norm": 0.23729254305362701, "learning_rate": 1.1568244307710308e-08, "loss": 0.0036, "step": 6699 }, { "epoch": 394.11764705882354, "grad_norm": 0.34904104471206665, "learning_rate": 1.134034728529687e-08, "loss": 0.0025, "step": 6700 }, { "epoch": 394.1764705882353, "grad_norm": 0.28878143429756165, "learning_rate": 1.1114716188548314e-08, "loss": 0.0048, "step": 6701 }, { "epoch": 394.2352941176471, "grad_norm": 0.09897883981466293, "learning_rate": 1.0891351068651469e-08, "loss": 0.0021, "step": 6702 }, { "epoch": 394.29411764705884, "grad_norm": 0.2952391803264618, "learning_rate": 1.0670251976275803e-08, "loss": 0.0053, "step": 6703 }, { "epoch": 394.3529411764706, "grad_norm": 0.2579192817211151, "learning_rate": 1.0451418961576754e-08, "loss": 0.0023, "step": 6704 }, { "epoch": 394.4117647058824, "grad_norm": 0.24357888102531433, "learning_rate": 1.0234852074197943e-08, "loss": 0.0024, "step": 6705 }, { "epoch": 394.47058823529414, "grad_norm": 0.19851720333099365, "learning_rate": 1.0020551363266739e-08, "loss": 0.0025, "step": 6706 }, { "epoch": 394.52941176470586, "grad_norm": 0.2545214593410492, "learning_rate": 9.80851687739648e-09, "loss": 0.0037, "step": 6707 }, { "epoch": 394.5882352941176, "grad_norm": 0.38083532452583313, "learning_rate": 9.598748664688683e-09, "loss": 0.0043, "step": 6708 }, { "epoch": 394.6470588235294, "grad_norm": 0.310821533203125, "learning_rate": 9.391246772729733e-09, "loss": 0.0041, "step": 6709 }, { "epoch": 394.70588235294116, "grad_norm": 0.19890320301055908, "learning_rate": 9.186011248588644e-09, "loss": 0.0026, "step": 6710 }, { "epoch": 394.7647058823529, "grad_norm": 0.6747136116027832, "learning_rate": 8.983042138824837e-09, "loss": 0.0064, "step": 6711 }, { "epoch": 394.8235294117647, "grad_norm": 0.29110994935035706, "learning_rate": 8.782339489482595e-09, "loss": 0.0036, "step": 6712 }, { "epoch": 394.88235294117646, "grad_norm": 0.4612199366092682, "learning_rate": 8.58390334608883e-09, "loss": 0.0039, "step": 6713 }, { "epoch": 394.94117647058823, "grad_norm": 0.5457082986831665, "learning_rate": 8.387733753661976e-09, "loss": 0.004, "step": 6714 }, { "epoch": 395.0, "grad_norm": 0.34584948420524597, "learning_rate": 8.193830756699773e-09, "loss": 0.0042, "step": 6715 }, { "epoch": 395.05882352941177, "grad_norm": 0.5505496263504028, "learning_rate": 8.002194399191476e-09, "loss": 0.0044, "step": 6716 }, { "epoch": 395.11764705882354, "grad_norm": 0.2507135570049286, "learning_rate": 7.812824724608981e-09, "loss": 0.0031, "step": 6717 }, { "epoch": 395.1764705882353, "grad_norm": 0.33168917894363403, "learning_rate": 7.625721775910144e-09, "loss": 0.0053, "step": 6718 }, { "epoch": 395.2352941176471, "grad_norm": 0.14907221496105194, "learning_rate": 7.440885595539904e-09, "loss": 0.0019, "step": 6719 }, { "epoch": 395.29411764705884, "grad_norm": 0.22808469831943512, "learning_rate": 7.2583162254280526e-09, "loss": 0.0024, "step": 6720 }, { "epoch": 395.3529411764706, "grad_norm": 0.09418592602014542, "learning_rate": 7.07801370699146e-09, "loss": 0.0013, "step": 6721 }, { "epoch": 395.4117647058824, "grad_norm": 0.330998957157135, "learning_rate": 6.899978081130742e-09, "loss": 0.0032, "step": 6722 }, { "epoch": 395.47058823529414, "grad_norm": 0.14092470705509186, "learning_rate": 6.724209388232483e-09, "loss": 0.0022, "step": 6723 }, { "epoch": 395.52941176470586, "grad_norm": 0.15282198786735535, "learning_rate": 6.550707668171452e-09, "loss": 0.0023, "step": 6724 }, { "epoch": 395.5882352941176, "grad_norm": 0.30025750398635864, "learning_rate": 6.3794729603050556e-09, "loss": 0.0041, "step": 6725 }, { "epoch": 395.6470588235294, "grad_norm": 0.291096031665802, "learning_rate": 6.210505303477776e-09, "loss": 0.0032, "step": 6726 }, { "epoch": 395.70588235294116, "grad_norm": 0.11478777229785919, "learning_rate": 6.0438047360222855e-09, "loss": 0.002, "step": 6727 }, { "epoch": 395.7647058823529, "grad_norm": 0.5004266500473022, "learning_rate": 5.879371295751668e-09, "loss": 0.0071, "step": 6728 }, { "epoch": 395.8235294117647, "grad_norm": 0.2541077435016632, "learning_rate": 5.717205019969419e-09, "loss": 0.0036, "step": 6729 }, { "epoch": 395.88235294117646, "grad_norm": 0.3402318060398102, "learning_rate": 5.557305945461667e-09, "loss": 0.0054, "step": 6730 }, { "epoch": 395.94117647058823, "grad_norm": 0.41024383902549744, "learning_rate": 5.399674108502728e-09, "loss": 0.0063, "step": 6731 }, { "epoch": 396.0, "grad_norm": 0.3597095310688019, "learning_rate": 5.2443095448506674e-09, "loss": 0.0059, "step": 6732 }, { "epoch": 396.05882352941177, "grad_norm": 0.5933746695518494, "learning_rate": 5.091212289750625e-09, "loss": 0.003, "step": 6733 }, { "epoch": 396.11764705882354, "grad_norm": 0.34368690848350525, "learning_rate": 4.940382377931485e-09, "loss": 0.0053, "step": 6734 }, { "epoch": 396.1764705882353, "grad_norm": 0.18126843869686127, "learning_rate": 4.791819843609213e-09, "loss": 0.0036, "step": 6735 }, { "epoch": 396.2352941176471, "grad_norm": 0.15870662033557892, "learning_rate": 4.645524720485739e-09, "loss": 0.0023, "step": 6736 }, { "epoch": 396.29411764705884, "grad_norm": 0.7372483015060425, "learning_rate": 4.501497041748959e-09, "loss": 0.0073, "step": 6737 }, { "epoch": 396.3529411764706, "grad_norm": 0.1909855604171753, "learning_rate": 4.359736840069406e-09, "loss": 0.0038, "step": 6738 }, { "epoch": 396.4117647058824, "grad_norm": 0.1672680675983429, "learning_rate": 4.220244147606911e-09, "loss": 0.0023, "step": 6739 }, { "epoch": 396.47058823529414, "grad_norm": 0.48524779081344604, "learning_rate": 4.0830189960050505e-09, "loss": 0.0062, "step": 6740 }, { "epoch": 396.52941176470586, "grad_norm": 0.2444634735584259, "learning_rate": 3.948061416392257e-09, "loss": 0.0026, "step": 6741 }, { "epoch": 396.5882352941176, "grad_norm": 0.11911467462778091, "learning_rate": 3.815371439385152e-09, "loss": 0.0022, "step": 6742 }, { "epoch": 396.6470588235294, "grad_norm": 0.16218000650405884, "learning_rate": 3.6849490950841007e-09, "loss": 0.0021, "step": 6743 }, { "epoch": 396.70588235294116, "grad_norm": 0.13948270678520203, "learning_rate": 3.556794413074327e-09, "loss": 0.0017, "step": 6744 }, { "epoch": 396.7647058823529, "grad_norm": 0.45385268330574036, "learning_rate": 3.4309074224292415e-09, "loss": 0.0066, "step": 6745 }, { "epoch": 396.8235294117647, "grad_norm": 0.49655261635780334, "learning_rate": 3.3072881517048907e-09, "loss": 0.0056, "step": 6746 }, { "epoch": 396.88235294117646, "grad_norm": 0.46762344241142273, "learning_rate": 3.185936628945507e-09, "loss": 0.0036, "step": 6747 }, { "epoch": 396.94117647058823, "grad_norm": 0.17641305923461914, "learning_rate": 3.066852881679072e-09, "loss": 0.002, "step": 6748 }, { "epoch": 397.0, "grad_norm": 0.31803077459335327, "learning_rate": 2.9500369369195313e-09, "loss": 0.0037, "step": 6749 }, { "epoch": 397.05882352941177, "grad_norm": 0.3119764029979706, "learning_rate": 2.8354888211667984e-09, "loss": 0.0051, "step": 6750 }, { "epoch": 397.11764705882354, "grad_norm": 0.17648057639598846, "learning_rate": 2.7232085604056435e-09, "loss": 0.0037, "step": 6751 }, { "epoch": 397.1764705882353, "grad_norm": 0.3246845602989197, "learning_rate": 2.613196180107913e-09, "loss": 0.003, "step": 6752 }, { "epoch": 397.2352941176471, "grad_norm": 0.5736954212188721, "learning_rate": 2.5054517052292005e-09, "loss": 0.0024, "step": 6753 }, { "epoch": 397.29411764705884, "grad_norm": 0.2716946303844452, "learning_rate": 2.3999751602110656e-09, "loss": 0.0041, "step": 6754 }, { "epoch": 397.3529411764706, "grad_norm": 0.3023034632205963, "learning_rate": 2.2967665689810346e-09, "loss": 0.004, "step": 6755 }, { "epoch": 397.4117647058824, "grad_norm": 0.3519704043865204, "learning_rate": 2.1958259549514914e-09, "loss": 0.0058, "step": 6756 }, { "epoch": 397.47058823529414, "grad_norm": 0.4229896664619446, "learning_rate": 2.097153341021896e-09, "loss": 0.0041, "step": 6757 }, { "epoch": 397.52941176470586, "grad_norm": 0.1431521326303482, "learning_rate": 2.0007487495754543e-09, "loss": 0.003, "step": 6758 }, { "epoch": 397.5882352941176, "grad_norm": 0.31163251399993896, "learning_rate": 1.9066122024824494e-09, "loss": 0.0047, "step": 6759 }, { "epoch": 397.6470588235294, "grad_norm": 0.35494041442871094, "learning_rate": 1.8147437210958018e-09, "loss": 0.0041, "step": 6760 }, { "epoch": 397.70588235294116, "grad_norm": 0.2185196578502655, "learning_rate": 1.7251433262577278e-09, "loss": 0.0021, "step": 6761 }, { "epoch": 397.7647058823529, "grad_norm": 0.248795285820961, "learning_rate": 1.6378110382930802e-09, "loss": 0.0032, "step": 6762 }, { "epoch": 397.8235294117647, "grad_norm": 0.30836397409439087, "learning_rate": 1.5527468770126786e-09, "loss": 0.0026, "step": 6763 }, { "epoch": 397.88235294117646, "grad_norm": 0.6485151052474976, "learning_rate": 1.4699508617144199e-09, "loss": 0.0069, "step": 6764 }, { "epoch": 397.94117647058823, "grad_norm": 0.14908871054649353, "learning_rate": 1.3894230111810569e-09, "loss": 0.0021, "step": 6765 }, { "epoch": 398.0, "grad_norm": 0.4738357365131378, "learning_rate": 1.3111633436779792e-09, "loss": 0.0068, "step": 6766 }, { "epoch": 398.05882352941177, "grad_norm": 0.10884681344032288, "learning_rate": 1.2351718769609832e-09, "loss": 0.0018, "step": 6767 }, { "epoch": 398.11764705882354, "grad_norm": 0.18631234765052795, "learning_rate": 1.161448628267392e-09, "loss": 0.0038, "step": 6768 }, { "epoch": 398.1764705882353, "grad_norm": 0.29334011673927307, "learning_rate": 1.089993614321605e-09, "loss": 0.0038, "step": 6769 }, { "epoch": 398.2352941176471, "grad_norm": 0.2157500833272934, "learning_rate": 1.0208068513328785e-09, "loss": 0.0023, "step": 6770 }, { "epoch": 398.29411764705884, "grad_norm": 0.19327478110790253, "learning_rate": 9.53888354996435e-10, "loss": 0.0024, "step": 6771 }, { "epoch": 398.3529411764706, "grad_norm": 0.36497265100479126, "learning_rate": 8.892381404923545e-10, "loss": 0.0046, "step": 6772 }, { "epoch": 398.4117647058824, "grad_norm": 0.2008226215839386, "learning_rate": 8.268562224866828e-10, "loss": 0.0036, "step": 6773 }, { "epoch": 398.47058823529414, "grad_norm": 0.2092186063528061, "learning_rate": 7.667426151314327e-10, "loss": 0.0026, "step": 6774 }, { "epoch": 398.52941176470586, "grad_norm": 0.33522069454193115, "learning_rate": 7.08897332063474e-10, "loss": 0.0043, "step": 6775 }, { "epoch": 398.5882352941176, "grad_norm": 0.12427864223718643, "learning_rate": 6.533203864034221e-10, "loss": 0.0025, "step": 6776 }, { "epoch": 398.6470588235294, "grad_norm": 0.3807130753993988, "learning_rate": 6.0001179076008e-10, "loss": 0.0045, "step": 6777 }, { "epoch": 398.70588235294116, "grad_norm": 0.23385675251483917, "learning_rate": 5.489715572259968e-10, "loss": 0.0019, "step": 6778 }, { "epoch": 398.7647058823529, "grad_norm": 0.5497791171073914, "learning_rate": 5.001996973807988e-10, "loss": 0.0057, "step": 6779 }, { "epoch": 398.8235294117647, "grad_norm": 0.22294141352176666, "learning_rate": 4.5369622228674805e-10, "loss": 0.003, "step": 6780 }, { "epoch": 398.88235294117646, "grad_norm": 0.30369430780410767, "learning_rate": 4.0946114249429403e-10, "loss": 0.0044, "step": 6781 }, { "epoch": 398.94117647058823, "grad_norm": 0.28871747851371765, "learning_rate": 3.6749446803763244e-10, "loss": 0.0032, "step": 6782 }, { "epoch": 399.0, "grad_norm": 0.2677325904369354, "learning_rate": 3.277962084369257e-10, "loss": 0.0039, "step": 6783 }, { "epoch": 399.05882352941177, "grad_norm": 0.22838996350765228, "learning_rate": 2.9036637269830303e-10, "loss": 0.0024, "step": 6784 }, { "epoch": 399.11764705882354, "grad_norm": 0.24035584926605225, "learning_rate": 2.5520496931163986e-10, "loss": 0.0022, "step": 6785 }, { "epoch": 399.1764705882353, "grad_norm": 0.5280086398124695, "learning_rate": 2.2231200625499883e-10, "loss": 0.0029, "step": 6786 }, { "epoch": 399.2352941176471, "grad_norm": 0.3173239529132843, "learning_rate": 1.9168749098796847e-10, "loss": 0.0086, "step": 6787 }, { "epoch": 399.29411764705884, "grad_norm": 0.3781053125858307, "learning_rate": 1.6333143045832445e-10, "loss": 0.0027, "step": 6788 }, { "epoch": 399.3529411764706, "grad_norm": 0.5994939208030701, "learning_rate": 1.3724383109980922e-10, "loss": 0.0069, "step": 6789 }, { "epoch": 399.4117647058824, "grad_norm": 1.3111522197723389, "learning_rate": 1.1342469882991148e-10, "loss": 0.007, "step": 6790 }, { "epoch": 399.47058823529414, "grad_norm": 0.13711781799793243, "learning_rate": 9.187403905097647e-11, "loss": 0.0022, "step": 6791 }, { "epoch": 399.52941176470586, "grad_norm": 0.19382043182849884, "learning_rate": 7.259185665353663e-11, "loss": 0.0019, "step": 6792 }, { "epoch": 399.5882352941176, "grad_norm": 0.1981040984392166, "learning_rate": 5.557815600965022e-11, "loss": 0.0024, "step": 6793 }, { "epoch": 399.6470588235294, "grad_norm": 0.19414809346199036, "learning_rate": 4.083294098067292e-11, "loss": 0.0031, "step": 6794 }, { "epoch": 399.70588235294116, "grad_norm": 0.21035201847553253, "learning_rate": 2.8356214911706705e-11, "loss": 0.0029, "step": 6795 }, { "epoch": 399.7647058823529, "grad_norm": 0.2544931173324585, "learning_rate": 1.8147980630489613e-11, "loss": 0.0039, "step": 6796 }, { "epoch": 399.8235294117647, "grad_norm": 0.27310022711753845, "learning_rate": 1.020824045516733e-11, "loss": 0.0063, "step": 6797 }, { "epoch": 399.88235294117646, "grad_norm": 0.1172463521361351, "learning_rate": 4.536996186521592e-12, "loss": 0.0017, "step": 6798 }, { "epoch": 399.94117647058823, "grad_norm": 0.12398873269557953, "learning_rate": 1.134249111300889e-12, "loss": 0.0023, "step": 6799 }, { "epoch": 400.0, "grad_norm": 0.3046536445617676, "learning_rate": 0.0, "loss": 0.0036, "step": 6800 }, { "epoch": 400.0, "step": 6800, "total_flos": 4225898119168000.0, "train_loss": 0.21003313879318097, "train_runtime": 2154.8061, "train_samples_per_second": 98.756, "train_steps_per_second": 3.156 } ], "logging_steps": 1.0, "max_steps": 6800, "num_input_tokens_seen": 0, "num_train_epochs": 400, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4225898119168000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }