MatteoCargnelutti's picture
Upload folder using huggingface_hub
389b858 verified
{
"best_metric": 0.157407745718956,
"best_model_checkpoint": "hlbooks-topic-classifier-bert-multilingual-uncased/checkpoint-7578",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 7578,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009897070467141725,
"grad_norm": 3.174437999725342,
"learning_rate": 1.5171503957783642e-06,
"loss": 3.0686,
"step": 25
},
{
"epoch": 0.01979414093428345,
"grad_norm": 5.321998119354248,
"learning_rate": 3.1002638522427443e-06,
"loss": 2.9657,
"step": 50
},
{
"epoch": 0.029691211401425176,
"grad_norm": 7.038234710693359,
"learning_rate": 4.617414248021108e-06,
"loss": 2.8352,
"step": 75
},
{
"epoch": 0.0395882818685669,
"grad_norm": 6.537544250488281,
"learning_rate": 6.200527704485489e-06,
"loss": 2.738,
"step": 100
},
{
"epoch": 0.04948535233570863,
"grad_norm": 6.735942840576172,
"learning_rate": 7.849604221635884e-06,
"loss": 2.6217,
"step": 125
},
{
"epoch": 0.05938242280285035,
"grad_norm": 5.756272792816162,
"learning_rate": 9.49868073878628e-06,
"loss": 2.4216,
"step": 150
},
{
"epoch": 0.06927949326999208,
"grad_norm": 5.069972991943359,
"learning_rate": 1.1147757255936676e-05,
"loss": 2.2817,
"step": 175
},
{
"epoch": 0.0791765637371338,
"grad_norm": 8.664924621582031,
"learning_rate": 1.2796833773087072e-05,
"loss": 2.1123,
"step": 200
},
{
"epoch": 0.08907363420427554,
"grad_norm": 9.564645767211914,
"learning_rate": 1.4445910290237468e-05,
"loss": 1.8298,
"step": 225
},
{
"epoch": 0.09897070467141726,
"grad_norm": 18.612655639648438,
"learning_rate": 1.6094986807387864e-05,
"loss": 1.4781,
"step": 250
},
{
"epoch": 0.10886777513855898,
"grad_norm": 7.96259069442749,
"learning_rate": 1.774406332453826e-05,
"loss": 1.176,
"step": 275
},
{
"epoch": 0.1187648456057007,
"grad_norm": 9.508382797241211,
"learning_rate": 1.9393139841688653e-05,
"loss": 1.038,
"step": 300
},
{
"epoch": 0.12866191607284244,
"grad_norm": 9.386768341064453,
"learning_rate": 2.104221635883905e-05,
"loss": 0.9101,
"step": 325
},
{
"epoch": 0.13855898653998416,
"grad_norm": 8.491036415100098,
"learning_rate": 2.269129287598945e-05,
"loss": 0.8534,
"step": 350
},
{
"epoch": 0.14845605700712589,
"grad_norm": 13.790663719177246,
"learning_rate": 2.4340369393139843e-05,
"loss": 0.6119,
"step": 375
},
{
"epoch": 0.1583531274742676,
"grad_norm": 14.771766662597656,
"learning_rate": 2.5989445910290237e-05,
"loss": 0.6075,
"step": 400
},
{
"epoch": 0.16825019794140933,
"grad_norm": 5.174952030181885,
"learning_rate": 2.763852242744063e-05,
"loss": 0.6037,
"step": 425
},
{
"epoch": 0.17814726840855108,
"grad_norm": 12.023221015930176,
"learning_rate": 2.9287598944591033e-05,
"loss": 0.5757,
"step": 450
},
{
"epoch": 0.1880443388756928,
"grad_norm": 6.669355869293213,
"learning_rate": 3.093667546174143e-05,
"loss": 0.5343,
"step": 475
},
{
"epoch": 0.19794140934283452,
"grad_norm": 11.855779647827148,
"learning_rate": 3.258575197889182e-05,
"loss": 0.5008,
"step": 500
},
{
"epoch": 0.20783847980997625,
"grad_norm": 3.5531342029571533,
"learning_rate": 3.423482849604222e-05,
"loss": 0.4706,
"step": 525
},
{
"epoch": 0.21773555027711797,
"grad_norm": 26.04022216796875,
"learning_rate": 3.588390501319262e-05,
"loss": 0.483,
"step": 550
},
{
"epoch": 0.2276326207442597,
"grad_norm": 7.481447696685791,
"learning_rate": 3.753298153034301e-05,
"loss": 0.4332,
"step": 575
},
{
"epoch": 0.2375296912114014,
"grad_norm": 18.177900314331055,
"learning_rate": 3.9182058047493406e-05,
"loss": 0.444,
"step": 600
},
{
"epoch": 0.24742676167854316,
"grad_norm": 10.481646537780762,
"learning_rate": 4.08311345646438e-05,
"loss": 0.3346,
"step": 625
},
{
"epoch": 0.2573238321456849,
"grad_norm": 15.196333885192871,
"learning_rate": 4.2480211081794194e-05,
"loss": 0.3647,
"step": 650
},
{
"epoch": 0.2672209026128266,
"grad_norm": 8.303031921386719,
"learning_rate": 4.412928759894459e-05,
"loss": 0.4139,
"step": 675
},
{
"epoch": 0.2771179730799683,
"grad_norm": 9.322664260864258,
"learning_rate": 4.577836411609499e-05,
"loss": 0.4656,
"step": 700
},
{
"epoch": 0.28701504354711005,
"grad_norm": 7.468392848968506,
"learning_rate": 4.7427440633245384e-05,
"loss": 0.3996,
"step": 725
},
{
"epoch": 0.29691211401425177,
"grad_norm": 7.190126895904541,
"learning_rate": 4.907651715039578e-05,
"loss": 0.3771,
"step": 750
},
{
"epoch": 0.3068091844813935,
"grad_norm": 8.512736320495605,
"learning_rate": 4.991935483870968e-05,
"loss": 0.4247,
"step": 775
},
{
"epoch": 0.3167062549485352,
"grad_norm": 15.00452995300293,
"learning_rate": 4.973607038123168e-05,
"loss": 0.3576,
"step": 800
},
{
"epoch": 0.32660332541567694,
"grad_norm": 19.776445388793945,
"learning_rate": 4.955278592375367e-05,
"loss": 0.347,
"step": 825
},
{
"epoch": 0.33650039588281866,
"grad_norm": 9.140093803405762,
"learning_rate": 4.9369501466275664e-05,
"loss": 0.3799,
"step": 850
},
{
"epoch": 0.34639746634996044,
"grad_norm": 12.198457717895508,
"learning_rate": 4.918621700879766e-05,
"loss": 0.4086,
"step": 875
},
{
"epoch": 0.35629453681710216,
"grad_norm": 3.6618921756744385,
"learning_rate": 4.900293255131965e-05,
"loss": 0.3457,
"step": 900
},
{
"epoch": 0.3661916072842439,
"grad_norm": 17.983041763305664,
"learning_rate": 4.881964809384165e-05,
"loss": 0.364,
"step": 925
},
{
"epoch": 0.3760886777513856,
"grad_norm": 0.4606013000011444,
"learning_rate": 4.863636363636364e-05,
"loss": 0.2662,
"step": 950
},
{
"epoch": 0.3859857482185273,
"grad_norm": 5.343178749084473,
"learning_rate": 4.8453079178885635e-05,
"loss": 0.3289,
"step": 975
},
{
"epoch": 0.39588281868566905,
"grad_norm": 11.04477596282959,
"learning_rate": 4.826979472140763e-05,
"loss": 0.3723,
"step": 1000
},
{
"epoch": 0.40577988915281077,
"grad_norm": 8.415637016296387,
"learning_rate": 4.808651026392962e-05,
"loss": 0.29,
"step": 1025
},
{
"epoch": 0.4156769596199525,
"grad_norm": 37.04660415649414,
"learning_rate": 4.790322580645161e-05,
"loss": 0.4181,
"step": 1050
},
{
"epoch": 0.4255740300870942,
"grad_norm": 11.579482078552246,
"learning_rate": 4.7719941348973606e-05,
"loss": 0.2785,
"step": 1075
},
{
"epoch": 0.43547110055423593,
"grad_norm": 8.857477188110352,
"learning_rate": 4.75366568914956e-05,
"loss": 0.3266,
"step": 1100
},
{
"epoch": 0.44536817102137766,
"grad_norm": 5.317532539367676,
"learning_rate": 4.73533724340176e-05,
"loss": 0.344,
"step": 1125
},
{
"epoch": 0.4552652414885194,
"grad_norm": 4.178307056427002,
"learning_rate": 4.717008797653959e-05,
"loss": 0.3613,
"step": 1150
},
{
"epoch": 0.4651623119556611,
"grad_norm": 8.49862003326416,
"learning_rate": 4.6986803519061584e-05,
"loss": 0.2527,
"step": 1175
},
{
"epoch": 0.4750593824228028,
"grad_norm": 0.5914684534072876,
"learning_rate": 4.6803519061583577e-05,
"loss": 0.258,
"step": 1200
},
{
"epoch": 0.4849564528899446,
"grad_norm": 3.90535306930542,
"learning_rate": 4.662023460410557e-05,
"loss": 0.3091,
"step": 1225
},
{
"epoch": 0.4948535233570863,
"grad_norm": 7.211574077606201,
"learning_rate": 4.643695014662757e-05,
"loss": 0.3302,
"step": 1250
},
{
"epoch": 0.504750593824228,
"grad_norm": 9.782793045043945,
"learning_rate": 4.625366568914956e-05,
"loss": 0.3487,
"step": 1275
},
{
"epoch": 0.5146476642913698,
"grad_norm": 9.339737892150879,
"learning_rate": 4.6070381231671554e-05,
"loss": 0.2164,
"step": 1300
},
{
"epoch": 0.5245447347585115,
"grad_norm": 3.444295644760132,
"learning_rate": 4.588709677419355e-05,
"loss": 0.2874,
"step": 1325
},
{
"epoch": 0.5344418052256532,
"grad_norm": 15.61107063293457,
"learning_rate": 4.570381231671555e-05,
"loss": 0.3161,
"step": 1350
},
{
"epoch": 0.5443388756927949,
"grad_norm": 12.160496711730957,
"learning_rate": 4.552052785923754e-05,
"loss": 0.2242,
"step": 1375
},
{
"epoch": 0.5542359461599367,
"grad_norm": 7.081170558929443,
"learning_rate": 4.533724340175953e-05,
"loss": 0.2625,
"step": 1400
},
{
"epoch": 0.5641330166270784,
"grad_norm": 2.664806365966797,
"learning_rate": 4.5153958944281525e-05,
"loss": 0.2885,
"step": 1425
},
{
"epoch": 0.5740300870942201,
"grad_norm": 5.956775188446045,
"learning_rate": 4.497067448680352e-05,
"loss": 0.3064,
"step": 1450
},
{
"epoch": 0.5839271575613618,
"grad_norm": 10.270496368408203,
"learning_rate": 4.478739002932552e-05,
"loss": 0.2519,
"step": 1475
},
{
"epoch": 0.5938242280285035,
"grad_norm": 11.354063987731934,
"learning_rate": 4.460410557184751e-05,
"loss": 0.2874,
"step": 1500
},
{
"epoch": 0.6037212984956453,
"grad_norm": 4.400442600250244,
"learning_rate": 4.44208211143695e-05,
"loss": 0.248,
"step": 1525
},
{
"epoch": 0.613618368962787,
"grad_norm": 5.145227909088135,
"learning_rate": 4.4237536656891496e-05,
"loss": 0.2757,
"step": 1550
},
{
"epoch": 0.6235154394299287,
"grad_norm": 6.018128395080566,
"learning_rate": 4.4054252199413495e-05,
"loss": 0.2929,
"step": 1575
},
{
"epoch": 0.6334125098970704,
"grad_norm": 11.10319709777832,
"learning_rate": 4.387096774193549e-05,
"loss": 0.2645,
"step": 1600
},
{
"epoch": 0.6433095803642122,
"grad_norm": 4.846808433532715,
"learning_rate": 4.368768328445748e-05,
"loss": 0.1884,
"step": 1625
},
{
"epoch": 0.6532066508313539,
"grad_norm": 20.15575408935547,
"learning_rate": 4.3504398826979474e-05,
"loss": 0.2996,
"step": 1650
},
{
"epoch": 0.6631037212984956,
"grad_norm": 15.41659164428711,
"learning_rate": 4.3321114369501466e-05,
"loss": 0.2613,
"step": 1675
},
{
"epoch": 0.6730007917656373,
"grad_norm": 9.153544425964355,
"learning_rate": 4.3137829912023466e-05,
"loss": 0.2867,
"step": 1700
},
{
"epoch": 0.6828978622327792,
"grad_norm": 6.918684482574463,
"learning_rate": 4.295454545454546e-05,
"loss": 0.1998,
"step": 1725
},
{
"epoch": 0.6927949326999209,
"grad_norm": 3.967953681945801,
"learning_rate": 4.277126099706745e-05,
"loss": 0.2632,
"step": 1750
},
{
"epoch": 0.7026920031670626,
"grad_norm": 6.128458023071289,
"learning_rate": 4.2587976539589444e-05,
"loss": 0.2471,
"step": 1775
},
{
"epoch": 0.7125890736342043,
"grad_norm": 3.5776052474975586,
"learning_rate": 4.2404692082111444e-05,
"loss": 0.2609,
"step": 1800
},
{
"epoch": 0.722486144101346,
"grad_norm": 8.483747482299805,
"learning_rate": 4.222140762463344e-05,
"loss": 0.2468,
"step": 1825
},
{
"epoch": 0.7323832145684878,
"grad_norm": 3.365809679031372,
"learning_rate": 4.203812316715543e-05,
"loss": 0.2166,
"step": 1850
},
{
"epoch": 0.7422802850356295,
"grad_norm": 6.7934489250183105,
"learning_rate": 4.1854838709677415e-05,
"loss": 0.207,
"step": 1875
},
{
"epoch": 0.7521773555027712,
"grad_norm": 10.689802169799805,
"learning_rate": 4.1671554252199415e-05,
"loss": 0.2137,
"step": 1900
},
{
"epoch": 0.7620744259699129,
"grad_norm": 8.500500679016113,
"learning_rate": 4.148826979472141e-05,
"loss": 0.2068,
"step": 1925
},
{
"epoch": 0.7719714964370546,
"grad_norm": 3.0795400142669678,
"learning_rate": 4.13049853372434e-05,
"loss": 0.2335,
"step": 1950
},
{
"epoch": 0.7818685669041964,
"grad_norm": 6.6960768699646,
"learning_rate": 4.112170087976539e-05,
"loss": 0.3198,
"step": 1975
},
{
"epoch": 0.7917656373713381,
"grad_norm": 9.185827255249023,
"learning_rate": 4.093841642228739e-05,
"loss": 0.2917,
"step": 2000
},
{
"epoch": 0.8016627078384798,
"grad_norm": 10.589933395385742,
"learning_rate": 4.0755131964809386e-05,
"loss": 0.2764,
"step": 2025
},
{
"epoch": 0.8115597783056215,
"grad_norm": 4.64451789855957,
"learning_rate": 4.057184750733138e-05,
"loss": 0.2737,
"step": 2050
},
{
"epoch": 0.8214568487727633,
"grad_norm": 17.72431182861328,
"learning_rate": 4.038856304985337e-05,
"loss": 0.2484,
"step": 2075
},
{
"epoch": 0.831353919239905,
"grad_norm": 10.842966079711914,
"learning_rate": 4.0205278592375364e-05,
"loss": 0.2172,
"step": 2100
},
{
"epoch": 0.8412509897070467,
"grad_norm": 4.673035621643066,
"learning_rate": 4.0021994134897364e-05,
"loss": 0.2094,
"step": 2125
},
{
"epoch": 0.8511480601741884,
"grad_norm": 7.97542142868042,
"learning_rate": 3.9838709677419356e-05,
"loss": 0.2674,
"step": 2150
},
{
"epoch": 0.8610451306413301,
"grad_norm": 3.362852096557617,
"learning_rate": 3.965542521994135e-05,
"loss": 0.2136,
"step": 2175
},
{
"epoch": 0.8709422011084719,
"grad_norm": 8.911001205444336,
"learning_rate": 3.947214076246334e-05,
"loss": 0.1915,
"step": 2200
},
{
"epoch": 0.8808392715756136,
"grad_norm": 8.764837265014648,
"learning_rate": 3.928885630498534e-05,
"loss": 0.1691,
"step": 2225
},
{
"epoch": 0.8907363420427553,
"grad_norm": 9.035571098327637,
"learning_rate": 3.9105571847507334e-05,
"loss": 0.2329,
"step": 2250
},
{
"epoch": 0.900633412509897,
"grad_norm": 3.9743757247924805,
"learning_rate": 3.892228739002933e-05,
"loss": 0.2151,
"step": 2275
},
{
"epoch": 0.9105304829770388,
"grad_norm": 4.488095283508301,
"learning_rate": 3.873900293255132e-05,
"loss": 0.2525,
"step": 2300
},
{
"epoch": 0.9204275534441805,
"grad_norm": 7.458625316619873,
"learning_rate": 3.855571847507331e-05,
"loss": 0.225,
"step": 2325
},
{
"epoch": 0.9303246239113222,
"grad_norm": 12.148482322692871,
"learning_rate": 3.837243401759531e-05,
"loss": 0.2642,
"step": 2350
},
{
"epoch": 0.9402216943784639,
"grad_norm": 5.104764461517334,
"learning_rate": 3.8189149560117305e-05,
"loss": 0.2268,
"step": 2375
},
{
"epoch": 0.9501187648456056,
"grad_norm": 4.338552951812744,
"learning_rate": 3.80058651026393e-05,
"loss": 0.217,
"step": 2400
},
{
"epoch": 0.9600158353127475,
"grad_norm": 4.058621406555176,
"learning_rate": 3.782258064516129e-05,
"loss": 0.2058,
"step": 2425
},
{
"epoch": 0.9699129057798892,
"grad_norm": 4.036930084228516,
"learning_rate": 3.763929618768329e-05,
"loss": 0.2721,
"step": 2450
},
{
"epoch": 0.9798099762470309,
"grad_norm": 5.858448028564453,
"learning_rate": 3.745601173020528e-05,
"loss": 0.2379,
"step": 2475
},
{
"epoch": 0.9897070467141726,
"grad_norm": 10.197368621826172,
"learning_rate": 3.7272727272727276e-05,
"loss": 0.2552,
"step": 2500
},
{
"epoch": 0.9996041171813144,
"grad_norm": 3.942063093185425,
"learning_rate": 3.708944281524927e-05,
"loss": 0.1938,
"step": 2525
},
{
"epoch": 1.0,
"eval_accuracy": 0.9472,
"eval_f1_macro": 0.9359430887114246,
"eval_f1_micro": 0.9472,
"eval_f1_weighted": 0.947114042501874,
"eval_loss": 0.20199425518512726,
"eval_precision_macro": 0.9529085930911292,
"eval_precision_micro": 0.9472,
"eval_precision_weighted": 0.948726542737522,
"eval_recall_macro": 0.9217770604016604,
"eval_recall_micro": 0.9472,
"eval_recall_weighted": 0.9472,
"eval_runtime": 5.1028,
"eval_samples_per_second": 979.852,
"eval_steps_per_second": 15.482,
"step": 2526
},
{
"epoch": 1.009501187648456,
"grad_norm": 2.8100507259368896,
"learning_rate": 3.690615835777126e-05,
"loss": 0.1958,
"step": 2550
},
{
"epoch": 1.0193982581155978,
"grad_norm": 4.251704692840576,
"learning_rate": 3.672287390029326e-05,
"loss": 0.1684,
"step": 2575
},
{
"epoch": 1.0292953285827395,
"grad_norm": 13.66088581085205,
"learning_rate": 3.6539589442815254e-05,
"loss": 0.1564,
"step": 2600
},
{
"epoch": 1.0391923990498813,
"grad_norm": 6.338856220245361,
"learning_rate": 3.6356304985337246e-05,
"loss": 0.1673,
"step": 2625
},
{
"epoch": 1.049089469517023,
"grad_norm": 0.7401424646377563,
"learning_rate": 3.617302052785924e-05,
"loss": 0.1579,
"step": 2650
},
{
"epoch": 1.0589865399841647,
"grad_norm": 7.7578325271606445,
"learning_rate": 3.598973607038124e-05,
"loss": 0.1639,
"step": 2675
},
{
"epoch": 1.0688836104513064,
"grad_norm": 10.281119346618652,
"learning_rate": 3.580645161290323e-05,
"loss": 0.1367,
"step": 2700
},
{
"epoch": 1.0787806809184481,
"grad_norm": 9.214536666870117,
"learning_rate": 3.562316715542522e-05,
"loss": 0.1913,
"step": 2725
},
{
"epoch": 1.0886777513855899,
"grad_norm": 7.096231937408447,
"learning_rate": 3.543988269794721e-05,
"loss": 0.1718,
"step": 2750
},
{
"epoch": 1.0985748218527316,
"grad_norm": 1.1231356859207153,
"learning_rate": 3.525659824046921e-05,
"loss": 0.1623,
"step": 2775
},
{
"epoch": 1.1084718923198733,
"grad_norm": 9.105703353881836,
"learning_rate": 3.50733137829912e-05,
"loss": 0.1516,
"step": 2800
},
{
"epoch": 1.118368962787015,
"grad_norm": 2.2311670780181885,
"learning_rate": 3.4890029325513195e-05,
"loss": 0.1094,
"step": 2825
},
{
"epoch": 1.1282660332541568,
"grad_norm": 5.654956817626953,
"learning_rate": 3.470674486803519e-05,
"loss": 0.2151,
"step": 2850
},
{
"epoch": 1.1381631037212985,
"grad_norm": 0.6430861353874207,
"learning_rate": 3.452346041055719e-05,
"loss": 0.1211,
"step": 2875
},
{
"epoch": 1.1480601741884402,
"grad_norm": 4.405457496643066,
"learning_rate": 3.434017595307918e-05,
"loss": 0.1062,
"step": 2900
},
{
"epoch": 1.157957244655582,
"grad_norm": 3.2275402545928955,
"learning_rate": 3.415689149560117e-05,
"loss": 0.1378,
"step": 2925
},
{
"epoch": 1.1678543151227236,
"grad_norm": 3.635753870010376,
"learning_rate": 3.3973607038123166e-05,
"loss": 0.1489,
"step": 2950
},
{
"epoch": 1.1777513855898654,
"grad_norm": 2.695546865463257,
"learning_rate": 3.379032258064516e-05,
"loss": 0.1703,
"step": 2975
},
{
"epoch": 1.187648456057007,
"grad_norm": 1.3425699472427368,
"learning_rate": 3.360703812316716e-05,
"loss": 0.1421,
"step": 3000
},
{
"epoch": 1.1975455265241488,
"grad_norm": 11.01319408416748,
"learning_rate": 3.342375366568915e-05,
"loss": 0.1221,
"step": 3025
},
{
"epoch": 1.2074425969912905,
"grad_norm": 12.686071395874023,
"learning_rate": 3.3240469208211144e-05,
"loss": 0.1699,
"step": 3050
},
{
"epoch": 1.2173396674584323,
"grad_norm": 8.48775577545166,
"learning_rate": 3.305718475073314e-05,
"loss": 0.1505,
"step": 3075
},
{
"epoch": 1.227236737925574,
"grad_norm": 5.593795299530029,
"learning_rate": 3.2873900293255136e-05,
"loss": 0.0906,
"step": 3100
},
{
"epoch": 1.2371338083927157,
"grad_norm": 13.118975639343262,
"learning_rate": 3.269061583577713e-05,
"loss": 0.1389,
"step": 3125
},
{
"epoch": 1.2470308788598574,
"grad_norm": 10.423203468322754,
"learning_rate": 3.250733137829912e-05,
"loss": 0.1789,
"step": 3150
},
{
"epoch": 1.2569279493269991,
"grad_norm": 8.423437118530273,
"learning_rate": 3.2324046920821115e-05,
"loss": 0.1683,
"step": 3175
},
{
"epoch": 1.2668250197941409,
"grad_norm": 1.197938323020935,
"learning_rate": 3.214076246334311e-05,
"loss": 0.1356,
"step": 3200
},
{
"epoch": 1.2767220902612826,
"grad_norm": 4.034856796264648,
"learning_rate": 3.195747800586511e-05,
"loss": 0.2099,
"step": 3225
},
{
"epoch": 1.2866191607284243,
"grad_norm": 6.324892997741699,
"learning_rate": 3.17741935483871e-05,
"loss": 0.1658,
"step": 3250
},
{
"epoch": 1.2965162311955662,
"grad_norm": 7.48460054397583,
"learning_rate": 3.159090909090909e-05,
"loss": 0.131,
"step": 3275
},
{
"epoch": 1.3064133016627077,
"grad_norm": 21.121984481811523,
"learning_rate": 3.1407624633431085e-05,
"loss": 0.1781,
"step": 3300
},
{
"epoch": 1.3163103721298497,
"grad_norm": 3.5356669425964355,
"learning_rate": 3.1224340175953085e-05,
"loss": 0.1873,
"step": 3325
},
{
"epoch": 1.3262074425969912,
"grad_norm": 9.41774845123291,
"learning_rate": 3.104105571847508e-05,
"loss": 0.1325,
"step": 3350
},
{
"epoch": 1.3361045130641331,
"grad_norm": 14.932723045349121,
"learning_rate": 3.085777126099707e-05,
"loss": 0.1727,
"step": 3375
},
{
"epoch": 1.3460015835312746,
"grad_norm": 6.5432233810424805,
"learning_rate": 3.067448680351906e-05,
"loss": 0.1487,
"step": 3400
},
{
"epoch": 1.3558986539984166,
"grad_norm": 2.8642232418060303,
"learning_rate": 3.049120234604106e-05,
"loss": 0.1365,
"step": 3425
},
{
"epoch": 1.365795724465558,
"grad_norm": 5.638850688934326,
"learning_rate": 3.0307917888563052e-05,
"loss": 0.1628,
"step": 3450
},
{
"epoch": 1.3756927949327,
"grad_norm": 0.21325694024562836,
"learning_rate": 3.0124633431085048e-05,
"loss": 0.1555,
"step": 3475
},
{
"epoch": 1.3855898653998415,
"grad_norm": 3.200031042098999,
"learning_rate": 2.994134897360704e-05,
"loss": 0.1263,
"step": 3500
},
{
"epoch": 1.3954869358669835,
"grad_norm": 10.525226593017578,
"learning_rate": 2.9758064516129037e-05,
"loss": 0.1657,
"step": 3525
},
{
"epoch": 1.405384006334125,
"grad_norm": 4.1915283203125,
"learning_rate": 2.957478005865103e-05,
"loss": 0.1551,
"step": 3550
},
{
"epoch": 1.415281076801267,
"grad_norm": 9.446343421936035,
"learning_rate": 2.9391495601173026e-05,
"loss": 0.1293,
"step": 3575
},
{
"epoch": 1.4251781472684084,
"grad_norm": 15.011846542358398,
"learning_rate": 2.9208211143695012e-05,
"loss": 0.1402,
"step": 3600
},
{
"epoch": 1.4350752177355504,
"grad_norm": 0.7313398718833923,
"learning_rate": 2.902492668621701e-05,
"loss": 0.1057,
"step": 3625
},
{
"epoch": 1.444972288202692,
"grad_norm": 19.252836227416992,
"learning_rate": 2.8841642228739e-05,
"loss": 0.1155,
"step": 3650
},
{
"epoch": 1.4548693586698338,
"grad_norm": 1.615921974182129,
"learning_rate": 2.8658357771260997e-05,
"loss": 0.1466,
"step": 3675
},
{
"epoch": 1.4647664291369755,
"grad_norm": 2.299511432647705,
"learning_rate": 2.847507331378299e-05,
"loss": 0.1147,
"step": 3700
},
{
"epoch": 1.4746634996041172,
"grad_norm": 3.294553756713867,
"learning_rate": 2.8291788856304986e-05,
"loss": 0.1232,
"step": 3725
},
{
"epoch": 1.484560570071259,
"grad_norm": 0.17168129980564117,
"learning_rate": 2.810850439882698e-05,
"loss": 0.1586,
"step": 3750
},
{
"epoch": 1.4944576405384007,
"grad_norm": 12.83199691772461,
"learning_rate": 2.7925219941348972e-05,
"loss": 0.1096,
"step": 3775
},
{
"epoch": 1.5043547110055424,
"grad_norm": 12.708085060119629,
"learning_rate": 2.7741935483870968e-05,
"loss": 0.1043,
"step": 3800
},
{
"epoch": 1.5142517814726841,
"grad_norm": 4.487904071807861,
"learning_rate": 2.755865102639296e-05,
"loss": 0.1116,
"step": 3825
},
{
"epoch": 1.5241488519398259,
"grad_norm": 5.373720645904541,
"learning_rate": 2.7375366568914957e-05,
"loss": 0.1456,
"step": 3850
},
{
"epoch": 1.5340459224069676,
"grad_norm": 0.20354461669921875,
"learning_rate": 2.719208211143695e-05,
"loss": 0.1209,
"step": 3875
},
{
"epoch": 1.5439429928741093,
"grad_norm": 5.304108142852783,
"learning_rate": 2.7008797653958946e-05,
"loss": 0.1816,
"step": 3900
},
{
"epoch": 1.553840063341251,
"grad_norm": 0.092073954641819,
"learning_rate": 2.682551319648094e-05,
"loss": 0.1568,
"step": 3925
},
{
"epoch": 1.5637371338083927,
"grad_norm": 9.854479789733887,
"learning_rate": 2.6642228739002935e-05,
"loss": 0.1741,
"step": 3950
},
{
"epoch": 1.5736342042755345,
"grad_norm": 0.4590989947319031,
"learning_rate": 2.6458944281524928e-05,
"loss": 0.1702,
"step": 3975
},
{
"epoch": 1.5835312747426762,
"grad_norm": 0.07429279386997223,
"learning_rate": 2.6275659824046924e-05,
"loss": 0.0975,
"step": 4000
},
{
"epoch": 1.593428345209818,
"grad_norm": 5.391401290893555,
"learning_rate": 2.6092375366568917e-05,
"loss": 0.1497,
"step": 4025
},
{
"epoch": 1.6033254156769596,
"grad_norm": 0.18375837802886963,
"learning_rate": 2.590909090909091e-05,
"loss": 0.116,
"step": 4050
},
{
"epoch": 1.6132224861441014,
"grad_norm": 13.557960510253906,
"learning_rate": 2.5725806451612905e-05,
"loss": 0.1927,
"step": 4075
},
{
"epoch": 1.623119556611243,
"grad_norm": 1.467595100402832,
"learning_rate": 2.5542521994134898e-05,
"loss": 0.1396,
"step": 4100
},
{
"epoch": 1.6330166270783848,
"grad_norm": 5.85172700881958,
"learning_rate": 2.5359237536656894e-05,
"loss": 0.1363,
"step": 4125
},
{
"epoch": 1.6429136975455265,
"grad_norm": 1.1925976276397705,
"learning_rate": 2.5175953079178887e-05,
"loss": 0.1604,
"step": 4150
},
{
"epoch": 1.6528107680126682,
"grad_norm": 8.975228309631348,
"learning_rate": 2.4992668621700883e-05,
"loss": 0.1534,
"step": 4175
},
{
"epoch": 1.66270783847981,
"grad_norm": 3.5665903091430664,
"learning_rate": 2.4809384164222876e-05,
"loss": 0.1466,
"step": 4200
},
{
"epoch": 1.6726049089469517,
"grad_norm": 5.208387851715088,
"learning_rate": 2.462609970674487e-05,
"loss": 0.101,
"step": 4225
},
{
"epoch": 1.6825019794140934,
"grad_norm": 8.346717834472656,
"learning_rate": 2.444281524926686e-05,
"loss": 0.0966,
"step": 4250
},
{
"epoch": 1.6923990498812351,
"grad_norm": 1.0066956281661987,
"learning_rate": 2.4259530791788858e-05,
"loss": 0.1251,
"step": 4275
},
{
"epoch": 1.7022961203483769,
"grad_norm": 8.269057273864746,
"learning_rate": 2.407624633431085e-05,
"loss": 0.1595,
"step": 4300
},
{
"epoch": 1.7121931908155186,
"grad_norm": 6.28223180770874,
"learning_rate": 2.3892961876832843e-05,
"loss": 0.1732,
"step": 4325
},
{
"epoch": 1.7220902612826603,
"grad_norm": 5.962674140930176,
"learning_rate": 2.370967741935484e-05,
"loss": 0.1601,
"step": 4350
},
{
"epoch": 1.731987331749802,
"grad_norm": 4.525330066680908,
"learning_rate": 2.3526392961876832e-05,
"loss": 0.1427,
"step": 4375
},
{
"epoch": 1.7418844022169437,
"grad_norm": 9.384072303771973,
"learning_rate": 2.334310850439883e-05,
"loss": 0.1489,
"step": 4400
},
{
"epoch": 1.7517814726840855,
"grad_norm": 16.098506927490234,
"learning_rate": 2.315982404692082e-05,
"loss": 0.1101,
"step": 4425
},
{
"epoch": 1.7616785431512272,
"grad_norm": 4.9497480392456055,
"learning_rate": 2.2976539589442817e-05,
"loss": 0.1364,
"step": 4450
},
{
"epoch": 1.771575613618369,
"grad_norm": 4.449967384338379,
"learning_rate": 2.279325513196481e-05,
"loss": 0.1799,
"step": 4475
},
{
"epoch": 1.7814726840855108,
"grad_norm": 3.7315053939819336,
"learning_rate": 2.2609970674486806e-05,
"loss": 0.1342,
"step": 4500
},
{
"epoch": 1.7913697545526523,
"grad_norm": 3.46779727935791,
"learning_rate": 2.24266862170088e-05,
"loss": 0.1348,
"step": 4525
},
{
"epoch": 1.8012668250197943,
"grad_norm": 6.9282402992248535,
"learning_rate": 2.2243401759530792e-05,
"loss": 0.1259,
"step": 4550
},
{
"epoch": 1.8111638954869358,
"grad_norm": 6.039886951446533,
"learning_rate": 2.2060117302052788e-05,
"loss": 0.1177,
"step": 4575
},
{
"epoch": 1.8210609659540777,
"grad_norm": 8.866342544555664,
"learning_rate": 2.187683284457478e-05,
"loss": 0.0904,
"step": 4600
},
{
"epoch": 1.8309580364212192,
"grad_norm": 11.28046989440918,
"learning_rate": 2.1693548387096777e-05,
"loss": 0.1127,
"step": 4625
},
{
"epoch": 1.8408551068883612,
"grad_norm": 0.10267776250839233,
"learning_rate": 2.1510263929618766e-05,
"loss": 0.1338,
"step": 4650
},
{
"epoch": 1.8507521773555027,
"grad_norm": 4.542361259460449,
"learning_rate": 2.1326979472140763e-05,
"loss": 0.1465,
"step": 4675
},
{
"epoch": 1.8606492478226446,
"grad_norm": 6.66448974609375,
"learning_rate": 2.1143695014662755e-05,
"loss": 0.1373,
"step": 4700
},
{
"epoch": 1.8705463182897861,
"grad_norm": 5.7664690017700195,
"learning_rate": 2.096041055718475e-05,
"loss": 0.1329,
"step": 4725
},
{
"epoch": 1.880443388756928,
"grad_norm": 6.261977195739746,
"learning_rate": 2.0777126099706744e-05,
"loss": 0.0891,
"step": 4750
},
{
"epoch": 1.8903404592240696,
"grad_norm": 0.11381009221076965,
"learning_rate": 2.059384164222874e-05,
"loss": 0.1099,
"step": 4775
},
{
"epoch": 1.9002375296912115,
"grad_norm": 2.1505606174468994,
"learning_rate": 2.0410557184750733e-05,
"loss": 0.157,
"step": 4800
},
{
"epoch": 1.910134600158353,
"grad_norm": 8.369518280029297,
"learning_rate": 2.022727272727273e-05,
"loss": 0.1188,
"step": 4825
},
{
"epoch": 1.920031670625495,
"grad_norm": 14.388636589050293,
"learning_rate": 2.0043988269794722e-05,
"loss": 0.1404,
"step": 4850
},
{
"epoch": 1.9299287410926365,
"grad_norm": 0.20757636427879333,
"learning_rate": 1.9860703812316715e-05,
"loss": 0.1269,
"step": 4875
},
{
"epoch": 1.9398258115597784,
"grad_norm": 4.5515875816345215,
"learning_rate": 1.967741935483871e-05,
"loss": 0.0991,
"step": 4900
},
{
"epoch": 1.94972288202692,
"grad_norm": 0.5187767744064331,
"learning_rate": 1.9494134897360704e-05,
"loss": 0.0744,
"step": 4925
},
{
"epoch": 1.9596199524940618,
"grad_norm": 7.752375602722168,
"learning_rate": 1.93108504398827e-05,
"loss": 0.1262,
"step": 4950
},
{
"epoch": 1.9695170229612033,
"grad_norm": 4.192614555358887,
"learning_rate": 1.9127565982404693e-05,
"loss": 0.1082,
"step": 4975
},
{
"epoch": 1.9794140934283453,
"grad_norm": 12.195773124694824,
"learning_rate": 1.894428152492669e-05,
"loss": 0.118,
"step": 5000
},
{
"epoch": 1.9893111638954868,
"grad_norm": 4.470797061920166,
"learning_rate": 1.8760997067448682e-05,
"loss": 0.1131,
"step": 5025
},
{
"epoch": 1.9992082343626287,
"grad_norm": 2.6608407497406006,
"learning_rate": 1.8577712609970678e-05,
"loss": 0.1176,
"step": 5050
},
{
"epoch": 2.0,
"eval_accuracy": 0.962,
"eval_f1_macro": 0.9527593111357376,
"eval_f1_micro": 0.962,
"eval_f1_weighted": 0.961887560772812,
"eval_loss": 0.16408780217170715,
"eval_precision_macro": 0.9611804451780929,
"eval_precision_micro": 0.962,
"eval_precision_weighted": 0.9624429314047246,
"eval_recall_macro": 0.9460711525998422,
"eval_recall_micro": 0.962,
"eval_recall_weighted": 0.962,
"eval_runtime": 5.1023,
"eval_samples_per_second": 979.953,
"eval_steps_per_second": 15.483,
"step": 5052
},
{
"epoch": 2.0091053048297702,
"grad_norm": 6.416041374206543,
"learning_rate": 1.8394428152492667e-05,
"loss": 0.0793,
"step": 5075
},
{
"epoch": 2.019002375296912,
"grad_norm": 9.853547096252441,
"learning_rate": 1.8211143695014664e-05,
"loss": 0.0642,
"step": 5100
},
{
"epoch": 2.0288994457640537,
"grad_norm": 9.938668251037598,
"learning_rate": 1.8027859237536656e-05,
"loss": 0.0778,
"step": 5125
},
{
"epoch": 2.0387965162311956,
"grad_norm": 0.6218538880348206,
"learning_rate": 1.7844574780058653e-05,
"loss": 0.07,
"step": 5150
},
{
"epoch": 2.048693586698337,
"grad_norm": 7.014169216156006,
"learning_rate": 1.7661290322580645e-05,
"loss": 0.0591,
"step": 5175
},
{
"epoch": 2.058590657165479,
"grad_norm": 0.011782053858041763,
"learning_rate": 1.7478005865102638e-05,
"loss": 0.0638,
"step": 5200
},
{
"epoch": 2.0684877276326206,
"grad_norm": 2.117039918899536,
"learning_rate": 1.7294721407624634e-05,
"loss": 0.0571,
"step": 5225
},
{
"epoch": 2.0783847980997625,
"grad_norm": 6.522469997406006,
"learning_rate": 1.7111436950146627e-05,
"loss": 0.0623,
"step": 5250
},
{
"epoch": 2.088281868566904,
"grad_norm": 0.01880364678800106,
"learning_rate": 1.6928152492668623e-05,
"loss": 0.0487,
"step": 5275
},
{
"epoch": 2.098178939034046,
"grad_norm": 0.3510414958000183,
"learning_rate": 1.6744868035190616e-05,
"loss": 0.0678,
"step": 5300
},
{
"epoch": 2.1080760095011875,
"grad_norm": 0.7997303009033203,
"learning_rate": 1.6561583577712612e-05,
"loss": 0.0633,
"step": 5325
},
{
"epoch": 2.1179730799683294,
"grad_norm": 0.026792127639055252,
"learning_rate": 1.6378299120234605e-05,
"loss": 0.0861,
"step": 5350
},
{
"epoch": 2.127870150435471,
"grad_norm": 0.4307959973812103,
"learning_rate": 1.61950146627566e-05,
"loss": 0.0593,
"step": 5375
},
{
"epoch": 2.137767220902613,
"grad_norm": 0.2015238106250763,
"learning_rate": 1.6011730205278594e-05,
"loss": 0.0576,
"step": 5400
},
{
"epoch": 2.147664291369755,
"grad_norm": 0.7783300876617432,
"learning_rate": 1.5828445747800587e-05,
"loss": 0.0734,
"step": 5425
},
{
"epoch": 2.1575613618368963,
"grad_norm": 0.03129582852125168,
"learning_rate": 1.5645161290322583e-05,
"loss": 0.0778,
"step": 5450
},
{
"epoch": 2.167458432304038,
"grad_norm": 3.1593480110168457,
"learning_rate": 1.5461876832844576e-05,
"loss": 0.0872,
"step": 5475
},
{
"epoch": 2.1773555027711797,
"grad_norm": 0.04930011183023453,
"learning_rate": 1.527859237536657e-05,
"loss": 0.0458,
"step": 5500
},
{
"epoch": 2.1872525732383217,
"grad_norm": 8.676435470581055,
"learning_rate": 1.5095307917888563e-05,
"loss": 0.0268,
"step": 5525
},
{
"epoch": 2.197149643705463,
"grad_norm": 13.317243576049805,
"learning_rate": 1.4912023460410557e-05,
"loss": 0.0375,
"step": 5550
},
{
"epoch": 2.2070467141726047,
"grad_norm": 6.073598861694336,
"learning_rate": 1.4728739002932552e-05,
"loss": 0.045,
"step": 5575
},
{
"epoch": 2.2169437846397466,
"grad_norm": 1.5578258037567139,
"learning_rate": 1.4545454545454545e-05,
"loss": 0.0858,
"step": 5600
},
{
"epoch": 2.2268408551068886,
"grad_norm": 1.701669692993164,
"learning_rate": 1.4362170087976539e-05,
"loss": 0.0505,
"step": 5625
},
{
"epoch": 2.23673792557403,
"grad_norm": 9.207208633422852,
"learning_rate": 1.4178885630498534e-05,
"loss": 0.0729,
"step": 5650
},
{
"epoch": 2.246634996041172,
"grad_norm": 0.026624349877238274,
"learning_rate": 1.3995601173020528e-05,
"loss": 0.0368,
"step": 5675
},
{
"epoch": 2.2565320665083135,
"grad_norm": 0.5754280686378479,
"learning_rate": 1.3812316715542523e-05,
"loss": 0.0518,
"step": 5700
},
{
"epoch": 2.2664291369754554,
"grad_norm": 0.3010414242744446,
"learning_rate": 1.3629032258064517e-05,
"loss": 0.0675,
"step": 5725
},
{
"epoch": 2.276326207442597,
"grad_norm": 0.0495084747672081,
"learning_rate": 1.3445747800586511e-05,
"loss": 0.0861,
"step": 5750
},
{
"epoch": 2.286223277909739,
"grad_norm": 2.4626643657684326,
"learning_rate": 1.3262463343108506e-05,
"loss": 0.0792,
"step": 5775
},
{
"epoch": 2.2961203483768804,
"grad_norm": 0.9338565468788147,
"learning_rate": 1.30791788856305e-05,
"loss": 0.0385,
"step": 5800
},
{
"epoch": 2.3060174188440223,
"grad_norm": 3.80712628364563,
"learning_rate": 1.2895894428152493e-05,
"loss": 0.0907,
"step": 5825
},
{
"epoch": 2.315914489311164,
"grad_norm": 6.940539836883545,
"learning_rate": 1.2712609970674488e-05,
"loss": 0.0802,
"step": 5850
},
{
"epoch": 2.325811559778306,
"grad_norm": 4.521027088165283,
"learning_rate": 1.2529325513196482e-05,
"loss": 0.0752,
"step": 5875
},
{
"epoch": 2.3357086302454473,
"grad_norm": 1.9812321662902832,
"learning_rate": 1.2346041055718475e-05,
"loss": 0.082,
"step": 5900
},
{
"epoch": 2.3456057007125892,
"grad_norm": 11.957037925720215,
"learning_rate": 1.216275659824047e-05,
"loss": 0.0492,
"step": 5925
},
{
"epoch": 2.3555027711797307,
"grad_norm": 0.16896741092205048,
"learning_rate": 1.1979472140762464e-05,
"loss": 0.0519,
"step": 5950
},
{
"epoch": 2.3653998416468727,
"grad_norm": 0.6698777675628662,
"learning_rate": 1.1796187683284458e-05,
"loss": 0.1032,
"step": 5975
},
{
"epoch": 2.375296912114014,
"grad_norm": 13.273818969726562,
"learning_rate": 1.1612903225806453e-05,
"loss": 0.0844,
"step": 6000
},
{
"epoch": 2.385193982581156,
"grad_norm": 2.729861259460449,
"learning_rate": 1.1429618768328447e-05,
"loss": 0.0663,
"step": 6025
},
{
"epoch": 2.3950910530482976,
"grad_norm": 15.863778114318848,
"learning_rate": 1.124633431085044e-05,
"loss": 0.039,
"step": 6050
},
{
"epoch": 2.4049881235154396,
"grad_norm": 8.72951602935791,
"learning_rate": 1.1063049853372435e-05,
"loss": 0.0667,
"step": 6075
},
{
"epoch": 2.414885193982581,
"grad_norm": 0.06009228155016899,
"learning_rate": 1.0879765395894429e-05,
"loss": 0.0962,
"step": 6100
},
{
"epoch": 2.424782264449723,
"grad_norm": 1.6163275241851807,
"learning_rate": 1.0696480938416424e-05,
"loss": 0.0765,
"step": 6125
},
{
"epoch": 2.4346793349168645,
"grad_norm": 0.023228373378515244,
"learning_rate": 1.0513196480938416e-05,
"loss": 0.0786,
"step": 6150
},
{
"epoch": 2.4445764053840064,
"grad_norm": 0.010257094167172909,
"learning_rate": 1.032991202346041e-05,
"loss": 0.0398,
"step": 6175
},
{
"epoch": 2.454473475851148,
"grad_norm": 5.985715389251709,
"learning_rate": 1.0146627565982405e-05,
"loss": 0.0757,
"step": 6200
},
{
"epoch": 2.46437054631829,
"grad_norm": 0.06866980344057083,
"learning_rate": 9.9633431085044e-06,
"loss": 0.0554,
"step": 6225
},
{
"epoch": 2.4742676167854314,
"grad_norm": 4.456401348114014,
"learning_rate": 9.780058651026392e-06,
"loss": 0.0496,
"step": 6250
},
{
"epoch": 2.4841646872525733,
"grad_norm": 17.26448631286621,
"learning_rate": 9.596774193548387e-06,
"loss": 0.0679,
"step": 6275
},
{
"epoch": 2.494061757719715,
"grad_norm": 8.371593475341797,
"learning_rate": 9.413489736070381e-06,
"loss": 0.0745,
"step": 6300
},
{
"epoch": 2.5039588281868568,
"grad_norm": 0.02239610068500042,
"learning_rate": 9.230205278592376e-06,
"loss": 0.0557,
"step": 6325
},
{
"epoch": 2.5138558986539983,
"grad_norm": 0.046766772866249084,
"learning_rate": 9.04692082111437e-06,
"loss": 0.0438,
"step": 6350
},
{
"epoch": 2.52375296912114,
"grad_norm": 8.119983673095703,
"learning_rate": 8.863636363636365e-06,
"loss": 0.0713,
"step": 6375
},
{
"epoch": 2.5336500395882817,
"grad_norm": 0.0916699767112732,
"learning_rate": 8.68035190615836e-06,
"loss": 0.1,
"step": 6400
},
{
"epoch": 2.5435471100554237,
"grad_norm": 6.661031246185303,
"learning_rate": 8.497067448680352e-06,
"loss": 0.0949,
"step": 6425
},
{
"epoch": 2.553444180522565,
"grad_norm": 5.773529529571533,
"learning_rate": 8.313782991202347e-06,
"loss": 0.0834,
"step": 6450
},
{
"epoch": 2.563341250989707,
"grad_norm": 19.465478897094727,
"learning_rate": 8.13049853372434e-06,
"loss": 0.0927,
"step": 6475
},
{
"epoch": 2.5732383214568486,
"grad_norm": 9.270087242126465,
"learning_rate": 7.947214076246334e-06,
"loss": 0.0591,
"step": 6500
},
{
"epoch": 2.5831353919239906,
"grad_norm": 1.235212802886963,
"learning_rate": 7.763929618768328e-06,
"loss": 0.1145,
"step": 6525
},
{
"epoch": 2.5930324623911325,
"grad_norm": 0.10507909208536148,
"learning_rate": 7.580645161290323e-06,
"loss": 0.0928,
"step": 6550
},
{
"epoch": 2.602929532858274,
"grad_norm": 1.12295663356781,
"learning_rate": 7.397360703812317e-06,
"loss": 0.0602,
"step": 6575
},
{
"epoch": 2.6128266033254155,
"grad_norm": 8.503447532653809,
"learning_rate": 7.214076246334312e-06,
"loss": 0.077,
"step": 6600
},
{
"epoch": 2.6227236737925574,
"grad_norm": 0.9171582460403442,
"learning_rate": 7.030791788856305e-06,
"loss": 0.048,
"step": 6625
},
{
"epoch": 2.6326207442596994,
"grad_norm": 0.28691366314888,
"learning_rate": 6.8475073313783e-06,
"loss": 0.0548,
"step": 6650
},
{
"epoch": 2.642517814726841,
"grad_norm": 2.4092676639556885,
"learning_rate": 6.664222873900293e-06,
"loss": 0.0654,
"step": 6675
},
{
"epoch": 2.6524148851939824,
"grad_norm": 0.248480886220932,
"learning_rate": 6.480938416422287e-06,
"loss": 0.0776,
"step": 6700
},
{
"epoch": 2.6623119556611243,
"grad_norm": 1.0501718521118164,
"learning_rate": 6.2976539589442816e-06,
"loss": 0.0749,
"step": 6725
},
{
"epoch": 2.6722090261282663,
"grad_norm": 0.16996045410633087,
"learning_rate": 6.114369501466276e-06,
"loss": 0.0448,
"step": 6750
},
{
"epoch": 2.6821060965954078,
"grad_norm": 0.015705592930316925,
"learning_rate": 5.93108504398827e-06,
"loss": 0.043,
"step": 6775
},
{
"epoch": 2.6920031670625493,
"grad_norm": 13.008760452270508,
"learning_rate": 5.747800586510264e-06,
"loss": 0.045,
"step": 6800
},
{
"epoch": 2.701900237529691,
"grad_norm": 0.49796026945114136,
"learning_rate": 5.564516129032259e-06,
"loss": 0.0387,
"step": 6825
},
{
"epoch": 2.711797307996833,
"grad_norm": 6.566326141357422,
"learning_rate": 5.381231671554252e-06,
"loss": 0.0412,
"step": 6850
},
{
"epoch": 2.7216943784639747,
"grad_norm": 0.014399710111320019,
"learning_rate": 5.197947214076247e-06,
"loss": 0.0596,
"step": 6875
},
{
"epoch": 2.731591448931116,
"grad_norm": 0.3872062563896179,
"learning_rate": 5.014662756598241e-06,
"loss": 0.0523,
"step": 6900
},
{
"epoch": 2.741488519398258,
"grad_norm": 0.1425359696149826,
"learning_rate": 4.831378299120235e-06,
"loss": 0.0461,
"step": 6925
},
{
"epoch": 2.7513855898654,
"grad_norm": 0.4896790683269501,
"learning_rate": 4.6480938416422284e-06,
"loss": 0.0616,
"step": 6950
},
{
"epoch": 2.7612826603325415,
"grad_norm": 6.286714553833008,
"learning_rate": 4.464809384164223e-06,
"loss": 0.0896,
"step": 6975
},
{
"epoch": 2.771179730799683,
"grad_norm": 0.058547962456941605,
"learning_rate": 4.281524926686217e-06,
"loss": 0.0648,
"step": 7000
},
{
"epoch": 2.781076801266825,
"grad_norm": 0.3147684335708618,
"learning_rate": 4.098240469208212e-06,
"loss": 0.0701,
"step": 7025
},
{
"epoch": 2.790973871733967,
"grad_norm": 2.779256582260132,
"learning_rate": 3.9149560117302055e-06,
"loss": 0.0352,
"step": 7050
},
{
"epoch": 2.8008709422011084,
"grad_norm": 0.26303109526634216,
"learning_rate": 3.7316715542521995e-06,
"loss": 0.0663,
"step": 7075
},
{
"epoch": 2.81076801266825,
"grad_norm": 11.363053321838379,
"learning_rate": 3.5483870967741936e-06,
"loss": 0.0883,
"step": 7100
},
{
"epoch": 2.820665083135392,
"grad_norm": 0.36071789264678955,
"learning_rate": 3.365102639296188e-06,
"loss": 0.0762,
"step": 7125
},
{
"epoch": 2.830562153602534,
"grad_norm": 0.014336947351694107,
"learning_rate": 3.1818181818181817e-06,
"loss": 0.0433,
"step": 7150
},
{
"epoch": 2.8404592240696753,
"grad_norm": 8.244430541992188,
"learning_rate": 2.9985337243401757e-06,
"loss": 0.0436,
"step": 7175
},
{
"epoch": 2.850356294536817,
"grad_norm": 0.018452562391757965,
"learning_rate": 2.8152492668621702e-06,
"loss": 0.0478,
"step": 7200
},
{
"epoch": 2.8602533650039588,
"grad_norm": 0.9135558605194092,
"learning_rate": 2.6319648093841647e-06,
"loss": 0.0582,
"step": 7225
},
{
"epoch": 2.8701504354711007,
"grad_norm": 0.03040502220392227,
"learning_rate": 2.4486803519061583e-06,
"loss": 0.0353,
"step": 7250
},
{
"epoch": 2.880047505938242,
"grad_norm": 0.026754941791296005,
"learning_rate": 2.265395894428153e-06,
"loss": 0.0769,
"step": 7275
},
{
"epoch": 2.889944576405384,
"grad_norm": 0.09019900858402252,
"learning_rate": 2.082111436950147e-06,
"loss": 0.0506,
"step": 7300
},
{
"epoch": 2.8998416468725257,
"grad_norm": 0.04084889218211174,
"learning_rate": 1.8988269794721409e-06,
"loss": 0.0411,
"step": 7325
},
{
"epoch": 2.9097387173396676,
"grad_norm": 2.3881072998046875,
"learning_rate": 1.715542521994135e-06,
"loss": 0.0549,
"step": 7350
},
{
"epoch": 2.919635787806809,
"grad_norm": 7.836863040924072,
"learning_rate": 1.532258064516129e-06,
"loss": 0.0514,
"step": 7375
},
{
"epoch": 2.929532858273951,
"grad_norm": 1.4827078580856323,
"learning_rate": 1.3489736070381233e-06,
"loss": 0.0608,
"step": 7400
},
{
"epoch": 2.9394299287410925,
"grad_norm": 0.012712684459984303,
"learning_rate": 1.1656891495601175e-06,
"loss": 0.0542,
"step": 7425
},
{
"epoch": 2.9493269992082345,
"grad_norm": 6.55382776260376,
"learning_rate": 9.824046920821116e-07,
"loss": 0.0518,
"step": 7450
},
{
"epoch": 2.959224069675376,
"grad_norm": 0.21079152822494507,
"learning_rate": 7.991202346041056e-07,
"loss": 0.0618,
"step": 7475
},
{
"epoch": 2.969121140142518,
"grad_norm": 4.107754707336426,
"learning_rate": 6.158357771260998e-07,
"loss": 0.0368,
"step": 7500
},
{
"epoch": 2.9790182106096594,
"grad_norm": 0.4540683925151825,
"learning_rate": 4.325513196480939e-07,
"loss": 0.0595,
"step": 7525
},
{
"epoch": 2.9889152810768014,
"grad_norm": 0.416092187166214,
"learning_rate": 2.4926686217008803e-07,
"loss": 0.0626,
"step": 7550
},
{
"epoch": 2.998812351543943,
"grad_norm": 3.699631452560425,
"learning_rate": 6.598240469208211e-08,
"loss": 0.0729,
"step": 7575
},
{
"epoch": 3.0,
"eval_accuracy": 0.9694,
"eval_f1_macro": 0.9613886456444749,
"eval_f1_micro": 0.9694,
"eval_f1_weighted": 0.9693030681223207,
"eval_loss": 0.157407745718956,
"eval_precision_macro": 0.9679892485977634,
"eval_precision_micro": 0.9694,
"eval_precision_weighted": 0.9695713537396466,
"eval_recall_macro": 0.9560667596679707,
"eval_recall_micro": 0.9694,
"eval_recall_weighted": 0.9694,
"eval_runtime": 5.0753,
"eval_samples_per_second": 985.172,
"eval_steps_per_second": 15.566,
"step": 7578
}
],
"logging_steps": 25,
"max_steps": 7578,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.381368787756646e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}