{ "best_metric": 0.157407745718956, "best_model_checkpoint": "hlbooks-topic-classifier-bert-multilingual-uncased/checkpoint-7578", "epoch": 3.0, "eval_steps": 500, "global_step": 7578, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009897070467141725, "grad_norm": 3.174437999725342, "learning_rate": 1.5171503957783642e-06, "loss": 3.0686, "step": 25 }, { "epoch": 0.01979414093428345, "grad_norm": 5.321998119354248, "learning_rate": 3.1002638522427443e-06, "loss": 2.9657, "step": 50 }, { "epoch": 0.029691211401425176, "grad_norm": 7.038234710693359, "learning_rate": 4.617414248021108e-06, "loss": 2.8352, "step": 75 }, { "epoch": 0.0395882818685669, "grad_norm": 6.537544250488281, "learning_rate": 6.200527704485489e-06, "loss": 2.738, "step": 100 }, { "epoch": 0.04948535233570863, "grad_norm": 6.735942840576172, "learning_rate": 7.849604221635884e-06, "loss": 2.6217, "step": 125 }, { "epoch": 0.05938242280285035, "grad_norm": 5.756272792816162, "learning_rate": 9.49868073878628e-06, "loss": 2.4216, "step": 150 }, { "epoch": 0.06927949326999208, "grad_norm": 5.069972991943359, "learning_rate": 1.1147757255936676e-05, "loss": 2.2817, "step": 175 }, { "epoch": 0.0791765637371338, "grad_norm": 8.664924621582031, "learning_rate": 1.2796833773087072e-05, "loss": 2.1123, "step": 200 }, { "epoch": 0.08907363420427554, "grad_norm": 9.564645767211914, "learning_rate": 1.4445910290237468e-05, "loss": 1.8298, "step": 225 }, { "epoch": 0.09897070467141726, "grad_norm": 18.612655639648438, "learning_rate": 1.6094986807387864e-05, "loss": 1.4781, "step": 250 }, { "epoch": 0.10886777513855898, "grad_norm": 7.96259069442749, "learning_rate": 1.774406332453826e-05, "loss": 1.176, "step": 275 }, { "epoch": 0.1187648456057007, "grad_norm": 9.508382797241211, "learning_rate": 1.9393139841688653e-05, "loss": 1.038, "step": 300 }, { "epoch": 0.12866191607284244, "grad_norm": 9.386768341064453, "learning_rate": 2.104221635883905e-05, "loss": 0.9101, "step": 325 }, { "epoch": 0.13855898653998416, "grad_norm": 8.491036415100098, "learning_rate": 2.269129287598945e-05, "loss": 0.8534, "step": 350 }, { "epoch": 0.14845605700712589, "grad_norm": 13.790663719177246, "learning_rate": 2.4340369393139843e-05, "loss": 0.6119, "step": 375 }, { "epoch": 0.1583531274742676, "grad_norm": 14.771766662597656, "learning_rate": 2.5989445910290237e-05, "loss": 0.6075, "step": 400 }, { "epoch": 0.16825019794140933, "grad_norm": 5.174952030181885, "learning_rate": 2.763852242744063e-05, "loss": 0.6037, "step": 425 }, { "epoch": 0.17814726840855108, "grad_norm": 12.023221015930176, "learning_rate": 2.9287598944591033e-05, "loss": 0.5757, "step": 450 }, { "epoch": 0.1880443388756928, "grad_norm": 6.669355869293213, "learning_rate": 3.093667546174143e-05, "loss": 0.5343, "step": 475 }, { "epoch": 0.19794140934283452, "grad_norm": 11.855779647827148, "learning_rate": 3.258575197889182e-05, "loss": 0.5008, "step": 500 }, { "epoch": 0.20783847980997625, "grad_norm": 3.5531342029571533, "learning_rate": 3.423482849604222e-05, "loss": 0.4706, "step": 525 }, { "epoch": 0.21773555027711797, "grad_norm": 26.04022216796875, "learning_rate": 3.588390501319262e-05, "loss": 0.483, "step": 550 }, { "epoch": 0.2276326207442597, "grad_norm": 7.481447696685791, "learning_rate": 3.753298153034301e-05, "loss": 0.4332, "step": 575 }, { "epoch": 0.2375296912114014, "grad_norm": 18.177900314331055, "learning_rate": 3.9182058047493406e-05, "loss": 0.444, "step": 600 }, { "epoch": 0.24742676167854316, "grad_norm": 10.481646537780762, "learning_rate": 4.08311345646438e-05, "loss": 0.3346, "step": 625 }, { "epoch": 0.2573238321456849, "grad_norm": 15.196333885192871, "learning_rate": 4.2480211081794194e-05, "loss": 0.3647, "step": 650 }, { "epoch": 0.2672209026128266, "grad_norm": 8.303031921386719, "learning_rate": 4.412928759894459e-05, "loss": 0.4139, "step": 675 }, { "epoch": 0.2771179730799683, "grad_norm": 9.322664260864258, "learning_rate": 4.577836411609499e-05, "loss": 0.4656, "step": 700 }, { "epoch": 0.28701504354711005, "grad_norm": 7.468392848968506, "learning_rate": 4.7427440633245384e-05, "loss": 0.3996, "step": 725 }, { "epoch": 0.29691211401425177, "grad_norm": 7.190126895904541, "learning_rate": 4.907651715039578e-05, "loss": 0.3771, "step": 750 }, { "epoch": 0.3068091844813935, "grad_norm": 8.512736320495605, "learning_rate": 4.991935483870968e-05, "loss": 0.4247, "step": 775 }, { "epoch": 0.3167062549485352, "grad_norm": 15.00452995300293, "learning_rate": 4.973607038123168e-05, "loss": 0.3576, "step": 800 }, { "epoch": 0.32660332541567694, "grad_norm": 19.776445388793945, "learning_rate": 4.955278592375367e-05, "loss": 0.347, "step": 825 }, { "epoch": 0.33650039588281866, "grad_norm": 9.140093803405762, "learning_rate": 4.9369501466275664e-05, "loss": 0.3799, "step": 850 }, { "epoch": 0.34639746634996044, "grad_norm": 12.198457717895508, "learning_rate": 4.918621700879766e-05, "loss": 0.4086, "step": 875 }, { "epoch": 0.35629453681710216, "grad_norm": 3.6618921756744385, "learning_rate": 4.900293255131965e-05, "loss": 0.3457, "step": 900 }, { "epoch": 0.3661916072842439, "grad_norm": 17.983041763305664, "learning_rate": 4.881964809384165e-05, "loss": 0.364, "step": 925 }, { "epoch": 0.3760886777513856, "grad_norm": 0.4606013000011444, "learning_rate": 4.863636363636364e-05, "loss": 0.2662, "step": 950 }, { "epoch": 0.3859857482185273, "grad_norm": 5.343178749084473, "learning_rate": 4.8453079178885635e-05, "loss": 0.3289, "step": 975 }, { "epoch": 0.39588281868566905, "grad_norm": 11.04477596282959, "learning_rate": 4.826979472140763e-05, "loss": 0.3723, "step": 1000 }, { "epoch": 0.40577988915281077, "grad_norm": 8.415637016296387, "learning_rate": 4.808651026392962e-05, "loss": 0.29, "step": 1025 }, { "epoch": 0.4156769596199525, "grad_norm": 37.04660415649414, "learning_rate": 4.790322580645161e-05, "loss": 0.4181, "step": 1050 }, { "epoch": 0.4255740300870942, "grad_norm": 11.579482078552246, "learning_rate": 4.7719941348973606e-05, "loss": 0.2785, "step": 1075 }, { "epoch": 0.43547110055423593, "grad_norm": 8.857477188110352, "learning_rate": 4.75366568914956e-05, "loss": 0.3266, "step": 1100 }, { "epoch": 0.44536817102137766, "grad_norm": 5.317532539367676, "learning_rate": 4.73533724340176e-05, "loss": 0.344, "step": 1125 }, { "epoch": 0.4552652414885194, "grad_norm": 4.178307056427002, "learning_rate": 4.717008797653959e-05, "loss": 0.3613, "step": 1150 }, { "epoch": 0.4651623119556611, "grad_norm": 8.49862003326416, "learning_rate": 4.6986803519061584e-05, "loss": 0.2527, "step": 1175 }, { "epoch": 0.4750593824228028, "grad_norm": 0.5914684534072876, "learning_rate": 4.6803519061583577e-05, "loss": 0.258, "step": 1200 }, { "epoch": 0.4849564528899446, "grad_norm": 3.90535306930542, "learning_rate": 4.662023460410557e-05, "loss": 0.3091, "step": 1225 }, { "epoch": 0.4948535233570863, "grad_norm": 7.211574077606201, "learning_rate": 4.643695014662757e-05, "loss": 0.3302, "step": 1250 }, { "epoch": 0.504750593824228, "grad_norm": 9.782793045043945, "learning_rate": 4.625366568914956e-05, "loss": 0.3487, "step": 1275 }, { "epoch": 0.5146476642913698, "grad_norm": 9.339737892150879, "learning_rate": 4.6070381231671554e-05, "loss": 0.2164, "step": 1300 }, { "epoch": 0.5245447347585115, "grad_norm": 3.444295644760132, "learning_rate": 4.588709677419355e-05, "loss": 0.2874, "step": 1325 }, { "epoch": 0.5344418052256532, "grad_norm": 15.61107063293457, "learning_rate": 4.570381231671555e-05, "loss": 0.3161, "step": 1350 }, { "epoch": 0.5443388756927949, "grad_norm": 12.160496711730957, "learning_rate": 4.552052785923754e-05, "loss": 0.2242, "step": 1375 }, { "epoch": 0.5542359461599367, "grad_norm": 7.081170558929443, "learning_rate": 4.533724340175953e-05, "loss": 0.2625, "step": 1400 }, { "epoch": 0.5641330166270784, "grad_norm": 2.664806365966797, "learning_rate": 4.5153958944281525e-05, "loss": 0.2885, "step": 1425 }, { "epoch": 0.5740300870942201, "grad_norm": 5.956775188446045, "learning_rate": 4.497067448680352e-05, "loss": 0.3064, "step": 1450 }, { "epoch": 0.5839271575613618, "grad_norm": 10.270496368408203, "learning_rate": 4.478739002932552e-05, "loss": 0.2519, "step": 1475 }, { "epoch": 0.5938242280285035, "grad_norm": 11.354063987731934, "learning_rate": 4.460410557184751e-05, "loss": 0.2874, "step": 1500 }, { "epoch": 0.6037212984956453, "grad_norm": 4.400442600250244, "learning_rate": 4.44208211143695e-05, "loss": 0.248, "step": 1525 }, { "epoch": 0.613618368962787, "grad_norm": 5.145227909088135, "learning_rate": 4.4237536656891496e-05, "loss": 0.2757, "step": 1550 }, { "epoch": 0.6235154394299287, "grad_norm": 6.018128395080566, "learning_rate": 4.4054252199413495e-05, "loss": 0.2929, "step": 1575 }, { "epoch": 0.6334125098970704, "grad_norm": 11.10319709777832, "learning_rate": 4.387096774193549e-05, "loss": 0.2645, "step": 1600 }, { "epoch": 0.6433095803642122, "grad_norm": 4.846808433532715, "learning_rate": 4.368768328445748e-05, "loss": 0.1884, "step": 1625 }, { "epoch": 0.6532066508313539, "grad_norm": 20.15575408935547, "learning_rate": 4.3504398826979474e-05, "loss": 0.2996, "step": 1650 }, { "epoch": 0.6631037212984956, "grad_norm": 15.41659164428711, "learning_rate": 4.3321114369501466e-05, "loss": 0.2613, "step": 1675 }, { "epoch": 0.6730007917656373, "grad_norm": 9.153544425964355, "learning_rate": 4.3137829912023466e-05, "loss": 0.2867, "step": 1700 }, { "epoch": 0.6828978622327792, "grad_norm": 6.918684482574463, "learning_rate": 4.295454545454546e-05, "loss": 0.1998, "step": 1725 }, { "epoch": 0.6927949326999209, "grad_norm": 3.967953681945801, "learning_rate": 4.277126099706745e-05, "loss": 0.2632, "step": 1750 }, { "epoch": 0.7026920031670626, "grad_norm": 6.128458023071289, "learning_rate": 4.2587976539589444e-05, "loss": 0.2471, "step": 1775 }, { "epoch": 0.7125890736342043, "grad_norm": 3.5776052474975586, "learning_rate": 4.2404692082111444e-05, "loss": 0.2609, "step": 1800 }, { "epoch": 0.722486144101346, "grad_norm": 8.483747482299805, "learning_rate": 4.222140762463344e-05, "loss": 0.2468, "step": 1825 }, { "epoch": 0.7323832145684878, "grad_norm": 3.365809679031372, "learning_rate": 4.203812316715543e-05, "loss": 0.2166, "step": 1850 }, { "epoch": 0.7422802850356295, "grad_norm": 6.7934489250183105, "learning_rate": 4.1854838709677415e-05, "loss": 0.207, "step": 1875 }, { "epoch": 0.7521773555027712, "grad_norm": 10.689802169799805, "learning_rate": 4.1671554252199415e-05, "loss": 0.2137, "step": 1900 }, { "epoch": 0.7620744259699129, "grad_norm": 8.500500679016113, "learning_rate": 4.148826979472141e-05, "loss": 0.2068, "step": 1925 }, { "epoch": 0.7719714964370546, "grad_norm": 3.0795400142669678, "learning_rate": 4.13049853372434e-05, "loss": 0.2335, "step": 1950 }, { "epoch": 0.7818685669041964, "grad_norm": 6.6960768699646, "learning_rate": 4.112170087976539e-05, "loss": 0.3198, "step": 1975 }, { "epoch": 0.7917656373713381, "grad_norm": 9.185827255249023, "learning_rate": 4.093841642228739e-05, "loss": 0.2917, "step": 2000 }, { "epoch": 0.8016627078384798, "grad_norm": 10.589933395385742, "learning_rate": 4.0755131964809386e-05, "loss": 0.2764, "step": 2025 }, { "epoch": 0.8115597783056215, "grad_norm": 4.64451789855957, "learning_rate": 4.057184750733138e-05, "loss": 0.2737, "step": 2050 }, { "epoch": 0.8214568487727633, "grad_norm": 17.72431182861328, "learning_rate": 4.038856304985337e-05, "loss": 0.2484, "step": 2075 }, { "epoch": 0.831353919239905, "grad_norm": 10.842966079711914, "learning_rate": 4.0205278592375364e-05, "loss": 0.2172, "step": 2100 }, { "epoch": 0.8412509897070467, "grad_norm": 4.673035621643066, "learning_rate": 4.0021994134897364e-05, "loss": 0.2094, "step": 2125 }, { "epoch": 0.8511480601741884, "grad_norm": 7.97542142868042, "learning_rate": 3.9838709677419356e-05, "loss": 0.2674, "step": 2150 }, { "epoch": 0.8610451306413301, "grad_norm": 3.362852096557617, "learning_rate": 3.965542521994135e-05, "loss": 0.2136, "step": 2175 }, { "epoch": 0.8709422011084719, "grad_norm": 8.911001205444336, "learning_rate": 3.947214076246334e-05, "loss": 0.1915, "step": 2200 }, { "epoch": 0.8808392715756136, "grad_norm": 8.764837265014648, "learning_rate": 3.928885630498534e-05, "loss": 0.1691, "step": 2225 }, { "epoch": 0.8907363420427553, "grad_norm": 9.035571098327637, "learning_rate": 3.9105571847507334e-05, "loss": 0.2329, "step": 2250 }, { "epoch": 0.900633412509897, "grad_norm": 3.9743757247924805, "learning_rate": 3.892228739002933e-05, "loss": 0.2151, "step": 2275 }, { "epoch": 0.9105304829770388, "grad_norm": 4.488095283508301, "learning_rate": 3.873900293255132e-05, "loss": 0.2525, "step": 2300 }, { "epoch": 0.9204275534441805, "grad_norm": 7.458625316619873, "learning_rate": 3.855571847507331e-05, "loss": 0.225, "step": 2325 }, { "epoch": 0.9303246239113222, "grad_norm": 12.148482322692871, "learning_rate": 3.837243401759531e-05, "loss": 0.2642, "step": 2350 }, { "epoch": 0.9402216943784639, "grad_norm": 5.104764461517334, "learning_rate": 3.8189149560117305e-05, "loss": 0.2268, "step": 2375 }, { "epoch": 0.9501187648456056, "grad_norm": 4.338552951812744, "learning_rate": 3.80058651026393e-05, "loss": 0.217, "step": 2400 }, { "epoch": 0.9600158353127475, "grad_norm": 4.058621406555176, "learning_rate": 3.782258064516129e-05, "loss": 0.2058, "step": 2425 }, { "epoch": 0.9699129057798892, "grad_norm": 4.036930084228516, "learning_rate": 3.763929618768329e-05, "loss": 0.2721, "step": 2450 }, { "epoch": 0.9798099762470309, "grad_norm": 5.858448028564453, "learning_rate": 3.745601173020528e-05, "loss": 0.2379, "step": 2475 }, { "epoch": 0.9897070467141726, "grad_norm": 10.197368621826172, "learning_rate": 3.7272727272727276e-05, "loss": 0.2552, "step": 2500 }, { "epoch": 0.9996041171813144, "grad_norm": 3.942063093185425, "learning_rate": 3.708944281524927e-05, "loss": 0.1938, "step": 2525 }, { "epoch": 1.0, "eval_accuracy": 0.9472, "eval_f1_macro": 0.9359430887114246, "eval_f1_micro": 0.9472, "eval_f1_weighted": 0.947114042501874, "eval_loss": 0.20199425518512726, "eval_precision_macro": 0.9529085930911292, "eval_precision_micro": 0.9472, "eval_precision_weighted": 0.948726542737522, "eval_recall_macro": 0.9217770604016604, "eval_recall_micro": 0.9472, "eval_recall_weighted": 0.9472, "eval_runtime": 5.1028, "eval_samples_per_second": 979.852, "eval_steps_per_second": 15.482, "step": 2526 }, { "epoch": 1.009501187648456, "grad_norm": 2.8100507259368896, "learning_rate": 3.690615835777126e-05, "loss": 0.1958, "step": 2550 }, { "epoch": 1.0193982581155978, "grad_norm": 4.251704692840576, "learning_rate": 3.672287390029326e-05, "loss": 0.1684, "step": 2575 }, { "epoch": 1.0292953285827395, "grad_norm": 13.66088581085205, "learning_rate": 3.6539589442815254e-05, "loss": 0.1564, "step": 2600 }, { "epoch": 1.0391923990498813, "grad_norm": 6.338856220245361, "learning_rate": 3.6356304985337246e-05, "loss": 0.1673, "step": 2625 }, { "epoch": 1.049089469517023, "grad_norm": 0.7401424646377563, "learning_rate": 3.617302052785924e-05, "loss": 0.1579, "step": 2650 }, { "epoch": 1.0589865399841647, "grad_norm": 7.7578325271606445, "learning_rate": 3.598973607038124e-05, "loss": 0.1639, "step": 2675 }, { "epoch": 1.0688836104513064, "grad_norm": 10.281119346618652, "learning_rate": 3.580645161290323e-05, "loss": 0.1367, "step": 2700 }, { "epoch": 1.0787806809184481, "grad_norm": 9.214536666870117, "learning_rate": 3.562316715542522e-05, "loss": 0.1913, "step": 2725 }, { "epoch": 1.0886777513855899, "grad_norm": 7.096231937408447, "learning_rate": 3.543988269794721e-05, "loss": 0.1718, "step": 2750 }, { "epoch": 1.0985748218527316, "grad_norm": 1.1231356859207153, "learning_rate": 3.525659824046921e-05, "loss": 0.1623, "step": 2775 }, { "epoch": 1.1084718923198733, "grad_norm": 9.105703353881836, "learning_rate": 3.50733137829912e-05, "loss": 0.1516, "step": 2800 }, { "epoch": 1.118368962787015, "grad_norm": 2.2311670780181885, "learning_rate": 3.4890029325513195e-05, "loss": 0.1094, "step": 2825 }, { "epoch": 1.1282660332541568, "grad_norm": 5.654956817626953, "learning_rate": 3.470674486803519e-05, "loss": 0.2151, "step": 2850 }, { "epoch": 1.1381631037212985, "grad_norm": 0.6430861353874207, "learning_rate": 3.452346041055719e-05, "loss": 0.1211, "step": 2875 }, { "epoch": 1.1480601741884402, "grad_norm": 4.405457496643066, "learning_rate": 3.434017595307918e-05, "loss": 0.1062, "step": 2900 }, { "epoch": 1.157957244655582, "grad_norm": 3.2275402545928955, "learning_rate": 3.415689149560117e-05, "loss": 0.1378, "step": 2925 }, { "epoch": 1.1678543151227236, "grad_norm": 3.635753870010376, "learning_rate": 3.3973607038123166e-05, "loss": 0.1489, "step": 2950 }, { "epoch": 1.1777513855898654, "grad_norm": 2.695546865463257, "learning_rate": 3.379032258064516e-05, "loss": 0.1703, "step": 2975 }, { "epoch": 1.187648456057007, "grad_norm": 1.3425699472427368, "learning_rate": 3.360703812316716e-05, "loss": 0.1421, "step": 3000 }, { "epoch": 1.1975455265241488, "grad_norm": 11.01319408416748, "learning_rate": 3.342375366568915e-05, "loss": 0.1221, "step": 3025 }, { "epoch": 1.2074425969912905, "grad_norm": 12.686071395874023, "learning_rate": 3.3240469208211144e-05, "loss": 0.1699, "step": 3050 }, { "epoch": 1.2173396674584323, "grad_norm": 8.48775577545166, "learning_rate": 3.305718475073314e-05, "loss": 0.1505, "step": 3075 }, { "epoch": 1.227236737925574, "grad_norm": 5.593795299530029, "learning_rate": 3.2873900293255136e-05, "loss": 0.0906, "step": 3100 }, { "epoch": 1.2371338083927157, "grad_norm": 13.118975639343262, "learning_rate": 3.269061583577713e-05, "loss": 0.1389, "step": 3125 }, { "epoch": 1.2470308788598574, "grad_norm": 10.423203468322754, "learning_rate": 3.250733137829912e-05, "loss": 0.1789, "step": 3150 }, { "epoch": 1.2569279493269991, "grad_norm": 8.423437118530273, "learning_rate": 3.2324046920821115e-05, "loss": 0.1683, "step": 3175 }, { "epoch": 1.2668250197941409, "grad_norm": 1.197938323020935, "learning_rate": 3.214076246334311e-05, "loss": 0.1356, "step": 3200 }, { "epoch": 1.2767220902612826, "grad_norm": 4.034856796264648, "learning_rate": 3.195747800586511e-05, "loss": 0.2099, "step": 3225 }, { "epoch": 1.2866191607284243, "grad_norm": 6.324892997741699, "learning_rate": 3.17741935483871e-05, "loss": 0.1658, "step": 3250 }, { "epoch": 1.2965162311955662, "grad_norm": 7.48460054397583, "learning_rate": 3.159090909090909e-05, "loss": 0.131, "step": 3275 }, { "epoch": 1.3064133016627077, "grad_norm": 21.121984481811523, "learning_rate": 3.1407624633431085e-05, "loss": 0.1781, "step": 3300 }, { "epoch": 1.3163103721298497, "grad_norm": 3.5356669425964355, "learning_rate": 3.1224340175953085e-05, "loss": 0.1873, "step": 3325 }, { "epoch": 1.3262074425969912, "grad_norm": 9.41774845123291, "learning_rate": 3.104105571847508e-05, "loss": 0.1325, "step": 3350 }, { "epoch": 1.3361045130641331, "grad_norm": 14.932723045349121, "learning_rate": 3.085777126099707e-05, "loss": 0.1727, "step": 3375 }, { "epoch": 1.3460015835312746, "grad_norm": 6.5432233810424805, "learning_rate": 3.067448680351906e-05, "loss": 0.1487, "step": 3400 }, { "epoch": 1.3558986539984166, "grad_norm": 2.8642232418060303, "learning_rate": 3.049120234604106e-05, "loss": 0.1365, "step": 3425 }, { "epoch": 1.365795724465558, "grad_norm": 5.638850688934326, "learning_rate": 3.0307917888563052e-05, "loss": 0.1628, "step": 3450 }, { "epoch": 1.3756927949327, "grad_norm": 0.21325694024562836, "learning_rate": 3.0124633431085048e-05, "loss": 0.1555, "step": 3475 }, { "epoch": 1.3855898653998415, "grad_norm": 3.200031042098999, "learning_rate": 2.994134897360704e-05, "loss": 0.1263, "step": 3500 }, { "epoch": 1.3954869358669835, "grad_norm": 10.525226593017578, "learning_rate": 2.9758064516129037e-05, "loss": 0.1657, "step": 3525 }, { "epoch": 1.405384006334125, "grad_norm": 4.1915283203125, "learning_rate": 2.957478005865103e-05, "loss": 0.1551, "step": 3550 }, { "epoch": 1.415281076801267, "grad_norm": 9.446343421936035, "learning_rate": 2.9391495601173026e-05, "loss": 0.1293, "step": 3575 }, { "epoch": 1.4251781472684084, "grad_norm": 15.011846542358398, "learning_rate": 2.9208211143695012e-05, "loss": 0.1402, "step": 3600 }, { "epoch": 1.4350752177355504, "grad_norm": 0.7313398718833923, "learning_rate": 2.902492668621701e-05, "loss": 0.1057, "step": 3625 }, { "epoch": 1.444972288202692, "grad_norm": 19.252836227416992, "learning_rate": 2.8841642228739e-05, "loss": 0.1155, "step": 3650 }, { "epoch": 1.4548693586698338, "grad_norm": 1.615921974182129, "learning_rate": 2.8658357771260997e-05, "loss": 0.1466, "step": 3675 }, { "epoch": 1.4647664291369755, "grad_norm": 2.299511432647705, "learning_rate": 2.847507331378299e-05, "loss": 0.1147, "step": 3700 }, { "epoch": 1.4746634996041172, "grad_norm": 3.294553756713867, "learning_rate": 2.8291788856304986e-05, "loss": 0.1232, "step": 3725 }, { "epoch": 1.484560570071259, "grad_norm": 0.17168129980564117, "learning_rate": 2.810850439882698e-05, "loss": 0.1586, "step": 3750 }, { "epoch": 1.4944576405384007, "grad_norm": 12.83199691772461, "learning_rate": 2.7925219941348972e-05, "loss": 0.1096, "step": 3775 }, { "epoch": 1.5043547110055424, "grad_norm": 12.708085060119629, "learning_rate": 2.7741935483870968e-05, "loss": 0.1043, "step": 3800 }, { "epoch": 1.5142517814726841, "grad_norm": 4.487904071807861, "learning_rate": 2.755865102639296e-05, "loss": 0.1116, "step": 3825 }, { "epoch": 1.5241488519398259, "grad_norm": 5.373720645904541, "learning_rate": 2.7375366568914957e-05, "loss": 0.1456, "step": 3850 }, { "epoch": 1.5340459224069676, "grad_norm": 0.20354461669921875, "learning_rate": 2.719208211143695e-05, "loss": 0.1209, "step": 3875 }, { "epoch": 1.5439429928741093, "grad_norm": 5.304108142852783, "learning_rate": 2.7008797653958946e-05, "loss": 0.1816, "step": 3900 }, { "epoch": 1.553840063341251, "grad_norm": 0.092073954641819, "learning_rate": 2.682551319648094e-05, "loss": 0.1568, "step": 3925 }, { "epoch": 1.5637371338083927, "grad_norm": 9.854479789733887, "learning_rate": 2.6642228739002935e-05, "loss": 0.1741, "step": 3950 }, { "epoch": 1.5736342042755345, "grad_norm": 0.4590989947319031, "learning_rate": 2.6458944281524928e-05, "loss": 0.1702, "step": 3975 }, { "epoch": 1.5835312747426762, "grad_norm": 0.07429279386997223, "learning_rate": 2.6275659824046924e-05, "loss": 0.0975, "step": 4000 }, { "epoch": 1.593428345209818, "grad_norm": 5.391401290893555, "learning_rate": 2.6092375366568917e-05, "loss": 0.1497, "step": 4025 }, { "epoch": 1.6033254156769596, "grad_norm": 0.18375837802886963, "learning_rate": 2.590909090909091e-05, "loss": 0.116, "step": 4050 }, { "epoch": 1.6132224861441014, "grad_norm": 13.557960510253906, "learning_rate": 2.5725806451612905e-05, "loss": 0.1927, "step": 4075 }, { "epoch": 1.623119556611243, "grad_norm": 1.467595100402832, "learning_rate": 2.5542521994134898e-05, "loss": 0.1396, "step": 4100 }, { "epoch": 1.6330166270783848, "grad_norm": 5.85172700881958, "learning_rate": 2.5359237536656894e-05, "loss": 0.1363, "step": 4125 }, { "epoch": 1.6429136975455265, "grad_norm": 1.1925976276397705, "learning_rate": 2.5175953079178887e-05, "loss": 0.1604, "step": 4150 }, { "epoch": 1.6528107680126682, "grad_norm": 8.975228309631348, "learning_rate": 2.4992668621700883e-05, "loss": 0.1534, "step": 4175 }, { "epoch": 1.66270783847981, "grad_norm": 3.5665903091430664, "learning_rate": 2.4809384164222876e-05, "loss": 0.1466, "step": 4200 }, { "epoch": 1.6726049089469517, "grad_norm": 5.208387851715088, "learning_rate": 2.462609970674487e-05, "loss": 0.101, "step": 4225 }, { "epoch": 1.6825019794140934, "grad_norm": 8.346717834472656, "learning_rate": 2.444281524926686e-05, "loss": 0.0966, "step": 4250 }, { "epoch": 1.6923990498812351, "grad_norm": 1.0066956281661987, "learning_rate": 2.4259530791788858e-05, "loss": 0.1251, "step": 4275 }, { "epoch": 1.7022961203483769, "grad_norm": 8.269057273864746, "learning_rate": 2.407624633431085e-05, "loss": 0.1595, "step": 4300 }, { "epoch": 1.7121931908155186, "grad_norm": 6.28223180770874, "learning_rate": 2.3892961876832843e-05, "loss": 0.1732, "step": 4325 }, { "epoch": 1.7220902612826603, "grad_norm": 5.962674140930176, "learning_rate": 2.370967741935484e-05, "loss": 0.1601, "step": 4350 }, { "epoch": 1.731987331749802, "grad_norm": 4.525330066680908, "learning_rate": 2.3526392961876832e-05, "loss": 0.1427, "step": 4375 }, { "epoch": 1.7418844022169437, "grad_norm": 9.384072303771973, "learning_rate": 2.334310850439883e-05, "loss": 0.1489, "step": 4400 }, { "epoch": 1.7517814726840855, "grad_norm": 16.098506927490234, "learning_rate": 2.315982404692082e-05, "loss": 0.1101, "step": 4425 }, { "epoch": 1.7616785431512272, "grad_norm": 4.9497480392456055, "learning_rate": 2.2976539589442817e-05, "loss": 0.1364, "step": 4450 }, { "epoch": 1.771575613618369, "grad_norm": 4.449967384338379, "learning_rate": 2.279325513196481e-05, "loss": 0.1799, "step": 4475 }, { "epoch": 1.7814726840855108, "grad_norm": 3.7315053939819336, "learning_rate": 2.2609970674486806e-05, "loss": 0.1342, "step": 4500 }, { "epoch": 1.7913697545526523, "grad_norm": 3.46779727935791, "learning_rate": 2.24266862170088e-05, "loss": 0.1348, "step": 4525 }, { "epoch": 1.8012668250197943, "grad_norm": 6.9282402992248535, "learning_rate": 2.2243401759530792e-05, "loss": 0.1259, "step": 4550 }, { "epoch": 1.8111638954869358, "grad_norm": 6.039886951446533, "learning_rate": 2.2060117302052788e-05, "loss": 0.1177, "step": 4575 }, { "epoch": 1.8210609659540777, "grad_norm": 8.866342544555664, "learning_rate": 2.187683284457478e-05, "loss": 0.0904, "step": 4600 }, { "epoch": 1.8309580364212192, "grad_norm": 11.28046989440918, "learning_rate": 2.1693548387096777e-05, "loss": 0.1127, "step": 4625 }, { "epoch": 1.8408551068883612, "grad_norm": 0.10267776250839233, "learning_rate": 2.1510263929618766e-05, "loss": 0.1338, "step": 4650 }, { "epoch": 1.8507521773555027, "grad_norm": 4.542361259460449, "learning_rate": 2.1326979472140763e-05, "loss": 0.1465, "step": 4675 }, { "epoch": 1.8606492478226446, "grad_norm": 6.66448974609375, "learning_rate": 2.1143695014662755e-05, "loss": 0.1373, "step": 4700 }, { "epoch": 1.8705463182897861, "grad_norm": 5.7664690017700195, "learning_rate": 2.096041055718475e-05, "loss": 0.1329, "step": 4725 }, { "epoch": 1.880443388756928, "grad_norm": 6.261977195739746, "learning_rate": 2.0777126099706744e-05, "loss": 0.0891, "step": 4750 }, { "epoch": 1.8903404592240696, "grad_norm": 0.11381009221076965, "learning_rate": 2.059384164222874e-05, "loss": 0.1099, "step": 4775 }, { "epoch": 1.9002375296912115, "grad_norm": 2.1505606174468994, "learning_rate": 2.0410557184750733e-05, "loss": 0.157, "step": 4800 }, { "epoch": 1.910134600158353, "grad_norm": 8.369518280029297, "learning_rate": 2.022727272727273e-05, "loss": 0.1188, "step": 4825 }, { "epoch": 1.920031670625495, "grad_norm": 14.388636589050293, "learning_rate": 2.0043988269794722e-05, "loss": 0.1404, "step": 4850 }, { "epoch": 1.9299287410926365, "grad_norm": 0.20757636427879333, "learning_rate": 1.9860703812316715e-05, "loss": 0.1269, "step": 4875 }, { "epoch": 1.9398258115597784, "grad_norm": 4.5515875816345215, "learning_rate": 1.967741935483871e-05, "loss": 0.0991, "step": 4900 }, { "epoch": 1.94972288202692, "grad_norm": 0.5187767744064331, "learning_rate": 1.9494134897360704e-05, "loss": 0.0744, "step": 4925 }, { "epoch": 1.9596199524940618, "grad_norm": 7.752375602722168, "learning_rate": 1.93108504398827e-05, "loss": 0.1262, "step": 4950 }, { "epoch": 1.9695170229612033, "grad_norm": 4.192614555358887, "learning_rate": 1.9127565982404693e-05, "loss": 0.1082, "step": 4975 }, { "epoch": 1.9794140934283453, "grad_norm": 12.195773124694824, "learning_rate": 1.894428152492669e-05, "loss": 0.118, "step": 5000 }, { "epoch": 1.9893111638954868, "grad_norm": 4.470797061920166, "learning_rate": 1.8760997067448682e-05, "loss": 0.1131, "step": 5025 }, { "epoch": 1.9992082343626287, "grad_norm": 2.6608407497406006, "learning_rate": 1.8577712609970678e-05, "loss": 0.1176, "step": 5050 }, { "epoch": 2.0, "eval_accuracy": 0.962, "eval_f1_macro": 0.9527593111357376, "eval_f1_micro": 0.962, "eval_f1_weighted": 0.961887560772812, "eval_loss": 0.16408780217170715, "eval_precision_macro": 0.9611804451780929, "eval_precision_micro": 0.962, "eval_precision_weighted": 0.9624429314047246, "eval_recall_macro": 0.9460711525998422, "eval_recall_micro": 0.962, "eval_recall_weighted": 0.962, "eval_runtime": 5.1023, "eval_samples_per_second": 979.953, "eval_steps_per_second": 15.483, "step": 5052 }, { "epoch": 2.0091053048297702, "grad_norm": 6.416041374206543, "learning_rate": 1.8394428152492667e-05, "loss": 0.0793, "step": 5075 }, { "epoch": 2.019002375296912, "grad_norm": 9.853547096252441, "learning_rate": 1.8211143695014664e-05, "loss": 0.0642, "step": 5100 }, { "epoch": 2.0288994457640537, "grad_norm": 9.938668251037598, "learning_rate": 1.8027859237536656e-05, "loss": 0.0778, "step": 5125 }, { "epoch": 2.0387965162311956, "grad_norm": 0.6218538880348206, "learning_rate": 1.7844574780058653e-05, "loss": 0.07, "step": 5150 }, { "epoch": 2.048693586698337, "grad_norm": 7.014169216156006, "learning_rate": 1.7661290322580645e-05, "loss": 0.0591, "step": 5175 }, { "epoch": 2.058590657165479, "grad_norm": 0.011782053858041763, "learning_rate": 1.7478005865102638e-05, "loss": 0.0638, "step": 5200 }, { "epoch": 2.0684877276326206, "grad_norm": 2.117039918899536, "learning_rate": 1.7294721407624634e-05, "loss": 0.0571, "step": 5225 }, { "epoch": 2.0783847980997625, "grad_norm": 6.522469997406006, "learning_rate": 1.7111436950146627e-05, "loss": 0.0623, "step": 5250 }, { "epoch": 2.088281868566904, "grad_norm": 0.01880364678800106, "learning_rate": 1.6928152492668623e-05, "loss": 0.0487, "step": 5275 }, { "epoch": 2.098178939034046, "grad_norm": 0.3510414958000183, "learning_rate": 1.6744868035190616e-05, "loss": 0.0678, "step": 5300 }, { "epoch": 2.1080760095011875, "grad_norm": 0.7997303009033203, "learning_rate": 1.6561583577712612e-05, "loss": 0.0633, "step": 5325 }, { "epoch": 2.1179730799683294, "grad_norm": 0.026792127639055252, "learning_rate": 1.6378299120234605e-05, "loss": 0.0861, "step": 5350 }, { "epoch": 2.127870150435471, "grad_norm": 0.4307959973812103, "learning_rate": 1.61950146627566e-05, "loss": 0.0593, "step": 5375 }, { "epoch": 2.137767220902613, "grad_norm": 0.2015238106250763, "learning_rate": 1.6011730205278594e-05, "loss": 0.0576, "step": 5400 }, { "epoch": 2.147664291369755, "grad_norm": 0.7783300876617432, "learning_rate": 1.5828445747800587e-05, "loss": 0.0734, "step": 5425 }, { "epoch": 2.1575613618368963, "grad_norm": 0.03129582852125168, "learning_rate": 1.5645161290322583e-05, "loss": 0.0778, "step": 5450 }, { "epoch": 2.167458432304038, "grad_norm": 3.1593480110168457, "learning_rate": 1.5461876832844576e-05, "loss": 0.0872, "step": 5475 }, { "epoch": 2.1773555027711797, "grad_norm": 0.04930011183023453, "learning_rate": 1.527859237536657e-05, "loss": 0.0458, "step": 5500 }, { "epoch": 2.1872525732383217, "grad_norm": 8.676435470581055, "learning_rate": 1.5095307917888563e-05, "loss": 0.0268, "step": 5525 }, { "epoch": 2.197149643705463, "grad_norm": 13.317243576049805, "learning_rate": 1.4912023460410557e-05, "loss": 0.0375, "step": 5550 }, { "epoch": 2.2070467141726047, "grad_norm": 6.073598861694336, "learning_rate": 1.4728739002932552e-05, "loss": 0.045, "step": 5575 }, { "epoch": 2.2169437846397466, "grad_norm": 1.5578258037567139, "learning_rate": 1.4545454545454545e-05, "loss": 0.0858, "step": 5600 }, { "epoch": 2.2268408551068886, "grad_norm": 1.701669692993164, "learning_rate": 1.4362170087976539e-05, "loss": 0.0505, "step": 5625 }, { "epoch": 2.23673792557403, "grad_norm": 9.207208633422852, "learning_rate": 1.4178885630498534e-05, "loss": 0.0729, "step": 5650 }, { "epoch": 2.246634996041172, "grad_norm": 0.026624349877238274, "learning_rate": 1.3995601173020528e-05, "loss": 0.0368, "step": 5675 }, { "epoch": 2.2565320665083135, "grad_norm": 0.5754280686378479, "learning_rate": 1.3812316715542523e-05, "loss": 0.0518, "step": 5700 }, { "epoch": 2.2664291369754554, "grad_norm": 0.3010414242744446, "learning_rate": 1.3629032258064517e-05, "loss": 0.0675, "step": 5725 }, { "epoch": 2.276326207442597, "grad_norm": 0.0495084747672081, "learning_rate": 1.3445747800586511e-05, "loss": 0.0861, "step": 5750 }, { "epoch": 2.286223277909739, "grad_norm": 2.4626643657684326, "learning_rate": 1.3262463343108506e-05, "loss": 0.0792, "step": 5775 }, { "epoch": 2.2961203483768804, "grad_norm": 0.9338565468788147, "learning_rate": 1.30791788856305e-05, "loss": 0.0385, "step": 5800 }, { "epoch": 2.3060174188440223, "grad_norm": 3.80712628364563, "learning_rate": 1.2895894428152493e-05, "loss": 0.0907, "step": 5825 }, { "epoch": 2.315914489311164, "grad_norm": 6.940539836883545, "learning_rate": 1.2712609970674488e-05, "loss": 0.0802, "step": 5850 }, { "epoch": 2.325811559778306, "grad_norm": 4.521027088165283, "learning_rate": 1.2529325513196482e-05, "loss": 0.0752, "step": 5875 }, { "epoch": 2.3357086302454473, "grad_norm": 1.9812321662902832, "learning_rate": 1.2346041055718475e-05, "loss": 0.082, "step": 5900 }, { "epoch": 2.3456057007125892, "grad_norm": 11.957037925720215, "learning_rate": 1.216275659824047e-05, "loss": 0.0492, "step": 5925 }, { "epoch": 2.3555027711797307, "grad_norm": 0.16896741092205048, "learning_rate": 1.1979472140762464e-05, "loss": 0.0519, "step": 5950 }, { "epoch": 2.3653998416468727, "grad_norm": 0.6698777675628662, "learning_rate": 1.1796187683284458e-05, "loss": 0.1032, "step": 5975 }, { "epoch": 2.375296912114014, "grad_norm": 13.273818969726562, "learning_rate": 1.1612903225806453e-05, "loss": 0.0844, "step": 6000 }, { "epoch": 2.385193982581156, "grad_norm": 2.729861259460449, "learning_rate": 1.1429618768328447e-05, "loss": 0.0663, "step": 6025 }, { "epoch": 2.3950910530482976, "grad_norm": 15.863778114318848, "learning_rate": 1.124633431085044e-05, "loss": 0.039, "step": 6050 }, { "epoch": 2.4049881235154396, "grad_norm": 8.72951602935791, "learning_rate": 1.1063049853372435e-05, "loss": 0.0667, "step": 6075 }, { "epoch": 2.414885193982581, "grad_norm": 0.06009228155016899, "learning_rate": 1.0879765395894429e-05, "loss": 0.0962, "step": 6100 }, { "epoch": 2.424782264449723, "grad_norm": 1.6163275241851807, "learning_rate": 1.0696480938416424e-05, "loss": 0.0765, "step": 6125 }, { "epoch": 2.4346793349168645, "grad_norm": 0.023228373378515244, "learning_rate": 1.0513196480938416e-05, "loss": 0.0786, "step": 6150 }, { "epoch": 2.4445764053840064, "grad_norm": 0.010257094167172909, "learning_rate": 1.032991202346041e-05, "loss": 0.0398, "step": 6175 }, { "epoch": 2.454473475851148, "grad_norm": 5.985715389251709, "learning_rate": 1.0146627565982405e-05, "loss": 0.0757, "step": 6200 }, { "epoch": 2.46437054631829, "grad_norm": 0.06866980344057083, "learning_rate": 9.9633431085044e-06, "loss": 0.0554, "step": 6225 }, { "epoch": 2.4742676167854314, "grad_norm": 4.456401348114014, "learning_rate": 9.780058651026392e-06, "loss": 0.0496, "step": 6250 }, { "epoch": 2.4841646872525733, "grad_norm": 17.26448631286621, "learning_rate": 9.596774193548387e-06, "loss": 0.0679, "step": 6275 }, { "epoch": 2.494061757719715, "grad_norm": 8.371593475341797, "learning_rate": 9.413489736070381e-06, "loss": 0.0745, "step": 6300 }, { "epoch": 2.5039588281868568, "grad_norm": 0.02239610068500042, "learning_rate": 9.230205278592376e-06, "loss": 0.0557, "step": 6325 }, { "epoch": 2.5138558986539983, "grad_norm": 0.046766772866249084, "learning_rate": 9.04692082111437e-06, "loss": 0.0438, "step": 6350 }, { "epoch": 2.52375296912114, "grad_norm": 8.119983673095703, "learning_rate": 8.863636363636365e-06, "loss": 0.0713, "step": 6375 }, { "epoch": 2.5336500395882817, "grad_norm": 0.0916699767112732, "learning_rate": 8.68035190615836e-06, "loss": 0.1, "step": 6400 }, { "epoch": 2.5435471100554237, "grad_norm": 6.661031246185303, "learning_rate": 8.497067448680352e-06, "loss": 0.0949, "step": 6425 }, { "epoch": 2.553444180522565, "grad_norm": 5.773529529571533, "learning_rate": 8.313782991202347e-06, "loss": 0.0834, "step": 6450 }, { "epoch": 2.563341250989707, "grad_norm": 19.465478897094727, "learning_rate": 8.13049853372434e-06, "loss": 0.0927, "step": 6475 }, { "epoch": 2.5732383214568486, "grad_norm": 9.270087242126465, "learning_rate": 7.947214076246334e-06, "loss": 0.0591, "step": 6500 }, { "epoch": 2.5831353919239906, "grad_norm": 1.235212802886963, "learning_rate": 7.763929618768328e-06, "loss": 0.1145, "step": 6525 }, { "epoch": 2.5930324623911325, "grad_norm": 0.10507909208536148, "learning_rate": 7.580645161290323e-06, "loss": 0.0928, "step": 6550 }, { "epoch": 2.602929532858274, "grad_norm": 1.12295663356781, "learning_rate": 7.397360703812317e-06, "loss": 0.0602, "step": 6575 }, { "epoch": 2.6128266033254155, "grad_norm": 8.503447532653809, "learning_rate": 7.214076246334312e-06, "loss": 0.077, "step": 6600 }, { "epoch": 2.6227236737925574, "grad_norm": 0.9171582460403442, "learning_rate": 7.030791788856305e-06, "loss": 0.048, "step": 6625 }, { "epoch": 2.6326207442596994, "grad_norm": 0.28691366314888, "learning_rate": 6.8475073313783e-06, "loss": 0.0548, "step": 6650 }, { "epoch": 2.642517814726841, "grad_norm": 2.4092676639556885, "learning_rate": 6.664222873900293e-06, "loss": 0.0654, "step": 6675 }, { "epoch": 2.6524148851939824, "grad_norm": 0.248480886220932, "learning_rate": 6.480938416422287e-06, "loss": 0.0776, "step": 6700 }, { "epoch": 2.6623119556611243, "grad_norm": 1.0501718521118164, "learning_rate": 6.2976539589442816e-06, "loss": 0.0749, "step": 6725 }, { "epoch": 2.6722090261282663, "grad_norm": 0.16996045410633087, "learning_rate": 6.114369501466276e-06, "loss": 0.0448, "step": 6750 }, { "epoch": 2.6821060965954078, "grad_norm": 0.015705592930316925, "learning_rate": 5.93108504398827e-06, "loss": 0.043, "step": 6775 }, { "epoch": 2.6920031670625493, "grad_norm": 13.008760452270508, "learning_rate": 5.747800586510264e-06, "loss": 0.045, "step": 6800 }, { "epoch": 2.701900237529691, "grad_norm": 0.49796026945114136, "learning_rate": 5.564516129032259e-06, "loss": 0.0387, "step": 6825 }, { "epoch": 2.711797307996833, "grad_norm": 6.566326141357422, "learning_rate": 5.381231671554252e-06, "loss": 0.0412, "step": 6850 }, { "epoch": 2.7216943784639747, "grad_norm": 0.014399710111320019, "learning_rate": 5.197947214076247e-06, "loss": 0.0596, "step": 6875 }, { "epoch": 2.731591448931116, "grad_norm": 0.3872062563896179, "learning_rate": 5.014662756598241e-06, "loss": 0.0523, "step": 6900 }, { "epoch": 2.741488519398258, "grad_norm": 0.1425359696149826, "learning_rate": 4.831378299120235e-06, "loss": 0.0461, "step": 6925 }, { "epoch": 2.7513855898654, "grad_norm": 0.4896790683269501, "learning_rate": 4.6480938416422284e-06, "loss": 0.0616, "step": 6950 }, { "epoch": 2.7612826603325415, "grad_norm": 6.286714553833008, "learning_rate": 4.464809384164223e-06, "loss": 0.0896, "step": 6975 }, { "epoch": 2.771179730799683, "grad_norm": 0.058547962456941605, "learning_rate": 4.281524926686217e-06, "loss": 0.0648, "step": 7000 }, { "epoch": 2.781076801266825, "grad_norm": 0.3147684335708618, "learning_rate": 4.098240469208212e-06, "loss": 0.0701, "step": 7025 }, { "epoch": 2.790973871733967, "grad_norm": 2.779256582260132, "learning_rate": 3.9149560117302055e-06, "loss": 0.0352, "step": 7050 }, { "epoch": 2.8008709422011084, "grad_norm": 0.26303109526634216, "learning_rate": 3.7316715542521995e-06, "loss": 0.0663, "step": 7075 }, { "epoch": 2.81076801266825, "grad_norm": 11.363053321838379, "learning_rate": 3.5483870967741936e-06, "loss": 0.0883, "step": 7100 }, { "epoch": 2.820665083135392, "grad_norm": 0.36071789264678955, "learning_rate": 3.365102639296188e-06, "loss": 0.0762, "step": 7125 }, { "epoch": 2.830562153602534, "grad_norm": 0.014336947351694107, "learning_rate": 3.1818181818181817e-06, "loss": 0.0433, "step": 7150 }, { "epoch": 2.8404592240696753, "grad_norm": 8.244430541992188, "learning_rate": 2.9985337243401757e-06, "loss": 0.0436, "step": 7175 }, { "epoch": 2.850356294536817, "grad_norm": 0.018452562391757965, "learning_rate": 2.8152492668621702e-06, "loss": 0.0478, "step": 7200 }, { "epoch": 2.8602533650039588, "grad_norm": 0.9135558605194092, "learning_rate": 2.6319648093841647e-06, "loss": 0.0582, "step": 7225 }, { "epoch": 2.8701504354711007, "grad_norm": 0.03040502220392227, "learning_rate": 2.4486803519061583e-06, "loss": 0.0353, "step": 7250 }, { "epoch": 2.880047505938242, "grad_norm": 0.026754941791296005, "learning_rate": 2.265395894428153e-06, "loss": 0.0769, "step": 7275 }, { "epoch": 2.889944576405384, "grad_norm": 0.09019900858402252, "learning_rate": 2.082111436950147e-06, "loss": 0.0506, "step": 7300 }, { "epoch": 2.8998416468725257, "grad_norm": 0.04084889218211174, "learning_rate": 1.8988269794721409e-06, "loss": 0.0411, "step": 7325 }, { "epoch": 2.9097387173396676, "grad_norm": 2.3881072998046875, "learning_rate": 1.715542521994135e-06, "loss": 0.0549, "step": 7350 }, { "epoch": 2.919635787806809, "grad_norm": 7.836863040924072, "learning_rate": 1.532258064516129e-06, "loss": 0.0514, "step": 7375 }, { "epoch": 2.929532858273951, "grad_norm": 1.4827078580856323, "learning_rate": 1.3489736070381233e-06, "loss": 0.0608, "step": 7400 }, { "epoch": 2.9394299287410925, "grad_norm": 0.012712684459984303, "learning_rate": 1.1656891495601175e-06, "loss": 0.0542, "step": 7425 }, { "epoch": 2.9493269992082345, "grad_norm": 6.55382776260376, "learning_rate": 9.824046920821116e-07, "loss": 0.0518, "step": 7450 }, { "epoch": 2.959224069675376, "grad_norm": 0.21079152822494507, "learning_rate": 7.991202346041056e-07, "loss": 0.0618, "step": 7475 }, { "epoch": 2.969121140142518, "grad_norm": 4.107754707336426, "learning_rate": 6.158357771260998e-07, "loss": 0.0368, "step": 7500 }, { "epoch": 2.9790182106096594, "grad_norm": 0.4540683925151825, "learning_rate": 4.325513196480939e-07, "loss": 0.0595, "step": 7525 }, { "epoch": 2.9889152810768014, "grad_norm": 0.416092187166214, "learning_rate": 2.4926686217008803e-07, "loss": 0.0626, "step": 7550 }, { "epoch": 2.998812351543943, "grad_norm": 3.699631452560425, "learning_rate": 6.598240469208211e-08, "loss": 0.0729, "step": 7575 }, { "epoch": 3.0, "eval_accuracy": 0.9694, "eval_f1_macro": 0.9613886456444749, "eval_f1_micro": 0.9694, "eval_f1_weighted": 0.9693030681223207, "eval_loss": 0.157407745718956, "eval_precision_macro": 0.9679892485977634, "eval_precision_micro": 0.9694, "eval_precision_weighted": 0.9695713537396466, "eval_recall_macro": 0.9560667596679707, "eval_recall_micro": 0.9694, "eval_recall_weighted": 0.9694, "eval_runtime": 5.0753, "eval_samples_per_second": 985.172, "eval_steps_per_second": 15.566, "step": 7578 } ], "logging_steps": 25, "max_steps": 7578, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.381368787756646e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }