| { | |
| "best_metric": 0.157407745718956, | |
| "best_model_checkpoint": "hlbooks-topic-classifier-bert-multilingual-uncased/checkpoint-7578", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 7578, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009897070467141725, | |
| "grad_norm": 3.174437999725342, | |
| "learning_rate": 1.5171503957783642e-06, | |
| "loss": 3.0686, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01979414093428345, | |
| "grad_norm": 5.321998119354248, | |
| "learning_rate": 3.1002638522427443e-06, | |
| "loss": 2.9657, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029691211401425176, | |
| "grad_norm": 7.038234710693359, | |
| "learning_rate": 4.617414248021108e-06, | |
| "loss": 2.8352, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0395882818685669, | |
| "grad_norm": 6.537544250488281, | |
| "learning_rate": 6.200527704485489e-06, | |
| "loss": 2.738, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04948535233570863, | |
| "grad_norm": 6.735942840576172, | |
| "learning_rate": 7.849604221635884e-06, | |
| "loss": 2.6217, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05938242280285035, | |
| "grad_norm": 5.756272792816162, | |
| "learning_rate": 9.49868073878628e-06, | |
| "loss": 2.4216, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06927949326999208, | |
| "grad_norm": 5.069972991943359, | |
| "learning_rate": 1.1147757255936676e-05, | |
| "loss": 2.2817, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0791765637371338, | |
| "grad_norm": 8.664924621582031, | |
| "learning_rate": 1.2796833773087072e-05, | |
| "loss": 2.1123, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08907363420427554, | |
| "grad_norm": 9.564645767211914, | |
| "learning_rate": 1.4445910290237468e-05, | |
| "loss": 1.8298, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.09897070467141726, | |
| "grad_norm": 18.612655639648438, | |
| "learning_rate": 1.6094986807387864e-05, | |
| "loss": 1.4781, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10886777513855898, | |
| "grad_norm": 7.96259069442749, | |
| "learning_rate": 1.774406332453826e-05, | |
| "loss": 1.176, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1187648456057007, | |
| "grad_norm": 9.508382797241211, | |
| "learning_rate": 1.9393139841688653e-05, | |
| "loss": 1.038, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12866191607284244, | |
| "grad_norm": 9.386768341064453, | |
| "learning_rate": 2.104221635883905e-05, | |
| "loss": 0.9101, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.13855898653998416, | |
| "grad_norm": 8.491036415100098, | |
| "learning_rate": 2.269129287598945e-05, | |
| "loss": 0.8534, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14845605700712589, | |
| "grad_norm": 13.790663719177246, | |
| "learning_rate": 2.4340369393139843e-05, | |
| "loss": 0.6119, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1583531274742676, | |
| "grad_norm": 14.771766662597656, | |
| "learning_rate": 2.5989445910290237e-05, | |
| "loss": 0.6075, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16825019794140933, | |
| "grad_norm": 5.174952030181885, | |
| "learning_rate": 2.763852242744063e-05, | |
| "loss": 0.6037, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.17814726840855108, | |
| "grad_norm": 12.023221015930176, | |
| "learning_rate": 2.9287598944591033e-05, | |
| "loss": 0.5757, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1880443388756928, | |
| "grad_norm": 6.669355869293213, | |
| "learning_rate": 3.093667546174143e-05, | |
| "loss": 0.5343, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.19794140934283452, | |
| "grad_norm": 11.855779647827148, | |
| "learning_rate": 3.258575197889182e-05, | |
| "loss": 0.5008, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20783847980997625, | |
| "grad_norm": 3.5531342029571533, | |
| "learning_rate": 3.423482849604222e-05, | |
| "loss": 0.4706, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.21773555027711797, | |
| "grad_norm": 26.04022216796875, | |
| "learning_rate": 3.588390501319262e-05, | |
| "loss": 0.483, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2276326207442597, | |
| "grad_norm": 7.481447696685791, | |
| "learning_rate": 3.753298153034301e-05, | |
| "loss": 0.4332, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.2375296912114014, | |
| "grad_norm": 18.177900314331055, | |
| "learning_rate": 3.9182058047493406e-05, | |
| "loss": 0.444, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24742676167854316, | |
| "grad_norm": 10.481646537780762, | |
| "learning_rate": 4.08311345646438e-05, | |
| "loss": 0.3346, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.2573238321456849, | |
| "grad_norm": 15.196333885192871, | |
| "learning_rate": 4.2480211081794194e-05, | |
| "loss": 0.3647, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2672209026128266, | |
| "grad_norm": 8.303031921386719, | |
| "learning_rate": 4.412928759894459e-05, | |
| "loss": 0.4139, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.2771179730799683, | |
| "grad_norm": 9.322664260864258, | |
| "learning_rate": 4.577836411609499e-05, | |
| "loss": 0.4656, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.28701504354711005, | |
| "grad_norm": 7.468392848968506, | |
| "learning_rate": 4.7427440633245384e-05, | |
| "loss": 0.3996, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.29691211401425177, | |
| "grad_norm": 7.190126895904541, | |
| "learning_rate": 4.907651715039578e-05, | |
| "loss": 0.3771, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3068091844813935, | |
| "grad_norm": 8.512736320495605, | |
| "learning_rate": 4.991935483870968e-05, | |
| "loss": 0.4247, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.3167062549485352, | |
| "grad_norm": 15.00452995300293, | |
| "learning_rate": 4.973607038123168e-05, | |
| "loss": 0.3576, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.32660332541567694, | |
| "grad_norm": 19.776445388793945, | |
| "learning_rate": 4.955278592375367e-05, | |
| "loss": 0.347, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.33650039588281866, | |
| "grad_norm": 9.140093803405762, | |
| "learning_rate": 4.9369501466275664e-05, | |
| "loss": 0.3799, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.34639746634996044, | |
| "grad_norm": 12.198457717895508, | |
| "learning_rate": 4.918621700879766e-05, | |
| "loss": 0.4086, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.35629453681710216, | |
| "grad_norm": 3.6618921756744385, | |
| "learning_rate": 4.900293255131965e-05, | |
| "loss": 0.3457, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3661916072842439, | |
| "grad_norm": 17.983041763305664, | |
| "learning_rate": 4.881964809384165e-05, | |
| "loss": 0.364, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.3760886777513856, | |
| "grad_norm": 0.4606013000011444, | |
| "learning_rate": 4.863636363636364e-05, | |
| "loss": 0.2662, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3859857482185273, | |
| "grad_norm": 5.343178749084473, | |
| "learning_rate": 4.8453079178885635e-05, | |
| "loss": 0.3289, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.39588281868566905, | |
| "grad_norm": 11.04477596282959, | |
| "learning_rate": 4.826979472140763e-05, | |
| "loss": 0.3723, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.40577988915281077, | |
| "grad_norm": 8.415637016296387, | |
| "learning_rate": 4.808651026392962e-05, | |
| "loss": 0.29, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.4156769596199525, | |
| "grad_norm": 37.04660415649414, | |
| "learning_rate": 4.790322580645161e-05, | |
| "loss": 0.4181, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4255740300870942, | |
| "grad_norm": 11.579482078552246, | |
| "learning_rate": 4.7719941348973606e-05, | |
| "loss": 0.2785, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.43547110055423593, | |
| "grad_norm": 8.857477188110352, | |
| "learning_rate": 4.75366568914956e-05, | |
| "loss": 0.3266, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.44536817102137766, | |
| "grad_norm": 5.317532539367676, | |
| "learning_rate": 4.73533724340176e-05, | |
| "loss": 0.344, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.4552652414885194, | |
| "grad_norm": 4.178307056427002, | |
| "learning_rate": 4.717008797653959e-05, | |
| "loss": 0.3613, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4651623119556611, | |
| "grad_norm": 8.49862003326416, | |
| "learning_rate": 4.6986803519061584e-05, | |
| "loss": 0.2527, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.4750593824228028, | |
| "grad_norm": 0.5914684534072876, | |
| "learning_rate": 4.6803519061583577e-05, | |
| "loss": 0.258, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4849564528899446, | |
| "grad_norm": 3.90535306930542, | |
| "learning_rate": 4.662023460410557e-05, | |
| "loss": 0.3091, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.4948535233570863, | |
| "grad_norm": 7.211574077606201, | |
| "learning_rate": 4.643695014662757e-05, | |
| "loss": 0.3302, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.504750593824228, | |
| "grad_norm": 9.782793045043945, | |
| "learning_rate": 4.625366568914956e-05, | |
| "loss": 0.3487, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.5146476642913698, | |
| "grad_norm": 9.339737892150879, | |
| "learning_rate": 4.6070381231671554e-05, | |
| "loss": 0.2164, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5245447347585115, | |
| "grad_norm": 3.444295644760132, | |
| "learning_rate": 4.588709677419355e-05, | |
| "loss": 0.2874, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.5344418052256532, | |
| "grad_norm": 15.61107063293457, | |
| "learning_rate": 4.570381231671555e-05, | |
| "loss": 0.3161, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5443388756927949, | |
| "grad_norm": 12.160496711730957, | |
| "learning_rate": 4.552052785923754e-05, | |
| "loss": 0.2242, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.5542359461599367, | |
| "grad_norm": 7.081170558929443, | |
| "learning_rate": 4.533724340175953e-05, | |
| "loss": 0.2625, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5641330166270784, | |
| "grad_norm": 2.664806365966797, | |
| "learning_rate": 4.5153958944281525e-05, | |
| "loss": 0.2885, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.5740300870942201, | |
| "grad_norm": 5.956775188446045, | |
| "learning_rate": 4.497067448680352e-05, | |
| "loss": 0.3064, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5839271575613618, | |
| "grad_norm": 10.270496368408203, | |
| "learning_rate": 4.478739002932552e-05, | |
| "loss": 0.2519, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.5938242280285035, | |
| "grad_norm": 11.354063987731934, | |
| "learning_rate": 4.460410557184751e-05, | |
| "loss": 0.2874, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6037212984956453, | |
| "grad_norm": 4.400442600250244, | |
| "learning_rate": 4.44208211143695e-05, | |
| "loss": 0.248, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.613618368962787, | |
| "grad_norm": 5.145227909088135, | |
| "learning_rate": 4.4237536656891496e-05, | |
| "loss": 0.2757, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.6235154394299287, | |
| "grad_norm": 6.018128395080566, | |
| "learning_rate": 4.4054252199413495e-05, | |
| "loss": 0.2929, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.6334125098970704, | |
| "grad_norm": 11.10319709777832, | |
| "learning_rate": 4.387096774193549e-05, | |
| "loss": 0.2645, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6433095803642122, | |
| "grad_norm": 4.846808433532715, | |
| "learning_rate": 4.368768328445748e-05, | |
| "loss": 0.1884, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.6532066508313539, | |
| "grad_norm": 20.15575408935547, | |
| "learning_rate": 4.3504398826979474e-05, | |
| "loss": 0.2996, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6631037212984956, | |
| "grad_norm": 15.41659164428711, | |
| "learning_rate": 4.3321114369501466e-05, | |
| "loss": 0.2613, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.6730007917656373, | |
| "grad_norm": 9.153544425964355, | |
| "learning_rate": 4.3137829912023466e-05, | |
| "loss": 0.2867, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6828978622327792, | |
| "grad_norm": 6.918684482574463, | |
| "learning_rate": 4.295454545454546e-05, | |
| "loss": 0.1998, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.6927949326999209, | |
| "grad_norm": 3.967953681945801, | |
| "learning_rate": 4.277126099706745e-05, | |
| "loss": 0.2632, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7026920031670626, | |
| "grad_norm": 6.128458023071289, | |
| "learning_rate": 4.2587976539589444e-05, | |
| "loss": 0.2471, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.7125890736342043, | |
| "grad_norm": 3.5776052474975586, | |
| "learning_rate": 4.2404692082111444e-05, | |
| "loss": 0.2609, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.722486144101346, | |
| "grad_norm": 8.483747482299805, | |
| "learning_rate": 4.222140762463344e-05, | |
| "loss": 0.2468, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.7323832145684878, | |
| "grad_norm": 3.365809679031372, | |
| "learning_rate": 4.203812316715543e-05, | |
| "loss": 0.2166, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7422802850356295, | |
| "grad_norm": 6.7934489250183105, | |
| "learning_rate": 4.1854838709677415e-05, | |
| "loss": 0.207, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.7521773555027712, | |
| "grad_norm": 10.689802169799805, | |
| "learning_rate": 4.1671554252199415e-05, | |
| "loss": 0.2137, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7620744259699129, | |
| "grad_norm": 8.500500679016113, | |
| "learning_rate": 4.148826979472141e-05, | |
| "loss": 0.2068, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.7719714964370546, | |
| "grad_norm": 3.0795400142669678, | |
| "learning_rate": 4.13049853372434e-05, | |
| "loss": 0.2335, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7818685669041964, | |
| "grad_norm": 6.6960768699646, | |
| "learning_rate": 4.112170087976539e-05, | |
| "loss": 0.3198, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.7917656373713381, | |
| "grad_norm": 9.185827255249023, | |
| "learning_rate": 4.093841642228739e-05, | |
| "loss": 0.2917, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8016627078384798, | |
| "grad_norm": 10.589933395385742, | |
| "learning_rate": 4.0755131964809386e-05, | |
| "loss": 0.2764, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.8115597783056215, | |
| "grad_norm": 4.64451789855957, | |
| "learning_rate": 4.057184750733138e-05, | |
| "loss": 0.2737, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8214568487727633, | |
| "grad_norm": 17.72431182861328, | |
| "learning_rate": 4.038856304985337e-05, | |
| "loss": 0.2484, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.831353919239905, | |
| "grad_norm": 10.842966079711914, | |
| "learning_rate": 4.0205278592375364e-05, | |
| "loss": 0.2172, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8412509897070467, | |
| "grad_norm": 4.673035621643066, | |
| "learning_rate": 4.0021994134897364e-05, | |
| "loss": 0.2094, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.8511480601741884, | |
| "grad_norm": 7.97542142868042, | |
| "learning_rate": 3.9838709677419356e-05, | |
| "loss": 0.2674, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8610451306413301, | |
| "grad_norm": 3.362852096557617, | |
| "learning_rate": 3.965542521994135e-05, | |
| "loss": 0.2136, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.8709422011084719, | |
| "grad_norm": 8.911001205444336, | |
| "learning_rate": 3.947214076246334e-05, | |
| "loss": 0.1915, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8808392715756136, | |
| "grad_norm": 8.764837265014648, | |
| "learning_rate": 3.928885630498534e-05, | |
| "loss": 0.1691, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.8907363420427553, | |
| "grad_norm": 9.035571098327637, | |
| "learning_rate": 3.9105571847507334e-05, | |
| "loss": 0.2329, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.900633412509897, | |
| "grad_norm": 3.9743757247924805, | |
| "learning_rate": 3.892228739002933e-05, | |
| "loss": 0.2151, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.9105304829770388, | |
| "grad_norm": 4.488095283508301, | |
| "learning_rate": 3.873900293255132e-05, | |
| "loss": 0.2525, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9204275534441805, | |
| "grad_norm": 7.458625316619873, | |
| "learning_rate": 3.855571847507331e-05, | |
| "loss": 0.225, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.9303246239113222, | |
| "grad_norm": 12.148482322692871, | |
| "learning_rate": 3.837243401759531e-05, | |
| "loss": 0.2642, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9402216943784639, | |
| "grad_norm": 5.104764461517334, | |
| "learning_rate": 3.8189149560117305e-05, | |
| "loss": 0.2268, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.9501187648456056, | |
| "grad_norm": 4.338552951812744, | |
| "learning_rate": 3.80058651026393e-05, | |
| "loss": 0.217, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9600158353127475, | |
| "grad_norm": 4.058621406555176, | |
| "learning_rate": 3.782258064516129e-05, | |
| "loss": 0.2058, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.9699129057798892, | |
| "grad_norm": 4.036930084228516, | |
| "learning_rate": 3.763929618768329e-05, | |
| "loss": 0.2721, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9798099762470309, | |
| "grad_norm": 5.858448028564453, | |
| "learning_rate": 3.745601173020528e-05, | |
| "loss": 0.2379, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.9897070467141726, | |
| "grad_norm": 10.197368621826172, | |
| "learning_rate": 3.7272727272727276e-05, | |
| "loss": 0.2552, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9996041171813144, | |
| "grad_norm": 3.942063093185425, | |
| "learning_rate": 3.708944281524927e-05, | |
| "loss": 0.1938, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9472, | |
| "eval_f1_macro": 0.9359430887114246, | |
| "eval_f1_micro": 0.9472, | |
| "eval_f1_weighted": 0.947114042501874, | |
| "eval_loss": 0.20199425518512726, | |
| "eval_precision_macro": 0.9529085930911292, | |
| "eval_precision_micro": 0.9472, | |
| "eval_precision_weighted": 0.948726542737522, | |
| "eval_recall_macro": 0.9217770604016604, | |
| "eval_recall_micro": 0.9472, | |
| "eval_recall_weighted": 0.9472, | |
| "eval_runtime": 5.1028, | |
| "eval_samples_per_second": 979.852, | |
| "eval_steps_per_second": 15.482, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 1.009501187648456, | |
| "grad_norm": 2.8100507259368896, | |
| "learning_rate": 3.690615835777126e-05, | |
| "loss": 0.1958, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.0193982581155978, | |
| "grad_norm": 4.251704692840576, | |
| "learning_rate": 3.672287390029326e-05, | |
| "loss": 0.1684, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.0292953285827395, | |
| "grad_norm": 13.66088581085205, | |
| "learning_rate": 3.6539589442815254e-05, | |
| "loss": 0.1564, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.0391923990498813, | |
| "grad_norm": 6.338856220245361, | |
| "learning_rate": 3.6356304985337246e-05, | |
| "loss": 0.1673, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.049089469517023, | |
| "grad_norm": 0.7401424646377563, | |
| "learning_rate": 3.617302052785924e-05, | |
| "loss": 0.1579, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.0589865399841647, | |
| "grad_norm": 7.7578325271606445, | |
| "learning_rate": 3.598973607038124e-05, | |
| "loss": 0.1639, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.0688836104513064, | |
| "grad_norm": 10.281119346618652, | |
| "learning_rate": 3.580645161290323e-05, | |
| "loss": 0.1367, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0787806809184481, | |
| "grad_norm": 9.214536666870117, | |
| "learning_rate": 3.562316715542522e-05, | |
| "loss": 0.1913, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.0886777513855899, | |
| "grad_norm": 7.096231937408447, | |
| "learning_rate": 3.543988269794721e-05, | |
| "loss": 0.1718, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.0985748218527316, | |
| "grad_norm": 1.1231356859207153, | |
| "learning_rate": 3.525659824046921e-05, | |
| "loss": 0.1623, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.1084718923198733, | |
| "grad_norm": 9.105703353881836, | |
| "learning_rate": 3.50733137829912e-05, | |
| "loss": 0.1516, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.118368962787015, | |
| "grad_norm": 2.2311670780181885, | |
| "learning_rate": 3.4890029325513195e-05, | |
| "loss": 0.1094, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.1282660332541568, | |
| "grad_norm": 5.654956817626953, | |
| "learning_rate": 3.470674486803519e-05, | |
| "loss": 0.2151, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.1381631037212985, | |
| "grad_norm": 0.6430861353874207, | |
| "learning_rate": 3.452346041055719e-05, | |
| "loss": 0.1211, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.1480601741884402, | |
| "grad_norm": 4.405457496643066, | |
| "learning_rate": 3.434017595307918e-05, | |
| "loss": 0.1062, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.157957244655582, | |
| "grad_norm": 3.2275402545928955, | |
| "learning_rate": 3.415689149560117e-05, | |
| "loss": 0.1378, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.1678543151227236, | |
| "grad_norm": 3.635753870010376, | |
| "learning_rate": 3.3973607038123166e-05, | |
| "loss": 0.1489, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1777513855898654, | |
| "grad_norm": 2.695546865463257, | |
| "learning_rate": 3.379032258064516e-05, | |
| "loss": 0.1703, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.187648456057007, | |
| "grad_norm": 1.3425699472427368, | |
| "learning_rate": 3.360703812316716e-05, | |
| "loss": 0.1421, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.1975455265241488, | |
| "grad_norm": 11.01319408416748, | |
| "learning_rate": 3.342375366568915e-05, | |
| "loss": 0.1221, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.2074425969912905, | |
| "grad_norm": 12.686071395874023, | |
| "learning_rate": 3.3240469208211144e-05, | |
| "loss": 0.1699, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.2173396674584323, | |
| "grad_norm": 8.48775577545166, | |
| "learning_rate": 3.305718475073314e-05, | |
| "loss": 0.1505, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.227236737925574, | |
| "grad_norm": 5.593795299530029, | |
| "learning_rate": 3.2873900293255136e-05, | |
| "loss": 0.0906, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.2371338083927157, | |
| "grad_norm": 13.118975639343262, | |
| "learning_rate": 3.269061583577713e-05, | |
| "loss": 0.1389, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.2470308788598574, | |
| "grad_norm": 10.423203468322754, | |
| "learning_rate": 3.250733137829912e-05, | |
| "loss": 0.1789, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.2569279493269991, | |
| "grad_norm": 8.423437118530273, | |
| "learning_rate": 3.2324046920821115e-05, | |
| "loss": 0.1683, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.2668250197941409, | |
| "grad_norm": 1.197938323020935, | |
| "learning_rate": 3.214076246334311e-05, | |
| "loss": 0.1356, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2767220902612826, | |
| "grad_norm": 4.034856796264648, | |
| "learning_rate": 3.195747800586511e-05, | |
| "loss": 0.2099, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.2866191607284243, | |
| "grad_norm": 6.324892997741699, | |
| "learning_rate": 3.17741935483871e-05, | |
| "loss": 0.1658, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.2965162311955662, | |
| "grad_norm": 7.48460054397583, | |
| "learning_rate": 3.159090909090909e-05, | |
| "loss": 0.131, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.3064133016627077, | |
| "grad_norm": 21.121984481811523, | |
| "learning_rate": 3.1407624633431085e-05, | |
| "loss": 0.1781, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.3163103721298497, | |
| "grad_norm": 3.5356669425964355, | |
| "learning_rate": 3.1224340175953085e-05, | |
| "loss": 0.1873, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.3262074425969912, | |
| "grad_norm": 9.41774845123291, | |
| "learning_rate": 3.104105571847508e-05, | |
| "loss": 0.1325, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.3361045130641331, | |
| "grad_norm": 14.932723045349121, | |
| "learning_rate": 3.085777126099707e-05, | |
| "loss": 0.1727, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.3460015835312746, | |
| "grad_norm": 6.5432233810424805, | |
| "learning_rate": 3.067448680351906e-05, | |
| "loss": 0.1487, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.3558986539984166, | |
| "grad_norm": 2.8642232418060303, | |
| "learning_rate": 3.049120234604106e-05, | |
| "loss": 0.1365, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.365795724465558, | |
| "grad_norm": 5.638850688934326, | |
| "learning_rate": 3.0307917888563052e-05, | |
| "loss": 0.1628, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.3756927949327, | |
| "grad_norm": 0.21325694024562836, | |
| "learning_rate": 3.0124633431085048e-05, | |
| "loss": 0.1555, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.3855898653998415, | |
| "grad_norm": 3.200031042098999, | |
| "learning_rate": 2.994134897360704e-05, | |
| "loss": 0.1263, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.3954869358669835, | |
| "grad_norm": 10.525226593017578, | |
| "learning_rate": 2.9758064516129037e-05, | |
| "loss": 0.1657, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.405384006334125, | |
| "grad_norm": 4.1915283203125, | |
| "learning_rate": 2.957478005865103e-05, | |
| "loss": 0.1551, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.415281076801267, | |
| "grad_norm": 9.446343421936035, | |
| "learning_rate": 2.9391495601173026e-05, | |
| "loss": 0.1293, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.4251781472684084, | |
| "grad_norm": 15.011846542358398, | |
| "learning_rate": 2.9208211143695012e-05, | |
| "loss": 0.1402, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.4350752177355504, | |
| "grad_norm": 0.7313398718833923, | |
| "learning_rate": 2.902492668621701e-05, | |
| "loss": 0.1057, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.444972288202692, | |
| "grad_norm": 19.252836227416992, | |
| "learning_rate": 2.8841642228739e-05, | |
| "loss": 0.1155, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.4548693586698338, | |
| "grad_norm": 1.615921974182129, | |
| "learning_rate": 2.8658357771260997e-05, | |
| "loss": 0.1466, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.4647664291369755, | |
| "grad_norm": 2.299511432647705, | |
| "learning_rate": 2.847507331378299e-05, | |
| "loss": 0.1147, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.4746634996041172, | |
| "grad_norm": 3.294553756713867, | |
| "learning_rate": 2.8291788856304986e-05, | |
| "loss": 0.1232, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.484560570071259, | |
| "grad_norm": 0.17168129980564117, | |
| "learning_rate": 2.810850439882698e-05, | |
| "loss": 0.1586, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.4944576405384007, | |
| "grad_norm": 12.83199691772461, | |
| "learning_rate": 2.7925219941348972e-05, | |
| "loss": 0.1096, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.5043547110055424, | |
| "grad_norm": 12.708085060119629, | |
| "learning_rate": 2.7741935483870968e-05, | |
| "loss": 0.1043, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5142517814726841, | |
| "grad_norm": 4.487904071807861, | |
| "learning_rate": 2.755865102639296e-05, | |
| "loss": 0.1116, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.5241488519398259, | |
| "grad_norm": 5.373720645904541, | |
| "learning_rate": 2.7375366568914957e-05, | |
| "loss": 0.1456, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.5340459224069676, | |
| "grad_norm": 0.20354461669921875, | |
| "learning_rate": 2.719208211143695e-05, | |
| "loss": 0.1209, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.5439429928741093, | |
| "grad_norm": 5.304108142852783, | |
| "learning_rate": 2.7008797653958946e-05, | |
| "loss": 0.1816, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.553840063341251, | |
| "grad_norm": 0.092073954641819, | |
| "learning_rate": 2.682551319648094e-05, | |
| "loss": 0.1568, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.5637371338083927, | |
| "grad_norm": 9.854479789733887, | |
| "learning_rate": 2.6642228739002935e-05, | |
| "loss": 0.1741, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.5736342042755345, | |
| "grad_norm": 0.4590989947319031, | |
| "learning_rate": 2.6458944281524928e-05, | |
| "loss": 0.1702, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.5835312747426762, | |
| "grad_norm": 0.07429279386997223, | |
| "learning_rate": 2.6275659824046924e-05, | |
| "loss": 0.0975, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.593428345209818, | |
| "grad_norm": 5.391401290893555, | |
| "learning_rate": 2.6092375366568917e-05, | |
| "loss": 0.1497, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.6033254156769596, | |
| "grad_norm": 0.18375837802886963, | |
| "learning_rate": 2.590909090909091e-05, | |
| "loss": 0.116, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.6132224861441014, | |
| "grad_norm": 13.557960510253906, | |
| "learning_rate": 2.5725806451612905e-05, | |
| "loss": 0.1927, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.623119556611243, | |
| "grad_norm": 1.467595100402832, | |
| "learning_rate": 2.5542521994134898e-05, | |
| "loss": 0.1396, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.6330166270783848, | |
| "grad_norm": 5.85172700881958, | |
| "learning_rate": 2.5359237536656894e-05, | |
| "loss": 0.1363, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.6429136975455265, | |
| "grad_norm": 1.1925976276397705, | |
| "learning_rate": 2.5175953079178887e-05, | |
| "loss": 0.1604, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.6528107680126682, | |
| "grad_norm": 8.975228309631348, | |
| "learning_rate": 2.4992668621700883e-05, | |
| "loss": 0.1534, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.66270783847981, | |
| "grad_norm": 3.5665903091430664, | |
| "learning_rate": 2.4809384164222876e-05, | |
| "loss": 0.1466, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6726049089469517, | |
| "grad_norm": 5.208387851715088, | |
| "learning_rate": 2.462609970674487e-05, | |
| "loss": 0.101, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.6825019794140934, | |
| "grad_norm": 8.346717834472656, | |
| "learning_rate": 2.444281524926686e-05, | |
| "loss": 0.0966, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.6923990498812351, | |
| "grad_norm": 1.0066956281661987, | |
| "learning_rate": 2.4259530791788858e-05, | |
| "loss": 0.1251, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.7022961203483769, | |
| "grad_norm": 8.269057273864746, | |
| "learning_rate": 2.407624633431085e-05, | |
| "loss": 0.1595, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.7121931908155186, | |
| "grad_norm": 6.28223180770874, | |
| "learning_rate": 2.3892961876832843e-05, | |
| "loss": 0.1732, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.7220902612826603, | |
| "grad_norm": 5.962674140930176, | |
| "learning_rate": 2.370967741935484e-05, | |
| "loss": 0.1601, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.731987331749802, | |
| "grad_norm": 4.525330066680908, | |
| "learning_rate": 2.3526392961876832e-05, | |
| "loss": 0.1427, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.7418844022169437, | |
| "grad_norm": 9.384072303771973, | |
| "learning_rate": 2.334310850439883e-05, | |
| "loss": 0.1489, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.7517814726840855, | |
| "grad_norm": 16.098506927490234, | |
| "learning_rate": 2.315982404692082e-05, | |
| "loss": 0.1101, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.7616785431512272, | |
| "grad_norm": 4.9497480392456055, | |
| "learning_rate": 2.2976539589442817e-05, | |
| "loss": 0.1364, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.771575613618369, | |
| "grad_norm": 4.449967384338379, | |
| "learning_rate": 2.279325513196481e-05, | |
| "loss": 0.1799, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.7814726840855108, | |
| "grad_norm": 3.7315053939819336, | |
| "learning_rate": 2.2609970674486806e-05, | |
| "loss": 0.1342, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.7913697545526523, | |
| "grad_norm": 3.46779727935791, | |
| "learning_rate": 2.24266862170088e-05, | |
| "loss": 0.1348, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.8012668250197943, | |
| "grad_norm": 6.9282402992248535, | |
| "learning_rate": 2.2243401759530792e-05, | |
| "loss": 0.1259, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.8111638954869358, | |
| "grad_norm": 6.039886951446533, | |
| "learning_rate": 2.2060117302052788e-05, | |
| "loss": 0.1177, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.8210609659540777, | |
| "grad_norm": 8.866342544555664, | |
| "learning_rate": 2.187683284457478e-05, | |
| "loss": 0.0904, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.8309580364212192, | |
| "grad_norm": 11.28046989440918, | |
| "learning_rate": 2.1693548387096777e-05, | |
| "loss": 0.1127, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.8408551068883612, | |
| "grad_norm": 0.10267776250839233, | |
| "learning_rate": 2.1510263929618766e-05, | |
| "loss": 0.1338, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.8507521773555027, | |
| "grad_norm": 4.542361259460449, | |
| "learning_rate": 2.1326979472140763e-05, | |
| "loss": 0.1465, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 1.8606492478226446, | |
| "grad_norm": 6.66448974609375, | |
| "learning_rate": 2.1143695014662755e-05, | |
| "loss": 0.1373, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.8705463182897861, | |
| "grad_norm": 5.7664690017700195, | |
| "learning_rate": 2.096041055718475e-05, | |
| "loss": 0.1329, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.880443388756928, | |
| "grad_norm": 6.261977195739746, | |
| "learning_rate": 2.0777126099706744e-05, | |
| "loss": 0.0891, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.8903404592240696, | |
| "grad_norm": 0.11381009221076965, | |
| "learning_rate": 2.059384164222874e-05, | |
| "loss": 0.1099, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 1.9002375296912115, | |
| "grad_norm": 2.1505606174468994, | |
| "learning_rate": 2.0410557184750733e-05, | |
| "loss": 0.157, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.910134600158353, | |
| "grad_norm": 8.369518280029297, | |
| "learning_rate": 2.022727272727273e-05, | |
| "loss": 0.1188, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 1.920031670625495, | |
| "grad_norm": 14.388636589050293, | |
| "learning_rate": 2.0043988269794722e-05, | |
| "loss": 0.1404, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.9299287410926365, | |
| "grad_norm": 0.20757636427879333, | |
| "learning_rate": 1.9860703812316715e-05, | |
| "loss": 0.1269, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.9398258115597784, | |
| "grad_norm": 4.5515875816345215, | |
| "learning_rate": 1.967741935483871e-05, | |
| "loss": 0.0991, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.94972288202692, | |
| "grad_norm": 0.5187767744064331, | |
| "learning_rate": 1.9494134897360704e-05, | |
| "loss": 0.0744, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 1.9596199524940618, | |
| "grad_norm": 7.752375602722168, | |
| "learning_rate": 1.93108504398827e-05, | |
| "loss": 0.1262, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.9695170229612033, | |
| "grad_norm": 4.192614555358887, | |
| "learning_rate": 1.9127565982404693e-05, | |
| "loss": 0.1082, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 1.9794140934283453, | |
| "grad_norm": 12.195773124694824, | |
| "learning_rate": 1.894428152492669e-05, | |
| "loss": 0.118, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.9893111638954868, | |
| "grad_norm": 4.470797061920166, | |
| "learning_rate": 1.8760997067448682e-05, | |
| "loss": 0.1131, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 1.9992082343626287, | |
| "grad_norm": 2.6608407497406006, | |
| "learning_rate": 1.8577712609970678e-05, | |
| "loss": 0.1176, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.962, | |
| "eval_f1_macro": 0.9527593111357376, | |
| "eval_f1_micro": 0.962, | |
| "eval_f1_weighted": 0.961887560772812, | |
| "eval_loss": 0.16408780217170715, | |
| "eval_precision_macro": 0.9611804451780929, | |
| "eval_precision_micro": 0.962, | |
| "eval_precision_weighted": 0.9624429314047246, | |
| "eval_recall_macro": 0.9460711525998422, | |
| "eval_recall_micro": 0.962, | |
| "eval_recall_weighted": 0.962, | |
| "eval_runtime": 5.1023, | |
| "eval_samples_per_second": 979.953, | |
| "eval_steps_per_second": 15.483, | |
| "step": 5052 | |
| }, | |
| { | |
| "epoch": 2.0091053048297702, | |
| "grad_norm": 6.416041374206543, | |
| "learning_rate": 1.8394428152492667e-05, | |
| "loss": 0.0793, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.019002375296912, | |
| "grad_norm": 9.853547096252441, | |
| "learning_rate": 1.8211143695014664e-05, | |
| "loss": 0.0642, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.0288994457640537, | |
| "grad_norm": 9.938668251037598, | |
| "learning_rate": 1.8027859237536656e-05, | |
| "loss": 0.0778, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.0387965162311956, | |
| "grad_norm": 0.6218538880348206, | |
| "learning_rate": 1.7844574780058653e-05, | |
| "loss": 0.07, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.048693586698337, | |
| "grad_norm": 7.014169216156006, | |
| "learning_rate": 1.7661290322580645e-05, | |
| "loss": 0.0591, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 2.058590657165479, | |
| "grad_norm": 0.011782053858041763, | |
| "learning_rate": 1.7478005865102638e-05, | |
| "loss": 0.0638, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.0684877276326206, | |
| "grad_norm": 2.117039918899536, | |
| "learning_rate": 1.7294721407624634e-05, | |
| "loss": 0.0571, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 2.0783847980997625, | |
| "grad_norm": 6.522469997406006, | |
| "learning_rate": 1.7111436950146627e-05, | |
| "loss": 0.0623, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.088281868566904, | |
| "grad_norm": 0.01880364678800106, | |
| "learning_rate": 1.6928152492668623e-05, | |
| "loss": 0.0487, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 2.098178939034046, | |
| "grad_norm": 0.3510414958000183, | |
| "learning_rate": 1.6744868035190616e-05, | |
| "loss": 0.0678, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.1080760095011875, | |
| "grad_norm": 0.7997303009033203, | |
| "learning_rate": 1.6561583577712612e-05, | |
| "loss": 0.0633, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 2.1179730799683294, | |
| "grad_norm": 0.026792127639055252, | |
| "learning_rate": 1.6378299120234605e-05, | |
| "loss": 0.0861, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.127870150435471, | |
| "grad_norm": 0.4307959973812103, | |
| "learning_rate": 1.61950146627566e-05, | |
| "loss": 0.0593, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 2.137767220902613, | |
| "grad_norm": 0.2015238106250763, | |
| "learning_rate": 1.6011730205278594e-05, | |
| "loss": 0.0576, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.147664291369755, | |
| "grad_norm": 0.7783300876617432, | |
| "learning_rate": 1.5828445747800587e-05, | |
| "loss": 0.0734, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 2.1575613618368963, | |
| "grad_norm": 0.03129582852125168, | |
| "learning_rate": 1.5645161290322583e-05, | |
| "loss": 0.0778, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.167458432304038, | |
| "grad_norm": 3.1593480110168457, | |
| "learning_rate": 1.5461876832844576e-05, | |
| "loss": 0.0872, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 2.1773555027711797, | |
| "grad_norm": 0.04930011183023453, | |
| "learning_rate": 1.527859237536657e-05, | |
| "loss": 0.0458, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.1872525732383217, | |
| "grad_norm": 8.676435470581055, | |
| "learning_rate": 1.5095307917888563e-05, | |
| "loss": 0.0268, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 2.197149643705463, | |
| "grad_norm": 13.317243576049805, | |
| "learning_rate": 1.4912023460410557e-05, | |
| "loss": 0.0375, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.2070467141726047, | |
| "grad_norm": 6.073598861694336, | |
| "learning_rate": 1.4728739002932552e-05, | |
| "loss": 0.045, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 2.2169437846397466, | |
| "grad_norm": 1.5578258037567139, | |
| "learning_rate": 1.4545454545454545e-05, | |
| "loss": 0.0858, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.2268408551068886, | |
| "grad_norm": 1.701669692993164, | |
| "learning_rate": 1.4362170087976539e-05, | |
| "loss": 0.0505, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 2.23673792557403, | |
| "grad_norm": 9.207208633422852, | |
| "learning_rate": 1.4178885630498534e-05, | |
| "loss": 0.0729, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.246634996041172, | |
| "grad_norm": 0.026624349877238274, | |
| "learning_rate": 1.3995601173020528e-05, | |
| "loss": 0.0368, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 2.2565320665083135, | |
| "grad_norm": 0.5754280686378479, | |
| "learning_rate": 1.3812316715542523e-05, | |
| "loss": 0.0518, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.2664291369754554, | |
| "grad_norm": 0.3010414242744446, | |
| "learning_rate": 1.3629032258064517e-05, | |
| "loss": 0.0675, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 2.276326207442597, | |
| "grad_norm": 0.0495084747672081, | |
| "learning_rate": 1.3445747800586511e-05, | |
| "loss": 0.0861, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.286223277909739, | |
| "grad_norm": 2.4626643657684326, | |
| "learning_rate": 1.3262463343108506e-05, | |
| "loss": 0.0792, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 2.2961203483768804, | |
| "grad_norm": 0.9338565468788147, | |
| "learning_rate": 1.30791788856305e-05, | |
| "loss": 0.0385, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.3060174188440223, | |
| "grad_norm": 3.80712628364563, | |
| "learning_rate": 1.2895894428152493e-05, | |
| "loss": 0.0907, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 2.315914489311164, | |
| "grad_norm": 6.940539836883545, | |
| "learning_rate": 1.2712609970674488e-05, | |
| "loss": 0.0802, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.325811559778306, | |
| "grad_norm": 4.521027088165283, | |
| "learning_rate": 1.2529325513196482e-05, | |
| "loss": 0.0752, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 2.3357086302454473, | |
| "grad_norm": 1.9812321662902832, | |
| "learning_rate": 1.2346041055718475e-05, | |
| "loss": 0.082, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.3456057007125892, | |
| "grad_norm": 11.957037925720215, | |
| "learning_rate": 1.216275659824047e-05, | |
| "loss": 0.0492, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 2.3555027711797307, | |
| "grad_norm": 0.16896741092205048, | |
| "learning_rate": 1.1979472140762464e-05, | |
| "loss": 0.0519, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.3653998416468727, | |
| "grad_norm": 0.6698777675628662, | |
| "learning_rate": 1.1796187683284458e-05, | |
| "loss": 0.1032, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 2.375296912114014, | |
| "grad_norm": 13.273818969726562, | |
| "learning_rate": 1.1612903225806453e-05, | |
| "loss": 0.0844, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.385193982581156, | |
| "grad_norm": 2.729861259460449, | |
| "learning_rate": 1.1429618768328447e-05, | |
| "loss": 0.0663, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 2.3950910530482976, | |
| "grad_norm": 15.863778114318848, | |
| "learning_rate": 1.124633431085044e-05, | |
| "loss": 0.039, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.4049881235154396, | |
| "grad_norm": 8.72951602935791, | |
| "learning_rate": 1.1063049853372435e-05, | |
| "loss": 0.0667, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 2.414885193982581, | |
| "grad_norm": 0.06009228155016899, | |
| "learning_rate": 1.0879765395894429e-05, | |
| "loss": 0.0962, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.424782264449723, | |
| "grad_norm": 1.6163275241851807, | |
| "learning_rate": 1.0696480938416424e-05, | |
| "loss": 0.0765, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 2.4346793349168645, | |
| "grad_norm": 0.023228373378515244, | |
| "learning_rate": 1.0513196480938416e-05, | |
| "loss": 0.0786, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.4445764053840064, | |
| "grad_norm": 0.010257094167172909, | |
| "learning_rate": 1.032991202346041e-05, | |
| "loss": 0.0398, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 2.454473475851148, | |
| "grad_norm": 5.985715389251709, | |
| "learning_rate": 1.0146627565982405e-05, | |
| "loss": 0.0757, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.46437054631829, | |
| "grad_norm": 0.06866980344057083, | |
| "learning_rate": 9.9633431085044e-06, | |
| "loss": 0.0554, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 2.4742676167854314, | |
| "grad_norm": 4.456401348114014, | |
| "learning_rate": 9.780058651026392e-06, | |
| "loss": 0.0496, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.4841646872525733, | |
| "grad_norm": 17.26448631286621, | |
| "learning_rate": 9.596774193548387e-06, | |
| "loss": 0.0679, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 2.494061757719715, | |
| "grad_norm": 8.371593475341797, | |
| "learning_rate": 9.413489736070381e-06, | |
| "loss": 0.0745, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.5039588281868568, | |
| "grad_norm": 0.02239610068500042, | |
| "learning_rate": 9.230205278592376e-06, | |
| "loss": 0.0557, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 2.5138558986539983, | |
| "grad_norm": 0.046766772866249084, | |
| "learning_rate": 9.04692082111437e-06, | |
| "loss": 0.0438, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.52375296912114, | |
| "grad_norm": 8.119983673095703, | |
| "learning_rate": 8.863636363636365e-06, | |
| "loss": 0.0713, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 2.5336500395882817, | |
| "grad_norm": 0.0916699767112732, | |
| "learning_rate": 8.68035190615836e-06, | |
| "loss": 0.1, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.5435471100554237, | |
| "grad_norm": 6.661031246185303, | |
| "learning_rate": 8.497067448680352e-06, | |
| "loss": 0.0949, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 2.553444180522565, | |
| "grad_norm": 5.773529529571533, | |
| "learning_rate": 8.313782991202347e-06, | |
| "loss": 0.0834, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.563341250989707, | |
| "grad_norm": 19.465478897094727, | |
| "learning_rate": 8.13049853372434e-06, | |
| "loss": 0.0927, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 2.5732383214568486, | |
| "grad_norm": 9.270087242126465, | |
| "learning_rate": 7.947214076246334e-06, | |
| "loss": 0.0591, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.5831353919239906, | |
| "grad_norm": 1.235212802886963, | |
| "learning_rate": 7.763929618768328e-06, | |
| "loss": 0.1145, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 2.5930324623911325, | |
| "grad_norm": 0.10507909208536148, | |
| "learning_rate": 7.580645161290323e-06, | |
| "loss": 0.0928, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.602929532858274, | |
| "grad_norm": 1.12295663356781, | |
| "learning_rate": 7.397360703812317e-06, | |
| "loss": 0.0602, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 2.6128266033254155, | |
| "grad_norm": 8.503447532653809, | |
| "learning_rate": 7.214076246334312e-06, | |
| "loss": 0.077, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.6227236737925574, | |
| "grad_norm": 0.9171582460403442, | |
| "learning_rate": 7.030791788856305e-06, | |
| "loss": 0.048, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 2.6326207442596994, | |
| "grad_norm": 0.28691366314888, | |
| "learning_rate": 6.8475073313783e-06, | |
| "loss": 0.0548, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.642517814726841, | |
| "grad_norm": 2.4092676639556885, | |
| "learning_rate": 6.664222873900293e-06, | |
| "loss": 0.0654, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 2.6524148851939824, | |
| "grad_norm": 0.248480886220932, | |
| "learning_rate": 6.480938416422287e-06, | |
| "loss": 0.0776, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.6623119556611243, | |
| "grad_norm": 1.0501718521118164, | |
| "learning_rate": 6.2976539589442816e-06, | |
| "loss": 0.0749, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 2.6722090261282663, | |
| "grad_norm": 0.16996045410633087, | |
| "learning_rate": 6.114369501466276e-06, | |
| "loss": 0.0448, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.6821060965954078, | |
| "grad_norm": 0.015705592930316925, | |
| "learning_rate": 5.93108504398827e-06, | |
| "loss": 0.043, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 2.6920031670625493, | |
| "grad_norm": 13.008760452270508, | |
| "learning_rate": 5.747800586510264e-06, | |
| "loss": 0.045, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.701900237529691, | |
| "grad_norm": 0.49796026945114136, | |
| "learning_rate": 5.564516129032259e-06, | |
| "loss": 0.0387, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 2.711797307996833, | |
| "grad_norm": 6.566326141357422, | |
| "learning_rate": 5.381231671554252e-06, | |
| "loss": 0.0412, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.7216943784639747, | |
| "grad_norm": 0.014399710111320019, | |
| "learning_rate": 5.197947214076247e-06, | |
| "loss": 0.0596, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 2.731591448931116, | |
| "grad_norm": 0.3872062563896179, | |
| "learning_rate": 5.014662756598241e-06, | |
| "loss": 0.0523, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.741488519398258, | |
| "grad_norm": 0.1425359696149826, | |
| "learning_rate": 4.831378299120235e-06, | |
| "loss": 0.0461, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 2.7513855898654, | |
| "grad_norm": 0.4896790683269501, | |
| "learning_rate": 4.6480938416422284e-06, | |
| "loss": 0.0616, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.7612826603325415, | |
| "grad_norm": 6.286714553833008, | |
| "learning_rate": 4.464809384164223e-06, | |
| "loss": 0.0896, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 2.771179730799683, | |
| "grad_norm": 0.058547962456941605, | |
| "learning_rate": 4.281524926686217e-06, | |
| "loss": 0.0648, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.781076801266825, | |
| "grad_norm": 0.3147684335708618, | |
| "learning_rate": 4.098240469208212e-06, | |
| "loss": 0.0701, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 2.790973871733967, | |
| "grad_norm": 2.779256582260132, | |
| "learning_rate": 3.9149560117302055e-06, | |
| "loss": 0.0352, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.8008709422011084, | |
| "grad_norm": 0.26303109526634216, | |
| "learning_rate": 3.7316715542521995e-06, | |
| "loss": 0.0663, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 2.81076801266825, | |
| "grad_norm": 11.363053321838379, | |
| "learning_rate": 3.5483870967741936e-06, | |
| "loss": 0.0883, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.820665083135392, | |
| "grad_norm": 0.36071789264678955, | |
| "learning_rate": 3.365102639296188e-06, | |
| "loss": 0.0762, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 2.830562153602534, | |
| "grad_norm": 0.014336947351694107, | |
| "learning_rate": 3.1818181818181817e-06, | |
| "loss": 0.0433, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.8404592240696753, | |
| "grad_norm": 8.244430541992188, | |
| "learning_rate": 2.9985337243401757e-06, | |
| "loss": 0.0436, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 2.850356294536817, | |
| "grad_norm": 0.018452562391757965, | |
| "learning_rate": 2.8152492668621702e-06, | |
| "loss": 0.0478, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.8602533650039588, | |
| "grad_norm": 0.9135558605194092, | |
| "learning_rate": 2.6319648093841647e-06, | |
| "loss": 0.0582, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 2.8701504354711007, | |
| "grad_norm": 0.03040502220392227, | |
| "learning_rate": 2.4486803519061583e-06, | |
| "loss": 0.0353, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.880047505938242, | |
| "grad_norm": 0.026754941791296005, | |
| "learning_rate": 2.265395894428153e-06, | |
| "loss": 0.0769, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 2.889944576405384, | |
| "grad_norm": 0.09019900858402252, | |
| "learning_rate": 2.082111436950147e-06, | |
| "loss": 0.0506, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.8998416468725257, | |
| "grad_norm": 0.04084889218211174, | |
| "learning_rate": 1.8988269794721409e-06, | |
| "loss": 0.0411, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 2.9097387173396676, | |
| "grad_norm": 2.3881072998046875, | |
| "learning_rate": 1.715542521994135e-06, | |
| "loss": 0.0549, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.919635787806809, | |
| "grad_norm": 7.836863040924072, | |
| "learning_rate": 1.532258064516129e-06, | |
| "loss": 0.0514, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 2.929532858273951, | |
| "grad_norm": 1.4827078580856323, | |
| "learning_rate": 1.3489736070381233e-06, | |
| "loss": 0.0608, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.9394299287410925, | |
| "grad_norm": 0.012712684459984303, | |
| "learning_rate": 1.1656891495601175e-06, | |
| "loss": 0.0542, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 2.9493269992082345, | |
| "grad_norm": 6.55382776260376, | |
| "learning_rate": 9.824046920821116e-07, | |
| "loss": 0.0518, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.959224069675376, | |
| "grad_norm": 0.21079152822494507, | |
| "learning_rate": 7.991202346041056e-07, | |
| "loss": 0.0618, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 2.969121140142518, | |
| "grad_norm": 4.107754707336426, | |
| "learning_rate": 6.158357771260998e-07, | |
| "loss": 0.0368, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.9790182106096594, | |
| "grad_norm": 0.4540683925151825, | |
| "learning_rate": 4.325513196480939e-07, | |
| "loss": 0.0595, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 2.9889152810768014, | |
| "grad_norm": 0.416092187166214, | |
| "learning_rate": 2.4926686217008803e-07, | |
| "loss": 0.0626, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.998812351543943, | |
| "grad_norm": 3.699631452560425, | |
| "learning_rate": 6.598240469208211e-08, | |
| "loss": 0.0729, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9694, | |
| "eval_f1_macro": 0.9613886456444749, | |
| "eval_f1_micro": 0.9694, | |
| "eval_f1_weighted": 0.9693030681223207, | |
| "eval_loss": 0.157407745718956, | |
| "eval_precision_macro": 0.9679892485977634, | |
| "eval_precision_micro": 0.9694, | |
| "eval_precision_weighted": 0.9695713537396466, | |
| "eval_recall_macro": 0.9560667596679707, | |
| "eval_recall_micro": 0.9694, | |
| "eval_recall_weighted": 0.9694, | |
| "eval_runtime": 5.0753, | |
| "eval_samples_per_second": 985.172, | |
| "eval_steps_per_second": 15.566, | |
| "step": 7578 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 7578, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.381368787756646e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |