| { | |
| "best_metric": 0.6428677439689636, | |
| "best_model_checkpoint": "/mnt/data/shesj/Trained/RL4CoT/DPO/llama2fullcontinue_largebeta_initialData_iter1Self_resetScoreExp_iter1_2_lowerLr16.json/checkpoint-1000", | |
| "epoch": 0.5, | |
| "eval_steps": 100, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-09, | |
| "logits/chosen": -1.5196828842163086, | |
| "logits/rejected": -1.53714919090271, | |
| "logps/chosen": -15.696101188659668, | |
| "logps/rejected": -19.717897415161133, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.3062500059604645, | |
| "rewards/chosen": -4.1580526158213615e-05, | |
| "rewards/margins": 0.0009152223356068134, | |
| "rewards/rejected": -0.000956802919972688, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1e-08, | |
| "logits/chosen": -1.4667831659317017, | |
| "logits/rejected": -1.474163293838501, | |
| "logps/chosen": -13.819122314453125, | |
| "logps/rejected": -15.621783256530762, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.0003026240156032145, | |
| "rewards/margins": 0.0002748106198851019, | |
| "rewards/rejected": -0.0005774348974227905, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.5e-08, | |
| "logits/chosen": -1.4452192783355713, | |
| "logits/rejected": -1.4530740976333618, | |
| "logps/chosen": -16.253719329833984, | |
| "logps/rejected": -19.002716064453125, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.000418459705542773, | |
| "rewards/margins": 0.0002198911242885515, | |
| "rewards/rejected": 0.00019856853759847581, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2e-08, | |
| "logits/chosen": -1.4347673654556274, | |
| "logits/rejected": -1.457056999206543, | |
| "logps/chosen": -16.931005477905273, | |
| "logps/rejected": -17.90321159362793, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.0022916034795343876, | |
| "rewards/margins": -0.003587479470297694, | |
| "rewards/rejected": 0.0012958759907633066, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.5e-08, | |
| "logits/chosen": -1.5136417150497437, | |
| "logits/rejected": -1.5317498445510864, | |
| "logps/chosen": -13.484731674194336, | |
| "logps/rejected": -17.72646141052246, | |
| "loss": 0.693, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0013993926113471389, | |
| "rewards/margins": 0.003486391855403781, | |
| "rewards/rejected": -0.002086999360471964, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3e-08, | |
| "logits/chosen": -1.4325120449066162, | |
| "logits/rejected": -1.423964262008667, | |
| "logps/chosen": -16.99893569946289, | |
| "logps/rejected": -21.196269989013672, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0006942325853742659, | |
| "rewards/margins": -0.00022374764375854284, | |
| "rewards/rejected": -0.0004704846942331642, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.4999999999999996e-08, | |
| "logits/chosen": -1.503702163696289, | |
| "logits/rejected": -1.5359680652618408, | |
| "logps/chosen": -15.859466552734375, | |
| "logps/rejected": -18.37940788269043, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.002690280321985483, | |
| "rewards/margins": 0.0054735760204494, | |
| "rewards/rejected": -0.002783295465633273, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -1.474712610244751, | |
| "logits/rejected": -1.4741761684417725, | |
| "logps/chosen": -13.360185623168945, | |
| "logps/rejected": -16.91265296936035, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0006134170107543468, | |
| "rewards/margins": 0.0004067299305461347, | |
| "rewards/rejected": -0.0010201467666774988, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.5e-08, | |
| "logits/chosen": -1.4315367937088013, | |
| "logits/rejected": -1.4630348682403564, | |
| "logps/chosen": -14.167867660522461, | |
| "logps/rejected": -18.600988388061523, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.0011325415689498186, | |
| "rewards/margins": -7.549161182396347e-06, | |
| "rewards/rejected": 0.0011400904040783644, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5e-08, | |
| "logits/chosen": -1.466339349746704, | |
| "logits/rejected": -1.4680755138397217, | |
| "logps/chosen": -13.571925163269043, | |
| "logps/rejected": -16.360881805419922, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.0013324546162039042, | |
| "rewards/margins": -5.83843320782762e-05, | |
| "rewards/rejected": -0.0012740703532472253, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.5e-08, | |
| "logits/chosen": -1.4321575164794922, | |
| "logits/rejected": -1.4344770908355713, | |
| "logps/chosen": -14.864067077636719, | |
| "logps/rejected": -20.08001708984375, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.000479007518151775, | |
| "rewards/margins": 0.001067839446477592, | |
| "rewards/rejected": -0.0005888319574296474, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -1.5018717050552368, | |
| "logits/rejected": -1.5294861793518066, | |
| "logps/chosen": -13.418993949890137, | |
| "logps/rejected": -17.377300262451172, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.0013912434224039316, | |
| "rewards/margins": -0.0009791270131245255, | |
| "rewards/rejected": -0.0004121163801755756, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.5e-08, | |
| "logits/chosen": -1.40516197681427, | |
| "logits/rejected": -1.4193370342254639, | |
| "logps/chosen": -13.224818229675293, | |
| "logps/rejected": -15.901044845581055, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.0004983447724953294, | |
| "rewards/margins": 0.0009676685440354049, | |
| "rewards/rejected": -0.0014660133747383952, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.999999999999999e-08, | |
| "logits/chosen": -1.5142648220062256, | |
| "logits/rejected": -1.5050678253173828, | |
| "logps/chosen": -15.645828247070312, | |
| "logps/rejected": -20.22635269165039, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.0008540893904864788, | |
| "rewards/margins": 0.0006531132385134697, | |
| "rewards/rejected": -0.0015072030946612358, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.5e-08, | |
| "logits/chosen": -1.4419431686401367, | |
| "logits/rejected": -1.4324930906295776, | |
| "logps/chosen": -14.863853454589844, | |
| "logps/rejected": -16.58720588684082, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0013656382216140628, | |
| "rewards/margins": 0.0006723797414451838, | |
| "rewards/rejected": 0.0006932583637535572, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -1.532387375831604, | |
| "logits/rejected": -1.5147764682769775, | |
| "logps/chosen": -15.975011825561523, | |
| "logps/rejected": -19.93300437927246, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0007912625442259014, | |
| "rewards/margins": 0.00153960264287889, | |
| "rewards/rejected": -0.0023308652453124523, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.5e-08, | |
| "logits/chosen": -1.4505486488342285, | |
| "logits/rejected": -1.4808809757232666, | |
| "logps/chosen": -17.687313079833984, | |
| "logps/rejected": -24.51900863647461, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.00017726181249599904, | |
| "rewards/margins": 0.0016221065307036042, | |
| "rewards/rejected": -0.001799368066713214, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9e-08, | |
| "logits/chosen": -1.4988192319869995, | |
| "logits/rejected": -1.513806939125061, | |
| "logps/chosen": -14.488324165344238, | |
| "logps/rejected": -19.464611053466797, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0011351148132234812, | |
| "rewards/margins": 0.004005365073680878, | |
| "rewards/rejected": -0.00287025049328804, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.499999999999999e-08, | |
| "logits/chosen": -1.403247594833374, | |
| "logits/rejected": -1.4147038459777832, | |
| "logps/chosen": -16.571857452392578, | |
| "logps/rejected": -16.457307815551758, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.00036610427196137607, | |
| "rewards/margins": 0.004560294561088085, | |
| "rewards/rejected": -0.004194191191345453, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -1.486196756362915, | |
| "logits/rejected": -1.504416823387146, | |
| "logps/chosen": -14.919549942016602, | |
| "logps/rejected": -19.339908599853516, | |
| "loss": 0.691, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.003806027816608548, | |
| "rewards/margins": 0.004251133184880018, | |
| "rewards/rejected": -0.008057162165641785, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_logits/chosen": -1.7919771671295166, | |
| "eval_logits/rejected": -1.8722617626190186, | |
| "eval_logps/chosen": -15.013982772827148, | |
| "eval_logps/rejected": -18.596895217895508, | |
| "eval_loss": 0.6906684637069702, | |
| "eval_rewards/accuracies": 0.5674920082092285, | |
| "eval_rewards/chosen": -8.375057223020121e-05, | |
| "eval_rewards/margins": 0.005779640283435583, | |
| "eval_rewards/rejected": -0.005863390862941742, | |
| "eval_runtime": 306.7, | |
| "eval_samples_per_second": 65.21, | |
| "eval_steps_per_second": 1.021, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.999238475781956e-08, | |
| "logits/chosen": -1.415111780166626, | |
| "logits/rejected": -1.409263253211975, | |
| "logps/chosen": -15.464300155639648, | |
| "logps/rejected": -18.61844253540039, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0006141990306787193, | |
| "rewards/margins": 0.004516326356679201, | |
| "rewards/rejected": -0.00513052474707365, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.996954135095479e-08, | |
| "logits/chosen": -1.4188635349273682, | |
| "logits/rejected": -1.432936668395996, | |
| "logps/chosen": -15.272178649902344, | |
| "logps/rejected": -19.53142738342285, | |
| "loss": 0.689, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.00477779284119606, | |
| "rewards/margins": 0.0066839540377259254, | |
| "rewards/rejected": -0.011461746878921986, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.993147673772868e-08, | |
| "logits/chosen": -1.4598616361618042, | |
| "logits/rejected": -1.4905925989151, | |
| "logps/chosen": -15.878756523132324, | |
| "logps/rejected": -18.827163696289062, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.002627016045153141, | |
| "rewards/margins": 0.01106287818402052, | |
| "rewards/rejected": -0.008435862138867378, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.98782025129912e-08, | |
| "logits/chosen": -1.4647516012191772, | |
| "logits/rejected": -1.4928150177001953, | |
| "logps/chosen": -13.536163330078125, | |
| "logps/rejected": -18.504684448242188, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.006495442241430283, | |
| "rewards/margins": 0.0007650878978893161, | |
| "rewards/rejected": -0.007260529790073633, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.980973490458727e-08, | |
| "logits/chosen": -1.4349157810211182, | |
| "logits/rejected": -1.4921131134033203, | |
| "logps/chosen": -16.818748474121094, | |
| "logps/rejected": -23.732484817504883, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.014983393251895905, | |
| "rewards/margins": 0.003936333581805229, | |
| "rewards/rejected": -0.018919726833701134, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.972609476841366e-08, | |
| "logits/chosen": -1.4056415557861328, | |
| "logits/rejected": -1.4126735925674438, | |
| "logps/chosen": -14.3095121383667, | |
| "logps/rejected": -18.44950294494629, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.005277615040540695, | |
| "rewards/margins": 0.011254631914198399, | |
| "rewards/rejected": -0.01653224602341652, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.96273075820661e-08, | |
| "logits/chosen": -1.5334028005599976, | |
| "logits/rejected": -1.5822241306304932, | |
| "logps/chosen": -15.960824966430664, | |
| "logps/rejected": -21.098979949951172, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.016458828002214432, | |
| "rewards/margins": 0.006202386226505041, | |
| "rewards/rejected": -0.02266121283173561, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.951340343707851e-08, | |
| "logits/chosen": -1.5316803455352783, | |
| "logits/rejected": -1.5605201721191406, | |
| "logps/chosen": -13.550819396972656, | |
| "logps/rejected": -17.886049270629883, | |
| "loss": 0.6851, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.012303833849728107, | |
| "rewards/margins": 0.02252637967467308, | |
| "rewards/rejected": -0.03483021631836891, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.938441702975688e-08, | |
| "logits/chosen": -1.4327102899551392, | |
| "logits/rejected": -1.446903944015503, | |
| "logps/chosen": -13.809109687805176, | |
| "logps/rejected": -17.256572723388672, | |
| "loss": 0.685, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.016683299094438553, | |
| "rewards/margins": 0.029044728726148605, | |
| "rewards/rejected": -0.04572802782058716, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.92403876506104e-08, | |
| "logits/chosen": -1.4938914775848389, | |
| "logits/rejected": -1.5160696506500244, | |
| "logps/chosen": -13.700773239135742, | |
| "logps/rejected": -19.660675048828125, | |
| "loss": 0.6815, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.03682475537061691, | |
| "rewards/margins": 0.010936172679066658, | |
| "rewards/rejected": -0.04776093363761902, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.90813591723832e-08, | |
| "logits/chosen": -1.4303719997406006, | |
| "logits/rejected": -1.4469249248504639, | |
| "logps/chosen": -14.483743667602539, | |
| "logps/rejected": -20.158580780029297, | |
| "loss": 0.6835, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03273214027285576, | |
| "rewards/margins": 0.030618244782090187, | |
| "rewards/rejected": -0.06335039436817169, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.890738003669028e-08, | |
| "logits/chosen": -1.4257906675338745, | |
| "logits/rejected": -1.440292477607727, | |
| "logps/chosen": -13.087536811828613, | |
| "logps/rejected": -18.774667739868164, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.042621154338121414, | |
| "rewards/margins": 0.019855033606290817, | |
| "rewards/rejected": -0.06247618794441223, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.871850323926176e-08, | |
| "logits/chosen": -1.506829023361206, | |
| "logits/rejected": -1.4980268478393555, | |
| "logps/chosen": -15.794275283813477, | |
| "logps/rejected": -22.559085845947266, | |
| "loss": 0.6832, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.04176632687449455, | |
| "rewards/margins": 0.014226436614990234, | |
| "rewards/rejected": -0.05599276348948479, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.851478631379982e-08, | |
| "logits/chosen": -1.5072795152664185, | |
| "logits/rejected": -1.5367839336395264, | |
| "logps/chosen": -16.79397964477539, | |
| "logps/rejected": -18.064043045043945, | |
| "loss": 0.6843, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.037377405911684036, | |
| "rewards/margins": 0.04563738405704498, | |
| "rewards/rejected": -0.08301478624343872, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.82962913144534e-08, | |
| "logits/chosen": -1.507598876953125, | |
| "logits/rejected": -1.5444573163986206, | |
| "logps/chosen": -17.070449829101562, | |
| "logps/rejected": -20.720073699951172, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.06209941580891609, | |
| "rewards/margins": 0.03557855635881424, | |
| "rewards/rejected": -0.09767796844244003, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.806308479691594e-08, | |
| "logits/chosen": -1.4471648931503296, | |
| "logits/rejected": -1.474890947341919, | |
| "logps/chosen": -16.156829833984375, | |
| "logps/rejected": -22.047147750854492, | |
| "loss": 0.68, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.0555298812687397, | |
| "rewards/margins": 0.0073929824866354465, | |
| "rewards/rejected": -0.06292285770177841, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.781523779815177e-08, | |
| "logits/chosen": -1.5135043859481812, | |
| "logits/rejected": -1.5078579187393188, | |
| "logps/chosen": -14.832931518554688, | |
| "logps/rejected": -17.844093322753906, | |
| "loss": 0.6827, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.06777982413768768, | |
| "rewards/margins": 0.03908789902925491, | |
| "rewards/rejected": -0.106867715716362, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.755282581475768e-08, | |
| "logits/chosen": -1.4608676433563232, | |
| "logits/rejected": -1.4990969896316528, | |
| "logps/chosen": -14.89118766784668, | |
| "logps/rejected": -20.47022247314453, | |
| "loss": 0.6749, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.05767616629600525, | |
| "rewards/margins": 0.0543893501162529, | |
| "rewards/rejected": -0.11206551641225815, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.727592877996584e-08, | |
| "logits/chosen": -1.4986900091171265, | |
| "logits/rejected": -1.5200297832489014, | |
| "logps/chosen": -14.921911239624023, | |
| "logps/rejected": -19.319089889526367, | |
| "loss": 0.6782, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.07736112922430038, | |
| "rewards/margins": 0.037412129342556, | |
| "rewards/rejected": -0.11477325856685638, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.698463103929542e-08, | |
| "logits/chosen": -1.4672738313674927, | |
| "logits/rejected": -1.4699958562850952, | |
| "logps/chosen": -16.190366744995117, | |
| "logps/rejected": -19.112018585205078, | |
| "loss": 0.6787, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.06623920053243637, | |
| "rewards/margins": 0.05223413184285164, | |
| "rewards/rejected": -0.11847333610057831, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_logits/chosen": -1.7878340482711792, | |
| "eval_logits/rejected": -1.868047833442688, | |
| "eval_logps/chosen": -15.84277057647705, | |
| "eval_logps/rejected": -19.780603408813477, | |
| "eval_loss": 0.679453432559967, | |
| "eval_rewards/accuracies": 0.5942491888999939, | |
| "eval_rewards/chosen": -0.08296255767345428, | |
| "eval_rewards/margins": 0.04127146303653717, | |
| "eval_rewards/rejected": -0.12423399835824966, | |
| "eval_runtime": 306.7549, | |
| "eval_samples_per_second": 65.199, | |
| "eval_steps_per_second": 1.02, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.667902132486008e-08, | |
| "logits/chosen": -1.4577996730804443, | |
| "logits/rejected": -1.458738088607788, | |
| "logps/chosen": -14.40186882019043, | |
| "logps/rejected": -19.614154815673828, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1032874584197998, | |
| "rewards/margins": 0.023597324267029762, | |
| "rewards/rejected": -0.12688478827476501, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.635919272833936e-08, | |
| "logits/chosen": -1.446189284324646, | |
| "logits/rejected": -1.4532312154769897, | |
| "logps/chosen": -13.800382614135742, | |
| "logps/rejected": -16.633275985717773, | |
| "loss": 0.6743, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.08655096590518951, | |
| "rewards/margins": 0.04802204668521881, | |
| "rewards/rejected": -0.13457301259040833, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.602524267262201e-08, | |
| "logits/chosen": -1.5442548990249634, | |
| "logits/rejected": -1.5252314805984497, | |
| "logps/chosen": -16.61358070373535, | |
| "logps/rejected": -19.388107299804688, | |
| "loss": 0.6745, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.08506567776203156, | |
| "rewards/margins": 0.061091698706150055, | |
| "rewards/rejected": -0.14615735411643982, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.567727288213003e-08, | |
| "logits/chosen": -1.5201714038848877, | |
| "logits/rejected": -1.5108391046524048, | |
| "logps/chosen": -16.2724609375, | |
| "logps/rejected": -19.838550567626953, | |
| "loss": 0.6755, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1071196049451828, | |
| "rewards/margins": 0.05034583806991577, | |
| "rewards/rejected": -0.15746544301509857, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.53153893518325e-08, | |
| "logits/chosen": -1.4277980327606201, | |
| "logits/rejected": -1.4468252658843994, | |
| "logps/chosen": -15.238229751586914, | |
| "logps/rejected": -21.562265396118164, | |
| "loss": 0.6815, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.11674360930919647, | |
| "rewards/margins": 0.013696810230612755, | |
| "rewards/rejected": -0.13044041395187378, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.493970231495834e-08, | |
| "logits/chosen": -1.449592113494873, | |
| "logits/rejected": -1.4903148412704468, | |
| "logps/chosen": -16.092803955078125, | |
| "logps/rejected": -21.503910064697266, | |
| "loss": 0.6787, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.08694227039813995, | |
| "rewards/margins": 0.06734081357717514, | |
| "rewards/rejected": -0.1542830765247345, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.455032620941839e-08, | |
| "logits/chosen": -1.4714128971099854, | |
| "logits/rejected": -1.5002821683883667, | |
| "logps/chosen": -18.0159969329834, | |
| "logps/rejected": -20.794544219970703, | |
| "loss": 0.6774, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.09392134845256805, | |
| "rewards/margins": 0.06613980978727341, | |
| "rewards/rejected": -0.16006115078926086, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.414737964294634e-08, | |
| "logits/chosen": -1.4862656593322754, | |
| "logits/rejected": -1.5194942951202393, | |
| "logps/chosen": -18.190040588378906, | |
| "logps/rejected": -20.4471492767334, | |
| "loss": 0.6773, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.12713944911956787, | |
| "rewards/margins": 0.034943290054798126, | |
| "rewards/rejected": -0.1620827466249466, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.373098535696979e-08, | |
| "logits/chosen": -1.4240232706069946, | |
| "logits/rejected": -1.439587116241455, | |
| "logps/chosen": -16.851634979248047, | |
| "logps/rejected": -21.63901138305664, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.11880864202976227, | |
| "rewards/margins": 0.0625835433602333, | |
| "rewards/rejected": -0.18139217793941498, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.330127018922194e-08, | |
| "logits/chosen": -1.490593433380127, | |
| "logits/rejected": -1.519042730331421, | |
| "logps/chosen": -15.26930046081543, | |
| "logps/rejected": -20.554950714111328, | |
| "loss": 0.6788, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.12308211624622345, | |
| "rewards/margins": 0.049174632877111435, | |
| "rewards/rejected": -0.17225676774978638, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.285836503510561e-08, | |
| "logits/chosen": -1.481011152267456, | |
| "logits/rejected": -1.482369065284729, | |
| "logps/chosen": -14.86046028137207, | |
| "logps/rejected": -20.206274032592773, | |
| "loss": 0.6683, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.09218485653400421, | |
| "rewards/margins": 0.0633331835269928, | |
| "rewards/rejected": -0.155518040060997, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.240240480782128e-08, | |
| "logits/chosen": -1.4365761280059814, | |
| "logits/rejected": -1.4659501314163208, | |
| "logps/chosen": -16.5053768157959, | |
| "logps/rejected": -20.457700729370117, | |
| "loss": 0.6781, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1196146234869957, | |
| "rewards/margins": 0.0831097811460495, | |
| "rewards/rejected": -0.2027243822813034, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.19335283972712e-08, | |
| "logits/chosen": -1.4037281274795532, | |
| "logits/rejected": -1.4219300746917725, | |
| "logps/chosen": -17.773784637451172, | |
| "logps/rejected": -21.13812828063965, | |
| "loss": 0.6672, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.11854933202266693, | |
| "rewards/margins": 0.08150772750377655, | |
| "rewards/rejected": -0.20005705952644348, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.145187862775208e-08, | |
| "logits/chosen": -1.5054607391357422, | |
| "logits/rejected": -1.5367683172225952, | |
| "logps/chosen": -16.548221588134766, | |
| "logps/rejected": -20.993818283081055, | |
| "loss": 0.6649, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.1312890648841858, | |
| "rewards/margins": 0.06594224274158478, | |
| "rewards/rejected": -0.19723130762577057, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.095760221444959e-08, | |
| "logits/chosen": -1.5523430109024048, | |
| "logits/rejected": -1.5738317966461182, | |
| "logps/chosen": -16.80923080444336, | |
| "logps/rejected": -20.842090606689453, | |
| "loss": 0.6712, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.1634162962436676, | |
| "rewards/margins": 0.06340184062719345, | |
| "rewards/rejected": -0.22681812942028046, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.045084971874737e-08, | |
| "logits/chosen": -1.4899578094482422, | |
| "logits/rejected": -1.5115963220596313, | |
| "logps/chosen": -15.990486145019531, | |
| "logps/rejected": -21.041019439697266, | |
| "loss": 0.6678, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1481240689754486, | |
| "rewards/margins": 0.04743753746151924, | |
| "rewards/rejected": -0.19556160271167755, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.993177550236463e-08, | |
| "logits/chosen": -1.484438180923462, | |
| "logits/rejected": -1.499731183052063, | |
| "logps/chosen": -16.02753448486328, | |
| "logps/rejected": -20.631275177001953, | |
| "loss": 0.6734, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.15427681803703308, | |
| "rewards/margins": 0.016259009018540382, | |
| "rewards/rejected": -0.1705358326435089, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.940053768033609e-08, | |
| "logits/chosen": -1.4610778093338013, | |
| "logits/rejected": -1.4643038511276245, | |
| "logps/chosen": -18.099334716796875, | |
| "logps/rejected": -21.200000762939453, | |
| "loss": 0.6758, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.15338566899299622, | |
| "rewards/margins": 0.03785661607980728, | |
| "rewards/rejected": -0.1912422627210617, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.885729807284853e-08, | |
| "logits/chosen": -1.5055420398712158, | |
| "logits/rejected": -1.5390150547027588, | |
| "logps/chosen": -14.061132431030273, | |
| "logps/rejected": -19.544492721557617, | |
| "loss": 0.6667, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.189667746424675, | |
| "rewards/margins": 0.06658226996660233, | |
| "rewards/rejected": -0.2562499940395355, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.83022221559489e-08, | |
| "logits/chosen": -1.3812211751937866, | |
| "logits/rejected": -1.394118070602417, | |
| "logps/chosen": -17.90454864501953, | |
| "logps/rejected": -21.622303009033203, | |
| "loss": 0.6701, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.177689790725708, | |
| "rewards/margins": 0.05468187481164932, | |
| "rewards/rejected": -0.23237165808677673, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_logits/chosen": -1.7818100452423096, | |
| "eval_logits/rejected": -1.8616912364959717, | |
| "eval_logps/chosen": -16.538070678710938, | |
| "eval_logps/rejected": -20.776979446411133, | |
| "eval_loss": 0.6713529229164124, | |
| "eval_rewards/accuracies": 0.6006389856338501, | |
| "eval_rewards/chosen": -0.15249261260032654, | |
| "eval_rewards/margins": 0.07137925922870636, | |
| "eval_rewards/rejected": -0.2238718867301941, | |
| "eval_runtime": 306.8553, | |
| "eval_samples_per_second": 65.177, | |
| "eval_steps_per_second": 1.02, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.77354790111386e-08, | |
| "logits/chosen": -1.4351907968521118, | |
| "logits/rejected": -1.4078080654144287, | |
| "logps/chosen": -15.99042797088623, | |
| "logps/rejected": -19.805822372436523, | |
| "loss": 0.6643, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.14661024510860443, | |
| "rewards/margins": 0.06514577567577362, | |
| "rewards/rejected": -0.21175602078437805, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.715724127386971e-08, | |
| "logits/chosen": -1.4537417888641357, | |
| "logits/rejected": -1.4652783870697021, | |
| "logps/chosen": -18.730056762695312, | |
| "logps/rejected": -22.277685165405273, | |
| "loss": 0.6727, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.18776853382587433, | |
| "rewards/margins": 0.04285965487360954, | |
| "rewards/rejected": -0.23062816262245178, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.656768508095852e-08, | |
| "logits/chosen": -1.400272250175476, | |
| "logits/rejected": -1.3987435102462769, | |
| "logps/chosen": -16.988048553466797, | |
| "logps/rejected": -19.94150161743164, | |
| "loss": 0.6662, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1827572137117386, | |
| "rewards/margins": 0.03802407532930374, | |
| "rewards/rejected": -0.22078128159046173, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.596699001693255e-08, | |
| "logits/chosen": -1.4178965091705322, | |
| "logits/rejected": -1.4355350732803345, | |
| "logps/chosen": -15.241655349731445, | |
| "logps/rejected": -23.634593963623047, | |
| "loss": 0.6629, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.16266849637031555, | |
| "rewards/margins": 0.08267641067504883, | |
| "rewards/rejected": -0.24534490704536438, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.535533905932736e-08, | |
| "logits/chosen": -1.432398796081543, | |
| "logits/rejected": -1.4598662853240967, | |
| "logps/chosen": -16.167316436767578, | |
| "logps/rejected": -20.810152053833008, | |
| "loss": 0.6686, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.18219885230064392, | |
| "rewards/margins": 0.06463146209716797, | |
| "rewards/rejected": -0.2468303143978119, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.473291852294986e-08, | |
| "logits/chosen": -1.4149553775787354, | |
| "logits/rejected": -1.4176180362701416, | |
| "logps/chosen": -15.214263916015625, | |
| "logps/rejected": -20.01323890686035, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.1562959998846054, | |
| "rewards/margins": 0.10322503745555878, | |
| "rewards/rejected": -0.2595210373401642, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.409991800312493e-08, | |
| "logits/chosen": -1.5221331119537354, | |
| "logits/rejected": -1.5471950769424438, | |
| "logps/chosen": -15.114236831665039, | |
| "logps/rejected": -18.62519645690918, | |
| "loss": 0.6609, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.12045025825500488, | |
| "rewards/margins": 0.14489233493804932, | |
| "rewards/rejected": -0.2653425931930542, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.34565303179429e-08, | |
| "logits/chosen": -1.4450016021728516, | |
| "logits/rejected": -1.4777119159698486, | |
| "logps/chosen": -15.960481643676758, | |
| "logps/rejected": -21.401214599609375, | |
| "loss": 0.6669, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.20441024005413055, | |
| "rewards/margins": 0.05542609095573425, | |
| "rewards/rejected": -0.259836345911026, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.280295144952536e-08, | |
| "logits/chosen": -1.3684567213058472, | |
| "logits/rejected": -1.356687307357788, | |
| "logps/chosen": -15.962387084960938, | |
| "logps/rejected": -20.549938201904297, | |
| "loss": 0.6613, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.15356414020061493, | |
| "rewards/margins": 0.054951734840869904, | |
| "rewards/rejected": -0.20851588249206543, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.213938048432696e-08, | |
| "logits/chosen": -1.4533028602600098, | |
| "logits/rejected": -1.47086763381958, | |
| "logps/chosen": -15.168508529663086, | |
| "logps/rejected": -20.278743743896484, | |
| "loss": 0.6692, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1997944861650467, | |
| "rewards/margins": 0.06951111555099487, | |
| "rewards/rejected": -0.26930561661720276, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.146601955249188e-08, | |
| "logits/chosen": -1.4886163473129272, | |
| "logits/rejected": -1.5108848810195923, | |
| "logps/chosen": -18.865304946899414, | |
| "logps/rejected": -24.56046485900879, | |
| "loss": 0.6586, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.1855696439743042, | |
| "rewards/margins": 0.1037302240729332, | |
| "rewards/rejected": -0.289299875497818, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.07830737662829e-08, | |
| "logits/chosen": -1.4619147777557373, | |
| "logits/rejected": -1.4794903993606567, | |
| "logps/chosen": -13.954874992370605, | |
| "logps/rejected": -17.310880661010742, | |
| "loss": 0.6687, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.18103952705860138, | |
| "rewards/margins": 0.09420069307088852, | |
| "rewards/rejected": -0.2752402424812317, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.009075115760243e-08, | |
| "logits/chosen": -1.3863359689712524, | |
| "logits/rejected": -1.3789931535720825, | |
| "logps/chosen": -17.23479652404785, | |
| "logps/rejected": -19.347734451293945, | |
| "loss": 0.664, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.17440780997276306, | |
| "rewards/margins": 0.10439540445804596, | |
| "rewards/rejected": -0.2788031995296478, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 7.938926261462366e-08, | |
| "logits/chosen": -1.355503797531128, | |
| "logits/rejected": -1.3629181385040283, | |
| "logps/chosen": -18.74729347229004, | |
| "logps/rejected": -22.029096603393555, | |
| "loss": 0.6606, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.21672053635120392, | |
| "rewards/margins": 0.04532719776034355, | |
| "rewards/rejected": -0.2620477080345154, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 7.86788218175523e-08, | |
| "logits/chosen": -1.429302453994751, | |
| "logits/rejected": -1.457215666770935, | |
| "logps/chosen": -16.596994400024414, | |
| "logps/rejected": -21.360082626342773, | |
| "loss": 0.6667, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.18682262301445007, | |
| "rewards/margins": 0.1307360827922821, | |
| "rewards/rejected": -0.3175587058067322, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 7.795964517353733e-08, | |
| "logits/chosen": -1.4372557401657104, | |
| "logits/rejected": -1.4457218647003174, | |
| "logps/chosen": -17.684959411621094, | |
| "logps/rejected": -21.197628021240234, | |
| "loss": 0.6617, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1949060708284378, | |
| "rewards/margins": 0.0896020457148552, | |
| "rewards/rejected": -0.2845081090927124, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 7.723195175075135e-08, | |
| "logits/chosen": -1.4864705801010132, | |
| "logits/rejected": -1.4808794260025024, | |
| "logps/chosen": -16.80634880065918, | |
| "logps/rejected": -20.31828498840332, | |
| "loss": 0.6581, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.1962084323167801, | |
| "rewards/margins": 0.1290890872478485, | |
| "rewards/rejected": -0.3252975046634674, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.649596321166024e-08, | |
| "logits/chosen": -1.3273518085479736, | |
| "logits/rejected": -1.3579399585723877, | |
| "logps/chosen": -16.424484252929688, | |
| "logps/rejected": -19.864755630493164, | |
| "loss": 0.6686, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.17965315282344818, | |
| "rewards/margins": 0.09440762549638748, | |
| "rewards/rejected": -0.27406078577041626, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.575190374550272e-08, | |
| "logits/chosen": -1.4504528045654297, | |
| "logits/rejected": -1.4469316005706787, | |
| "logps/chosen": -17.03780174255371, | |
| "logps/rejected": -20.324657440185547, | |
| "loss": 0.6587, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.22678837180137634, | |
| "rewards/margins": 0.05626438185572624, | |
| "rewards/rejected": -0.2830527722835541, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.5e-08, | |
| "logits/chosen": -1.4595016241073608, | |
| "logits/rejected": -1.4589979648590088, | |
| "logps/chosen": -16.276988983154297, | |
| "logps/rejected": -22.162479400634766, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2052340805530548, | |
| "rewards/margins": 0.09203705936670303, | |
| "rewards/rejected": -0.29727110266685486, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_logits/chosen": -1.7746305465698242, | |
| "eval_logits/rejected": -1.8544888496398926, | |
| "eval_logps/chosen": -16.942358016967773, | |
| "eval_logps/rejected": -21.468765258789062, | |
| "eval_loss": 0.6636533737182617, | |
| "eval_rewards/accuracies": 0.6174121499061584, | |
| "eval_rewards/chosen": -0.19292119145393372, | |
| "eval_rewards/margins": 0.10012920200824738, | |
| "eval_rewards/rejected": -0.2930504083633423, | |
| "eval_runtime": 306.9122, | |
| "eval_samples_per_second": 65.165, | |
| "eval_steps_per_second": 1.02, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.424048101231686e-08, | |
| "logits/chosen": -1.3691225051879883, | |
| "logits/rejected": -1.3902453184127808, | |
| "logps/chosen": -18.4908504486084, | |
| "logps/rejected": -25.587356567382812, | |
| "loss": 0.6637, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.2073945701122284, | |
| "rewards/margins": 0.09110216796398163, | |
| "rewards/rejected": -0.2984967529773712, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.347357813929453e-08, | |
| "logits/chosen": -1.4559385776519775, | |
| "logits/rejected": -1.4722373485565186, | |
| "logps/chosen": -14.663101196289062, | |
| "logps/rejected": -18.937580108642578, | |
| "loss": 0.658, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.19805128872394562, | |
| "rewards/margins": 0.08183437585830688, | |
| "rewards/rejected": -0.2798856794834137, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.269952498697734e-08, | |
| "logits/chosen": -1.4937471151351929, | |
| "logits/rejected": -1.508141279220581, | |
| "logps/chosen": -16.866802215576172, | |
| "logps/rejected": -19.958166122436523, | |
| "loss": 0.6675, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.16971763968467712, | |
| "rewards/margins": 0.0776999443769455, | |
| "rewards/rejected": -0.2474175989627838, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.191855733945387e-08, | |
| "logits/chosen": -1.4956650733947754, | |
| "logits/rejected": -1.4953656196594238, | |
| "logps/chosen": -15.598005294799805, | |
| "logps/rejected": -20.977344512939453, | |
| "loss": 0.6545, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.20860418677330017, | |
| "rewards/margins": 0.113286092877388, | |
| "rewards/rejected": -0.321890264749527, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.113091308703497e-08, | |
| "logits/chosen": -1.4557757377624512, | |
| "logits/rejected": -1.4881502389907837, | |
| "logps/chosen": -15.815752029418945, | |
| "logps/rejected": -23.03244400024414, | |
| "loss": 0.6648, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.18280960619449615, | |
| "rewards/margins": 0.10344807803630829, | |
| "rewards/rejected": -0.28625768423080444, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.033683215379002e-08, | |
| "logits/chosen": -1.5411797761917114, | |
| "logits/rejected": -1.52112877368927, | |
| "logps/chosen": -19.202138900756836, | |
| "logps/rejected": -21.153202056884766, | |
| "loss": 0.6586, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.21284401416778564, | |
| "rewards/margins": 0.06404918432235718, | |
| "rewards/rejected": -0.2768932282924652, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 6.953655642446368e-08, | |
| "logits/chosen": -1.3810409307479858, | |
| "logits/rejected": -1.3840851783752441, | |
| "logps/chosen": -19.21212387084961, | |
| "logps/rejected": -23.80167579650879, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.18992505967617035, | |
| "rewards/margins": 0.10662909597158432, | |
| "rewards/rejected": -0.2965541481971741, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 6.87303296707956e-08, | |
| "logits/chosen": -1.3490447998046875, | |
| "logits/rejected": -1.374977707862854, | |
| "logps/chosen": -15.331153869628906, | |
| "logps/rejected": -22.786605834960938, | |
| "loss": 0.6525, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2073613405227661, | |
| "rewards/margins": 0.13075020909309387, | |
| "rewards/rejected": -0.3381115794181824, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 6.7918397477265e-08, | |
| "logits/chosen": -1.4382171630859375, | |
| "logits/rejected": -1.4352459907531738, | |
| "logps/chosen": -17.559337615966797, | |
| "logps/rejected": -21.59587860107422, | |
| "loss": 0.6659, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.20677068829536438, | |
| "rewards/margins": 0.11824776977300644, | |
| "rewards/rejected": -0.3250184655189514, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 6.710100716628345e-08, | |
| "logits/chosen": -1.5328034162521362, | |
| "logits/rejected": -1.5469181537628174, | |
| "logps/chosen": -17.923799514770508, | |
| "logps/rejected": -21.902999877929688, | |
| "loss": 0.6573, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.21206100285053253, | |
| "rewards/margins": 0.102953240275383, | |
| "rewards/rejected": -0.3150142729282379, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 6.627840772285784e-08, | |
| "logits/chosen": -1.4326767921447754, | |
| "logits/rejected": -1.460436224937439, | |
| "logps/chosen": -16.84982681274414, | |
| "logps/rejected": -21.357173919677734, | |
| "loss": 0.6489, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.18491019308567047, | |
| "rewards/margins": 0.1146242767572403, | |
| "rewards/rejected": -0.29953449964523315, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 6.545084971874738e-08, | |
| "logits/chosen": -1.4505399465560913, | |
| "logits/rejected": -1.4668846130371094, | |
| "logps/chosen": -17.121883392333984, | |
| "logps/rejected": -21.807418823242188, | |
| "loss": 0.6563, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.22878995537757874, | |
| "rewards/margins": 0.12952670454978943, | |
| "rewards/rejected": -0.3583166301250458, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 6.461858523613683e-08, | |
| "logits/chosen": -1.4589532613754272, | |
| "logits/rejected": -1.4687752723693848, | |
| "logps/chosen": -16.5905818939209, | |
| "logps/rejected": -22.4600830078125, | |
| "loss": 0.6516, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.23870249092578888, | |
| "rewards/margins": 0.09481547772884369, | |
| "rewards/rejected": -0.3335179388523102, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 6.378186779084996e-08, | |
| "logits/chosen": -1.4866201877593994, | |
| "logits/rejected": -1.4896290302276611, | |
| "logps/chosen": -17.042253494262695, | |
| "logps/rejected": -23.417699813842773, | |
| "loss": 0.6498, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.22306545078754425, | |
| "rewards/margins": 0.13384439051151276, | |
| "rewards/rejected": -0.3569098114967346, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 6.294095225512604e-08, | |
| "logits/chosen": -1.4022520780563354, | |
| "logits/rejected": -1.3814033269882202, | |
| "logps/chosen": -16.337562561035156, | |
| "logps/rejected": -21.31450653076172, | |
| "loss": 0.6569, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2246430367231369, | |
| "rewards/margins": 0.15105651319026947, | |
| "rewards/rejected": -0.37569957971572876, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 6.209609477998338e-08, | |
| "logits/chosen": -1.479479193687439, | |
| "logits/rejected": -1.5034449100494385, | |
| "logps/chosen": -17.957107543945312, | |
| "logps/rejected": -23.098215103149414, | |
| "loss": 0.6629, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.24643921852111816, | |
| "rewards/margins": 0.1044594869017601, | |
| "rewards/rejected": -0.3508986830711365, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 6.124755271719325e-08, | |
| "logits/chosen": -1.4407473802566528, | |
| "logits/rejected": -1.49507737159729, | |
| "logps/chosen": -14.73084545135498, | |
| "logps/rejected": -20.570337295532227, | |
| "loss": 0.6481, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.19731993973255157, | |
| "rewards/margins": 0.14940151572227478, | |
| "rewards/rejected": -0.34672147035598755, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 6.039558454088796e-08, | |
| "logits/chosen": -1.5169572830200195, | |
| "logits/rejected": -1.5479459762573242, | |
| "logps/chosen": -13.957262992858887, | |
| "logps/rejected": -22.48187255859375, | |
| "loss": 0.6496, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.20421214401721954, | |
| "rewards/margins": 0.16293281316757202, | |
| "rewards/rejected": -0.36714497208595276, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5.954044976882724e-08, | |
| "logits/chosen": -1.4714252948760986, | |
| "logits/rejected": -1.4994815587997437, | |
| "logps/chosen": -17.487274169921875, | |
| "logps/rejected": -23.63813591003418, | |
| "loss": 0.6526, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2372887134552002, | |
| "rewards/margins": 0.12310683727264404, | |
| "rewards/rejected": -0.36039552092552185, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5.868240888334653e-08, | |
| "logits/chosen": -1.4606778621673584, | |
| "logits/rejected": -1.4551252126693726, | |
| "logps/chosen": -16.36603355407715, | |
| "logps/rejected": -21.119382858276367, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.2265624701976776, | |
| "rewards/margins": 0.16614754498004913, | |
| "rewards/rejected": -0.39271003007888794, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_logits/chosen": -1.7672057151794434, | |
| "eval_logits/rejected": -1.8467962741851807, | |
| "eval_logps/chosen": -17.221099853515625, | |
| "eval_logps/rejected": -22.007465362548828, | |
| "eval_loss": 0.6560497283935547, | |
| "eval_rewards/accuracies": 0.634984016418457, | |
| "eval_rewards/chosen": -0.22079555690288544, | |
| "eval_rewards/margins": 0.12612493336200714, | |
| "eval_rewards/rejected": -0.3469204902648926, | |
| "eval_runtime": 306.6923, | |
| "eval_samples_per_second": 65.212, | |
| "eval_steps_per_second": 1.021, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5.7821723252011546e-08, | |
| "logits/chosen": -1.4445126056671143, | |
| "logits/rejected": -1.4752463102340698, | |
| "logps/chosen": -18.014240264892578, | |
| "logps/rejected": -25.679813385009766, | |
| "loss": 0.6619, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.2500578463077545, | |
| "rewards/margins": 0.07094712555408478, | |
| "rewards/rejected": -0.3210049867630005, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 5.695865504800327e-08, | |
| "logits/chosen": -1.4805431365966797, | |
| "logits/rejected": -1.4579927921295166, | |
| "logps/chosen": -16.328933715820312, | |
| "logps/rejected": -23.8695068359375, | |
| "loss": 0.6543, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.2136952131986618, | |
| "rewards/margins": 0.15080443024635315, | |
| "rewards/rejected": -0.36449965834617615, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 5.6093467170257366e-08, | |
| "logits/chosen": -1.38763427734375, | |
| "logits/rejected": -1.429253339767456, | |
| "logps/chosen": -17.211883544921875, | |
| "logps/rejected": -21.488162994384766, | |
| "loss": 0.6454, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.23104102909564972, | |
| "rewards/margins": 0.11019430309534073, | |
| "rewards/rejected": -0.34123533964157104, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 5.5226423163382677e-08, | |
| "logits/chosen": -1.3689727783203125, | |
| "logits/rejected": -1.3758208751678467, | |
| "logps/chosen": -18.692577362060547, | |
| "logps/rejected": -21.925251007080078, | |
| "loss": 0.6466, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.21853986382484436, | |
| "rewards/margins": 0.09194198995828629, | |
| "rewards/rejected": -0.31048184633255005, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 5.435778713738292e-08, | |
| "logits/chosen": -1.3423216342926025, | |
| "logits/rejected": -1.355452299118042, | |
| "logps/chosen": -16.941513061523438, | |
| "logps/rejected": -20.827672958374023, | |
| "loss": 0.652, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2414543330669403, | |
| "rewards/margins": 0.10109053552150726, | |
| "rewards/rejected": -0.3425448536872864, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5.3487823687206256e-08, | |
| "logits/chosen": -1.4701192378997803, | |
| "logits/rejected": -1.4796873331069946, | |
| "logps/chosen": -19.578105926513672, | |
| "logps/rejected": -25.23065757751465, | |
| "loss": 0.6484, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.251248836517334, | |
| "rewards/margins": 0.13180874288082123, | |
| "rewards/rejected": -0.3830576241016388, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5.261679781214719e-08, | |
| "logits/chosen": -1.435497522354126, | |
| "logits/rejected": -1.459380865097046, | |
| "logps/chosen": -15.287922859191895, | |
| "logps/rejected": -20.136037826538086, | |
| "loss": 0.6582, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.23688891530036926, | |
| "rewards/margins": 0.1384437382221222, | |
| "rewards/rejected": -0.37533265352249146, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5.1744974835125056e-08, | |
| "logits/chosen": -1.4165819883346558, | |
| "logits/rejected": -1.3940761089324951, | |
| "logps/chosen": -17.587196350097656, | |
| "logps/rejected": -21.539077758789062, | |
| "loss": 0.6443, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2061920464038849, | |
| "rewards/margins": 0.133754700422287, | |
| "rewards/rejected": -0.3399467468261719, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5.087262032186418e-08, | |
| "logits/chosen": -1.4642976522445679, | |
| "logits/rejected": -1.469395637512207, | |
| "logps/chosen": -19.382497787475586, | |
| "logps/rejected": -25.097797393798828, | |
| "loss": 0.6469, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.22812576591968536, | |
| "rewards/margins": 0.1407867670059204, | |
| "rewards/rejected": -0.3689125180244446, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 5e-08, | |
| "logits/chosen": -1.4709419012069702, | |
| "logits/rejected": -1.4961137771606445, | |
| "logps/chosen": -18.24771499633789, | |
| "logps/rejected": -22.102489471435547, | |
| "loss": 0.6616, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.23151636123657227, | |
| "rewards/margins": 0.10363288968801498, | |
| "rewards/rejected": -0.33514922857284546, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.912737967813582e-08, | |
| "logits/chosen": -1.425290584564209, | |
| "logits/rejected": -1.4705363512039185, | |
| "logps/chosen": -20.757022857666016, | |
| "logps/rejected": -25.79976463317871, | |
| "loss": 0.6412, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.2047382891178131, | |
| "rewards/margins": 0.147218257188797, | |
| "rewards/rejected": -0.3519565165042877, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.8255025164874966e-08, | |
| "logits/chosen": -1.4153515100479126, | |
| "logits/rejected": -1.4406566619873047, | |
| "logps/chosen": -15.806404113769531, | |
| "logps/rejected": -23.537967681884766, | |
| "loss": 0.6501, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.26336127519607544, | |
| "rewards/margins": 0.0594928041100502, | |
| "rewards/rejected": -0.32285410165786743, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.73832021878528e-08, | |
| "logits/chosen": -1.4600375890731812, | |
| "logits/rejected": -1.465736746788025, | |
| "logps/chosen": -14.703231811523438, | |
| "logps/rejected": -22.058746337890625, | |
| "loss": 0.6494, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2418874204158783, | |
| "rewards/margins": 0.14205661416053772, | |
| "rewards/rejected": -0.383944034576416, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.651217631279373e-08, | |
| "logits/chosen": -1.488512396812439, | |
| "logits/rejected": -1.5095858573913574, | |
| "logps/chosen": -16.324678421020508, | |
| "logps/rejected": -24.393007278442383, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.22573427855968475, | |
| "rewards/margins": 0.1321270912885666, | |
| "rewards/rejected": -0.35786136984825134, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.5642212862617084e-08, | |
| "logits/chosen": -1.4317299127578735, | |
| "logits/rejected": -1.455801010131836, | |
| "logps/chosen": -15.989962577819824, | |
| "logps/rejected": -18.849998474121094, | |
| "loss": 0.6477, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2763644754886627, | |
| "rewards/margins": 0.11443523317575455, | |
| "rewards/rejected": -0.39079970121383667, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.477357683661733e-08, | |
| "logits/chosen": -1.4575328826904297, | |
| "logits/rejected": -1.470690131187439, | |
| "logps/chosen": -18.188373565673828, | |
| "logps/rejected": -22.835786819458008, | |
| "loss": 0.6512, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.22157052159309387, | |
| "rewards/margins": 0.10469740629196167, | |
| "rewards/rejected": -0.32626792788505554, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.390653282974263e-08, | |
| "logits/chosen": -1.503989815711975, | |
| "logits/rejected": -1.519313931465149, | |
| "logps/chosen": -16.674240112304688, | |
| "logps/rejected": -21.952165603637695, | |
| "loss": 0.6365, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2247585952281952, | |
| "rewards/margins": 0.203842431306839, | |
| "rewards/rejected": -0.4286009669303894, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.304134495199674e-08, | |
| "logits/chosen": -1.4609280824661255, | |
| "logits/rejected": -1.4766910076141357, | |
| "logps/chosen": -19.406192779541016, | |
| "logps/rejected": -22.686025619506836, | |
| "loss": 0.6364, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2958422303199768, | |
| "rewards/margins": 0.11995653063058853, | |
| "rewards/rejected": -0.4157988131046295, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.217827674798844e-08, | |
| "logits/chosen": -1.4871938228607178, | |
| "logits/rejected": -1.5188095569610596, | |
| "logps/chosen": -17.268047332763672, | |
| "logps/rejected": -23.33865737915039, | |
| "loss": 0.6532, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.31558969616889954, | |
| "rewards/margins": 0.11393972486257553, | |
| "rewards/rejected": -0.42952942848205566, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.131759111665348e-08, | |
| "logits/chosen": -1.4645878076553345, | |
| "logits/rejected": -1.4607641696929932, | |
| "logps/chosen": -17.968856811523438, | |
| "logps/rejected": -24.378482818603516, | |
| "loss": 0.6483, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.20308199524879456, | |
| "rewards/margins": 0.19312706589698792, | |
| "rewards/rejected": -0.3962090015411377, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_logits/chosen": -1.762748122215271, | |
| "eval_logits/rejected": -1.8424293994903564, | |
| "eval_logps/chosen": -17.52169418334961, | |
| "eval_logps/rejected": -22.536460876464844, | |
| "eval_loss": 0.65033358335495, | |
| "eval_rewards/accuracies": 0.6413738131523132, | |
| "eval_rewards/chosen": -0.2508549690246582, | |
| "eval_rewards/margins": 0.14896489679813385, | |
| "eval_rewards/rejected": -0.39981985092163086, | |
| "eval_runtime": 306.9145, | |
| "eval_samples_per_second": 65.165, | |
| "eval_steps_per_second": 1.02, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.0459550231172757e-08, | |
| "logits/chosen": -1.3895206451416016, | |
| "logits/rejected": -1.4285638332366943, | |
| "logps/chosen": -17.535720825195312, | |
| "logps/rejected": -24.525096893310547, | |
| "loss": 0.6521, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.2898118793964386, | |
| "rewards/margins": 0.08567583560943604, | |
| "rewards/rejected": -0.37548771500587463, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.960441545911204e-08, | |
| "logits/chosen": -1.4046005010604858, | |
| "logits/rejected": -1.4093401432037354, | |
| "logps/chosen": -19.01970863342285, | |
| "logps/rejected": -22.846820831298828, | |
| "loss": 0.6486, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.28651127219200134, | |
| "rewards/margins": 0.07938197255134583, | |
| "rewards/rejected": -0.36589327454566956, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.8752447282806754e-08, | |
| "logits/chosen": -1.4743293523788452, | |
| "logits/rejected": -1.481278419494629, | |
| "logps/chosen": -15.964106559753418, | |
| "logps/rejected": -22.171709060668945, | |
| "loss": 0.6402, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.21257051825523376, | |
| "rewards/margins": 0.2042216807603836, | |
| "rewards/rejected": -0.41679221391677856, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.7903905220016615e-08, | |
| "logits/chosen": -1.3876988887786865, | |
| "logits/rejected": -1.410398244857788, | |
| "logps/chosen": -17.697256088256836, | |
| "logps/rejected": -23.332763671875, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.23554810881614685, | |
| "rewards/margins": 0.13481906056404114, | |
| "rewards/rejected": -0.3703671991825104, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.705904774487396e-08, | |
| "logits/chosen": -1.383541464805603, | |
| "logits/rejected": -1.3796765804290771, | |
| "logps/chosen": -18.24690818786621, | |
| "logps/rejected": -24.86931610107422, | |
| "loss": 0.6398, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.23973222076892853, | |
| "rewards/margins": 0.16422876715660095, | |
| "rewards/rejected": -0.4039610028266907, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.621813220915004e-08, | |
| "logits/chosen": -1.450282335281372, | |
| "logits/rejected": -1.4380896091461182, | |
| "logps/chosen": -16.290973663330078, | |
| "logps/rejected": -21.838808059692383, | |
| "loss": 0.6481, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.2356175184249878, | |
| "rewards/margins": 0.14112675189971924, | |
| "rewards/rejected": -0.37674424052238464, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.538141476386316e-08, | |
| "logits/chosen": -1.4591354131698608, | |
| "logits/rejected": -1.456343412399292, | |
| "logps/chosen": -17.22078514099121, | |
| "logps/rejected": -21.944005966186523, | |
| "loss": 0.6491, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2835259735584259, | |
| "rewards/margins": 0.10515954345464706, | |
| "rewards/rejected": -0.38868552446365356, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.4549150281252633e-08, | |
| "logits/chosen": -1.428468108177185, | |
| "logits/rejected": -1.4328949451446533, | |
| "logps/chosen": -16.396862030029297, | |
| "logps/rejected": -20.86203384399414, | |
| "loss": 0.6369, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.2767188251018524, | |
| "rewards/margins": 0.12265461683273315, | |
| "rewards/rejected": -0.39937347173690796, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.372159227714218e-08, | |
| "logits/chosen": -1.3802716732025146, | |
| "logits/rejected": -1.4310414791107178, | |
| "logps/chosen": -18.36510467529297, | |
| "logps/rejected": -20.808429718017578, | |
| "loss": 0.6429, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2337242066860199, | |
| "rewards/margins": 0.1597466617822647, | |
| "rewards/rejected": -0.3934708833694458, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.2898992833716563e-08, | |
| "logits/chosen": -1.482743263244629, | |
| "logits/rejected": -1.4772045612335205, | |
| "logps/chosen": -15.889050483703613, | |
| "logps/rejected": -19.02865982055664, | |
| "loss": 0.6478, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2292957603931427, | |
| "rewards/margins": 0.16532939672470093, | |
| "rewards/rejected": -0.39462512731552124, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.208160252273498e-08, | |
| "logits/chosen": -1.4797611236572266, | |
| "logits/rejected": -1.4938347339630127, | |
| "logps/chosen": -18.688114166259766, | |
| "logps/rejected": -22.962867736816406, | |
| "loss": 0.644, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2989271283149719, | |
| "rewards/margins": 0.12799039483070374, | |
| "rewards/rejected": -0.4269174635410309, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.126967032920439e-08, | |
| "logits/chosen": -1.4451311826705933, | |
| "logits/rejected": -1.4879848957061768, | |
| "logps/chosen": -17.379915237426758, | |
| "logps/rejected": -23.540796279907227, | |
| "loss": 0.6363, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.23357510566711426, | |
| "rewards/margins": 0.11232779920101166, | |
| "rewards/rejected": -0.3459029197692871, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.046344357553632e-08, | |
| "logits/chosen": -1.5276529788970947, | |
| "logits/rejected": -1.5440245866775513, | |
| "logps/chosen": -17.0556640625, | |
| "logps/rejected": -22.744121551513672, | |
| "loss": 0.6308, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.23212318122386932, | |
| "rewards/margins": 0.16936565935611725, | |
| "rewards/rejected": -0.4014888405799866, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.9663167846209998e-08, | |
| "logits/chosen": -1.4147757291793823, | |
| "logits/rejected": -1.418082356452942, | |
| "logps/chosen": -19.272014617919922, | |
| "logps/rejected": -27.263992309570312, | |
| "loss": 0.6512, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.2706632614135742, | |
| "rewards/margins": 0.18588735163211823, | |
| "rewards/rejected": -0.45655059814453125, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.8869086912965035e-08, | |
| "logits/chosen": -1.4479643106460571, | |
| "logits/rejected": -1.4662295579910278, | |
| "logps/chosen": -17.57897186279297, | |
| "logps/rejected": -25.184152603149414, | |
| "loss": 0.6423, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.2373163402080536, | |
| "rewards/margins": 0.2066657841205597, | |
| "rewards/rejected": -0.4439820647239685, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.8081442660546124e-08, | |
| "logits/chosen": -1.36322820186615, | |
| "logits/rejected": -1.3949863910675049, | |
| "logps/chosen": -18.550283432006836, | |
| "logps/rejected": -23.298870086669922, | |
| "loss": 0.6416, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.25548475980758667, | |
| "rewards/margins": 0.18017227947711945, | |
| "rewards/rejected": -0.4356570839881897, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.730047501302266e-08, | |
| "logits/chosen": -1.4136111736297607, | |
| "logits/rejected": -1.4232200384140015, | |
| "logps/chosen": -17.45450210571289, | |
| "logps/rejected": -20.295621871948242, | |
| "loss": 0.6415, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2979032099246979, | |
| "rewards/margins": 0.12163282930850983, | |
| "rewards/rejected": -0.4195360243320465, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 2.6526421860705472e-08, | |
| "logits/chosen": -1.384798526763916, | |
| "logits/rejected": -1.4583854675292969, | |
| "logps/chosen": -16.522525787353516, | |
| "logps/rejected": -23.442546844482422, | |
| "loss": 0.6288, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.23055878281593323, | |
| "rewards/margins": 0.23925617337226868, | |
| "rewards/rejected": -0.4698149561882019, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.5759518987683148e-08, | |
| "logits/chosen": -1.4321386814117432, | |
| "logits/rejected": -1.4662306308746338, | |
| "logps/chosen": -17.545879364013672, | |
| "logps/rejected": -20.307212829589844, | |
| "loss": 0.6431, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.31093353033065796, | |
| "rewards/margins": 0.1719907969236374, | |
| "rewards/rejected": -0.48292431235313416, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.500000000000001e-08, | |
| "logits/chosen": -1.4065908193588257, | |
| "logits/rejected": -1.4342257976531982, | |
| "logps/chosen": -17.379558563232422, | |
| "logps/rejected": -22.622777938842773, | |
| "loss": 0.6413, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.27703213691711426, | |
| "rewards/margins": 0.15564481914043427, | |
| "rewards/rejected": -0.4326769709587097, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_logits/chosen": -1.7605791091918945, | |
| "eval_logits/rejected": -1.8403115272521973, | |
| "eval_logps/chosen": -17.779977798461914, | |
| "eval_logps/rejected": -22.98041343688965, | |
| "eval_loss": 0.6457803845405579, | |
| "eval_rewards/accuracies": 0.6469648480415344, | |
| "eval_rewards/chosen": -0.27668341994285583, | |
| "eval_rewards/margins": 0.16753174364566803, | |
| "eval_rewards/rejected": -0.44421514868736267, | |
| "eval_runtime": 307.0164, | |
| "eval_samples_per_second": 65.143, | |
| "eval_steps_per_second": 1.019, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.4248096254497287e-08, | |
| "logits/chosen": -1.4401991367340088, | |
| "logits/rejected": -1.4596917629241943, | |
| "logps/chosen": -17.921367645263672, | |
| "logps/rejected": -23.322214126586914, | |
| "loss": 0.6378, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.29132723808288574, | |
| "rewards/margins": 0.19050189852714539, | |
| "rewards/rejected": -0.4818291664123535, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 2.350403678833976e-08, | |
| "logits/chosen": -1.4126722812652588, | |
| "logits/rejected": -1.420345425605774, | |
| "logps/chosen": -19.046810150146484, | |
| "logps/rejected": -25.77749252319336, | |
| "loss": 0.6416, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.30587273836135864, | |
| "rewards/margins": 0.18231990933418274, | |
| "rewards/rejected": -0.4881926476955414, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.2768048249248644e-08, | |
| "logits/chosen": -1.3649728298187256, | |
| "logits/rejected": -1.3823496103286743, | |
| "logps/chosen": -17.098003387451172, | |
| "logps/rejected": -21.837902069091797, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.33593738079071045, | |
| "rewards/margins": 0.1847352534532547, | |
| "rewards/rejected": -0.520672619342804, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.2040354826462664e-08, | |
| "logits/chosen": -1.3861441612243652, | |
| "logits/rejected": -1.3953332901000977, | |
| "logps/chosen": -17.742359161376953, | |
| "logps/rejected": -24.968896865844727, | |
| "loss": 0.6353, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.32852208614349365, | |
| "rewards/margins": 0.14815881848335266, | |
| "rewards/rejected": -0.4766809046268463, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.1321178182447707e-08, | |
| "logits/chosen": -1.3506596088409424, | |
| "logits/rejected": -1.3595778942108154, | |
| "logps/chosen": -19.100223541259766, | |
| "logps/rejected": -24.44327163696289, | |
| "loss": 0.6449, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.31565234065055847, | |
| "rewards/margins": 0.11223573982715607, | |
| "rewards/rejected": -0.42788806557655334, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.0610737385376347e-08, | |
| "logits/chosen": -1.4714252948760986, | |
| "logits/rejected": -1.4704372882843018, | |
| "logps/chosen": -20.749143600463867, | |
| "logps/rejected": -23.971107482910156, | |
| "loss": 0.6357, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.31912142038345337, | |
| "rewards/margins": 0.1428820788860321, | |
| "rewards/rejected": -0.4620034694671631, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.990924884239758e-08, | |
| "logits/chosen": -1.4702327251434326, | |
| "logits/rejected": -1.4725419282913208, | |
| "logps/chosen": -19.519115447998047, | |
| "logps/rejected": -21.988479614257812, | |
| "loss": 0.6404, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.30419957637786865, | |
| "rewards/margins": 0.16355463862419128, | |
| "rewards/rejected": -0.46775418519973755, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9216926233717085e-08, | |
| "logits/chosen": -1.4942646026611328, | |
| "logits/rejected": -1.5009443759918213, | |
| "logps/chosen": -16.522647857666016, | |
| "logps/rejected": -24.716516494750977, | |
| "loss": 0.6315, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2690926194190979, | |
| "rewards/margins": 0.16352799534797668, | |
| "rewards/rejected": -0.4326205849647522, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.8533980447508135e-08, | |
| "logits/chosen": -1.4108445644378662, | |
| "logits/rejected": -1.4322090148925781, | |
| "logps/chosen": -18.862674713134766, | |
| "logps/rejected": -23.953310012817383, | |
| "loss": 0.636, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.2986528277397156, | |
| "rewards/margins": 0.14546221494674683, | |
| "rewards/rejected": -0.4441150724887848, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.786061951567303e-08, | |
| "logits/chosen": -1.4277067184448242, | |
| "logits/rejected": -1.438820481300354, | |
| "logps/chosen": -19.40964126586914, | |
| "logps/rejected": -26.506017684936523, | |
| "loss": 0.6373, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.29312849044799805, | |
| "rewards/margins": 0.17660866677761078, | |
| "rewards/rejected": -0.46973714232444763, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.719704855047464e-08, | |
| "logits/chosen": -1.408469319343567, | |
| "logits/rejected": -1.4118237495422363, | |
| "logps/chosen": -17.609798431396484, | |
| "logps/rejected": -22.488494873046875, | |
| "loss": 0.636, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.29817792773246765, | |
| "rewards/margins": 0.1390009969472885, | |
| "rewards/rejected": -0.43717893958091736, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.6543469682057103e-08, | |
| "logits/chosen": -1.3890222311019897, | |
| "logits/rejected": -1.4223724603652954, | |
| "logps/chosen": -16.866146087646484, | |
| "logps/rejected": -21.382225036621094, | |
| "loss": 0.6432, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.23102489113807678, | |
| "rewards/margins": 0.185149148106575, | |
| "rewards/rejected": -0.4161740243434906, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.590008199687508e-08, | |
| "logits/chosen": -1.4136755466461182, | |
| "logits/rejected": -1.4180434942245483, | |
| "logps/chosen": -17.45306968688965, | |
| "logps/rejected": -20.812774658203125, | |
| "loss": 0.6425, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.29186105728149414, | |
| "rewards/margins": 0.115916408598423, | |
| "rewards/rejected": -0.40777745842933655, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.526708147705013e-08, | |
| "logits/chosen": -1.4240939617156982, | |
| "logits/rejected": -1.4586817026138306, | |
| "logps/chosen": -17.540027618408203, | |
| "logps/rejected": -22.493268966674805, | |
| "loss": 0.6404, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2557734549045563, | |
| "rewards/margins": 0.17367486655712128, | |
| "rewards/rejected": -0.42944836616516113, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.4644660940672625e-08, | |
| "logits/chosen": -1.3837788105010986, | |
| "logits/rejected": -1.4116965532302856, | |
| "logps/chosen": -17.056177139282227, | |
| "logps/rejected": -25.01016616821289, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.28285446763038635, | |
| "rewards/margins": 0.2001866102218628, | |
| "rewards/rejected": -0.48304110765457153, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.4033009983067451e-08, | |
| "logits/chosen": -1.4337577819824219, | |
| "logits/rejected": -1.446617603302002, | |
| "logps/chosen": -16.082469940185547, | |
| "logps/rejected": -22.56252670288086, | |
| "loss": 0.6439, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.2657018005847931, | |
| "rewards/margins": 0.24111859500408173, | |
| "rewards/rejected": -0.5068204402923584, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3432314919041476e-08, | |
| "logits/chosen": -1.47090744972229, | |
| "logits/rejected": -1.508124589920044, | |
| "logps/chosen": -14.742795944213867, | |
| "logps/rejected": -19.045040130615234, | |
| "loss": 0.6327, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.2083434760570526, | |
| "rewards/margins": 0.19431748986244202, | |
| "rewards/rejected": -0.402660995721817, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.2842758726130282e-08, | |
| "logits/chosen": -1.404345989227295, | |
| "logits/rejected": -1.4321860074996948, | |
| "logps/chosen": -18.46529769897461, | |
| "logps/rejected": -22.419506072998047, | |
| "loss": 0.6581, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.3104281723499298, | |
| "rewards/margins": 0.12459827959537506, | |
| "rewards/rejected": -0.43502646684646606, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.2264520988861398e-08, | |
| "logits/chosen": -1.528306007385254, | |
| "logits/rejected": -1.548837661743164, | |
| "logps/chosen": -18.500003814697266, | |
| "logps/rejected": -22.167816162109375, | |
| "loss": 0.6485, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.29536038637161255, | |
| "rewards/margins": 0.1245318204164505, | |
| "rewards/rejected": -0.41989216208457947, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.1697777844051105e-08, | |
| "logits/chosen": -1.4373068809509277, | |
| "logits/rejected": -1.4536762237548828, | |
| "logps/chosen": -19.13837242126465, | |
| "logps/rejected": -21.966625213623047, | |
| "loss": 0.6364, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.26146507263183594, | |
| "rewards/margins": 0.19086112082004547, | |
| "rewards/rejected": -0.4523262083530426, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_logits/chosen": -1.7574115991592407, | |
| "eval_logits/rejected": -1.8370665311813354, | |
| "eval_logps/chosen": -17.89972496032715, | |
| "eval_logps/rejected": -23.20018768310547, | |
| "eval_loss": 0.6436580419540405, | |
| "eval_rewards/accuracies": 0.6449680328369141, | |
| "eval_rewards/chosen": -0.28865811228752136, | |
| "eval_rewards/margins": 0.17753452062606812, | |
| "eval_rewards/rejected": -0.4661926031112671, | |
| "eval_runtime": 306.9231, | |
| "eval_samples_per_second": 65.163, | |
| "eval_steps_per_second": 1.02, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.1142701927151454e-08, | |
| "logits/chosen": -1.4606168270111084, | |
| "logits/rejected": -1.4665342569351196, | |
| "logps/chosen": -15.579089164733887, | |
| "logps/rejected": -22.30219268798828, | |
| "loss": 0.6483, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2538829445838928, | |
| "rewards/margins": 0.22965078055858612, | |
| "rewards/rejected": -0.4835337698459625, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.0599462319663904e-08, | |
| "logits/chosen": -1.3860244750976562, | |
| "logits/rejected": -1.3826844692230225, | |
| "logps/chosen": -17.648862838745117, | |
| "logps/rejected": -25.444860458374023, | |
| "loss": 0.6498, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.2895384132862091, | |
| "rewards/margins": 0.12358560413122177, | |
| "rewards/rejected": -0.4131239950656891, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.0068224497635369e-08, | |
| "logits/chosen": -1.4392060041427612, | |
| "logits/rejected": -1.460053563117981, | |
| "logps/chosen": -17.04248809814453, | |
| "logps/rejected": -25.65012550354004, | |
| "loss": 0.6428, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.26920509338378906, | |
| "rewards/margins": 0.1518903225660324, | |
| "rewards/rejected": -0.42109543085098267, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 9.549150281252633e-09, | |
| "logits/chosen": -1.380948781967163, | |
| "logits/rejected": -1.38584566116333, | |
| "logps/chosen": -19.414995193481445, | |
| "logps/rejected": -24.585783004760742, | |
| "loss": 0.6346, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.28162115812301636, | |
| "rewards/margins": 0.15368224680423737, | |
| "rewards/rejected": -0.43530339002609253, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 9.042397785550404e-09, | |
| "logits/chosen": -1.3312206268310547, | |
| "logits/rejected": -1.3463561534881592, | |
| "logps/chosen": -15.558464050292969, | |
| "logps/rejected": -20.260160446166992, | |
| "loss": 0.6395, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.2858559489250183, | |
| "rewards/margins": 0.15887019038200378, | |
| "rewards/rejected": -0.4447261691093445, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 8.548121372247919e-09, | |
| "logits/chosen": -1.403304100036621, | |
| "logits/rejected": -1.3955562114715576, | |
| "logps/chosen": -17.097698211669922, | |
| "logps/rejected": -23.62860679626465, | |
| "loss": 0.6436, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.3152267336845398, | |
| "rewards/margins": 0.09969434142112732, | |
| "rewards/rejected": -0.4149211049079895, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 8.066471602728803e-09, | |
| "logits/chosen": -1.4208085536956787, | |
| "logits/rejected": -1.4421093463897705, | |
| "logps/chosen": -16.878705978393555, | |
| "logps/rejected": -18.381912231445312, | |
| "loss": 0.6342, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.2497948855161667, | |
| "rewards/margins": 0.19130581617355347, | |
| "rewards/rejected": -0.44110068678855896, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 7.597595192178703e-09, | |
| "logits/chosen": -1.4911779165267944, | |
| "logits/rejected": -1.4802215099334717, | |
| "logps/chosen": -16.187580108642578, | |
| "logps/rejected": -20.237377166748047, | |
| "loss": 0.6369, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.30556631088256836, | |
| "rewards/margins": 0.13740003108978271, | |
| "rewards/rejected": -0.4429663121700287, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 7.1416349648943884e-09, | |
| "logits/chosen": -1.4348905086517334, | |
| "logits/rejected": -1.4343273639678955, | |
| "logps/chosen": -18.559795379638672, | |
| "logps/rejected": -23.7816219329834, | |
| "loss": 0.6424, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.3174014091491699, | |
| "rewards/margins": 0.12262705713510513, | |
| "rewards/rejected": -0.44002848863601685, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.698729810778064e-09, | |
| "logits/chosen": -1.4852615594863892, | |
| "logits/rejected": -1.4959180355072021, | |
| "logps/chosen": -18.27218246459961, | |
| "logps/rejected": -25.438560485839844, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3046795427799225, | |
| "rewards/margins": 0.14905641973018646, | |
| "rewards/rejected": -0.45373591780662537, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.269014643030213e-09, | |
| "logits/chosen": -1.4191768169403076, | |
| "logits/rejected": -1.4377117156982422, | |
| "logps/chosen": -17.66164207458496, | |
| "logps/rejected": -24.158462524414062, | |
| "loss": 0.6427, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.29421794414520264, | |
| "rewards/margins": 0.1277884691953659, | |
| "rewards/rejected": -0.4220064580440521, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.8526203570536504e-09, | |
| "logits/chosen": -1.4127318859100342, | |
| "logits/rejected": -1.4357506036758423, | |
| "logps/chosen": -17.93946075439453, | |
| "logps/rejected": -25.222787857055664, | |
| "loss": 0.6391, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3292141258716583, | |
| "rewards/margins": 0.20317533612251282, | |
| "rewards/rejected": -0.5323894619941711, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.44967379058161e-09, | |
| "logits/chosen": -1.3914811611175537, | |
| "logits/rejected": -1.4102437496185303, | |
| "logps/chosen": -16.657636642456055, | |
| "logps/rejected": -23.440505981445312, | |
| "loss": 0.6564, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3101702630519867, | |
| "rewards/margins": 0.18297256529331207, | |
| "rewards/rejected": -0.49314290285110474, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.060297685041659e-09, | |
| "logits/chosen": -1.3386437892913818, | |
| "logits/rejected": -1.3571398258209229, | |
| "logps/chosen": -19.986854553222656, | |
| "logps/rejected": -26.74784278869629, | |
| "loss": 0.6279, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.284962922334671, | |
| "rewards/margins": 0.19053895771503448, | |
| "rewards/rejected": -0.4755018651485443, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.684610648167503e-09, | |
| "logits/chosen": -1.3065433502197266, | |
| "logits/rejected": -1.3413054943084717, | |
| "logps/chosen": -17.01144027709961, | |
| "logps/rejected": -24.851526260375977, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.34229403734207153, | |
| "rewards/margins": 0.10157414525747299, | |
| "rewards/rejected": -0.44386816024780273, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.322727117869951e-09, | |
| "logits/chosen": -1.3795998096466064, | |
| "logits/rejected": -1.4208852052688599, | |
| "logps/chosen": -18.107046127319336, | |
| "logps/rejected": -24.59166145324707, | |
| "loss": 0.6355, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.282823383808136, | |
| "rewards/margins": 0.2188301533460617, | |
| "rewards/rejected": -0.5016534924507141, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.974757327377981e-09, | |
| "logits/chosen": -1.436051368713379, | |
| "logits/rejected": -1.4475667476654053, | |
| "logps/chosen": -18.486120223999023, | |
| "logps/rejected": -22.97800064086914, | |
| "loss": 0.6469, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3353201746940613, | |
| "rewards/margins": 0.08744947612285614, | |
| "rewards/rejected": -0.4227696359157562, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.640807271660634e-09, | |
| "logits/chosen": -1.464347004890442, | |
| "logits/rejected": -1.4781545400619507, | |
| "logps/chosen": -18.07439613342285, | |
| "logps/rejected": -23.41990089416504, | |
| "loss": 0.6422, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.259718656539917, | |
| "rewards/margins": 0.25415921211242676, | |
| "rewards/rejected": -0.5138779282569885, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.3209786751399183e-09, | |
| "logits/chosen": -1.3919214010238647, | |
| "logits/rejected": -1.4111039638519287, | |
| "logps/chosen": -19.07809066772461, | |
| "logps/rejected": -24.558177947998047, | |
| "loss": 0.6417, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3080528676509857, | |
| "rewards/margins": 0.1815052181482315, | |
| "rewards/rejected": -0.4895581305027008, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.015368960704584e-09, | |
| "logits/chosen": -1.348677158355713, | |
| "logits/rejected": -1.379531979560852, | |
| "logps/chosen": -17.733901977539062, | |
| "logps/rejected": -21.527729034423828, | |
| "loss": 0.6456, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.34989431500434875, | |
| "rewards/margins": 0.13681560754776, | |
| "rewards/rejected": -0.48670992255210876, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_logits/chosen": -1.7564817667007446, | |
| "eval_logits/rejected": -1.8361190557479858, | |
| "eval_logps/chosen": -17.921520233154297, | |
| "eval_logps/rejected": -23.25269317626953, | |
| "eval_loss": 0.6429938077926636, | |
| "eval_rewards/accuracies": 0.6417731642723083, | |
| "eval_rewards/chosen": -0.2908374071121216, | |
| "eval_rewards/margins": 0.1806056946516037, | |
| "eval_rewards/rejected": -0.4714431166648865, | |
| "eval_runtime": 307.0371, | |
| "eval_samples_per_second": 65.139, | |
| "eval_steps_per_second": 1.019, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7240712200341577e-09, | |
| "logits/chosen": -1.511375069618225, | |
| "logits/rejected": -1.5339971780776978, | |
| "logps/chosen": -17.652019500732422, | |
| "logps/rejected": -21.41324234008789, | |
| "loss": 0.6269, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.26256105303764343, | |
| "rewards/margins": 0.18523547053337097, | |
| "rewards/rejected": -0.4477965235710144, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.4471741852423233e-09, | |
| "logits/chosen": -1.4349921941757202, | |
| "logits/rejected": -1.4263644218444824, | |
| "logps/chosen": -18.641862869262695, | |
| "logps/rejected": -23.057044982910156, | |
| "loss": 0.636, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.29802319407463074, | |
| "rewards/margins": 0.17096181213855743, | |
| "rewards/rejected": -0.468984991312027, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.184762201848228e-09, | |
| "logits/chosen": -1.370429277420044, | |
| "logits/rejected": -1.4054522514343262, | |
| "logps/chosen": -17.3892765045166, | |
| "logps/rejected": -24.669536590576172, | |
| "loss": 0.6347, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.2652004361152649, | |
| "rewards/margins": 0.2178630828857422, | |
| "rewards/rejected": -0.4830635190010071, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.9369152030840553e-09, | |
| "logits/chosen": -1.4306275844573975, | |
| "logits/rejected": -1.4435356855392456, | |
| "logps/chosen": -16.403339385986328, | |
| "logps/rejected": -23.840957641601562, | |
| "loss": 0.6289, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.23369142413139343, | |
| "rewards/margins": 0.23300664126873016, | |
| "rewards/rejected": -0.4666980803012848, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.70370868554659e-09, | |
| "logits/chosen": -1.3685413599014282, | |
| "logits/rejected": -1.3498773574829102, | |
| "logps/chosen": -17.239362716674805, | |
| "logps/rejected": -23.87692642211914, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.25025489926338196, | |
| "rewards/margins": 0.25468313694000244, | |
| "rewards/rejected": -0.504938006401062, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.4852136862001763e-09, | |
| "logits/chosen": -1.386918306350708, | |
| "logits/rejected": -1.3831664323806763, | |
| "logps/chosen": -19.643451690673828, | |
| "logps/rejected": -24.284130096435547, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.31439071893692017, | |
| "rewards/margins": 0.20531371235847473, | |
| "rewards/rejected": -0.5197044610977173, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.2814967607382432e-09, | |
| "logits/chosen": -1.454646348953247, | |
| "logits/rejected": -1.463849663734436, | |
| "logps/chosen": -17.558238983154297, | |
| "logps/rejected": -19.34381103515625, | |
| "loss": 0.642, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.29885101318359375, | |
| "rewards/margins": 0.1699037104845047, | |
| "rewards/rejected": -0.46875467896461487, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.0926199633097156e-09, | |
| "logits/chosen": -1.3675868511199951, | |
| "logits/rejected": -1.4033797979354858, | |
| "logps/chosen": -17.723363876342773, | |
| "logps/rejected": -21.526758193969727, | |
| "loss": 0.6345, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.29813557863235474, | |
| "rewards/margins": 0.24015481770038605, | |
| "rewards/rejected": -0.5382903814315796, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 9.186408276168012e-10, | |
| "logits/chosen": -1.4319554567337036, | |
| "logits/rejected": -1.4493201971054077, | |
| "logps/chosen": -17.649295806884766, | |
| "logps/rejected": -25.110567092895508, | |
| "loss": 0.6347, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.25603246688842773, | |
| "rewards/margins": 0.21903729438781738, | |
| "rewards/rejected": -0.4750697612762451, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 7.59612349389599e-10, | |
| "logits/chosen": -1.4553452730178833, | |
| "logits/rejected": -1.4630682468414307, | |
| "logps/chosen": -18.7999267578125, | |
| "logps/rejected": -22.81121253967285, | |
| "loss": 0.6351, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.32581713795661926, | |
| "rewards/margins": 0.13567964732646942, | |
| "rewards/rejected": -0.4614967703819275, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 6.15582970243117e-10, | |
| "logits/chosen": -1.4470014572143555, | |
| "logits/rejected": -1.4603230953216553, | |
| "logps/chosen": -18.624347686767578, | |
| "logps/rejected": -21.459732055664062, | |
| "loss": 0.643, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.3545226454734802, | |
| "rewards/margins": 0.13958732783794403, | |
| "rewards/rejected": -0.49411001801490784, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.865965629214819e-10, | |
| "logits/chosen": -1.4853070974349976, | |
| "logits/rejected": -1.5119067430496216, | |
| "logps/chosen": -18.299137115478516, | |
| "logps/rejected": -22.493701934814453, | |
| "loss": 0.6401, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.2782926559448242, | |
| "rewards/margins": 0.11381447315216064, | |
| "rewards/rejected": -0.39210715889930725, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.7269241793390084e-10, | |
| "logits/chosen": -1.416803002357483, | |
| "logits/rejected": -1.4471747875213623, | |
| "logps/chosen": -18.96782875061035, | |
| "logps/rejected": -25.271512985229492, | |
| "loss": 0.6433, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.30018851161003113, | |
| "rewards/margins": 0.2068859338760376, | |
| "rewards/rejected": -0.5070745348930359, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.739052315863355e-10, | |
| "logits/chosen": -1.475373387336731, | |
| "logits/rejected": -1.4798928499221802, | |
| "logps/chosen": -18.24342918395996, | |
| "logps/rejected": -26.389270782470703, | |
| "loss": 0.64, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.2784801125526428, | |
| "rewards/margins": 0.23683655261993408, | |
| "rewards/rejected": -0.5153166651725769, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.9026509541272272e-10, | |
| "logits/chosen": -1.4730727672576904, | |
| "logits/rejected": -1.482080101966858, | |
| "logps/chosen": -17.36160659790039, | |
| "logps/rejected": -26.266277313232422, | |
| "loss": 0.635, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.3075779676437378, | |
| "rewards/margins": 0.23826391994953156, | |
| "rewards/rejected": -0.5458418726921082, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.2179748700879012e-10, | |
| "logits/chosen": -1.476564645767212, | |
| "logits/rejected": -1.5101211071014404, | |
| "logps/chosen": -17.65967559814453, | |
| "logps/rejected": -22.493526458740234, | |
| "loss": 0.6488, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.29659706354141235, | |
| "rewards/margins": 0.1625634878873825, | |
| "rewards/rejected": -0.45916056632995605, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 6.852326227130833e-11, | |
| "logits/chosen": -1.3950709104537964, | |
| "logits/rejected": -1.396350622177124, | |
| "logps/chosen": -17.96800994873047, | |
| "logps/rejected": -23.398609161376953, | |
| "loss": 0.6321, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.26527708768844604, | |
| "rewards/margins": 0.17985311150550842, | |
| "rewards/rejected": -0.44513019919395447, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.0458649045211895e-11, | |
| "logits/chosen": -1.3564696311950684, | |
| "logits/rejected": -1.343924880027771, | |
| "logps/chosen": -18.672687530517578, | |
| "logps/rejected": -25.386516571044922, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.2513435184955597, | |
| "rewards/margins": 0.25813814997673035, | |
| "rewards/rejected": -0.50948166847229, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 7.615242180436521e-12, | |
| "logits/chosen": -1.428175926208496, | |
| "logits/rejected": -1.4705220460891724, | |
| "logps/chosen": -18.455589294433594, | |
| "logps/rejected": -23.739002227783203, | |
| "loss": 0.6388, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.34945839643478394, | |
| "rewards/margins": 0.16214387118816376, | |
| "rewards/rejected": -0.5116022825241089, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -1.3395625352859497, | |
| "logits/rejected": -1.3556944131851196, | |
| "logps/chosen": -17.86135482788086, | |
| "logps/rejected": -21.11099624633789, | |
| "loss": 0.6448, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.3157894015312195, | |
| "rewards/margins": 0.17568814754486084, | |
| "rewards/rejected": -0.4914775788784027, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_logits/chosen": -1.7563655376434326, | |
| "eval_logits/rejected": -1.8358914852142334, | |
| "eval_logps/chosen": -17.932043075561523, | |
| "eval_logps/rejected": -23.265531539916992, | |
| "eval_loss": 0.6428677439689636, | |
| "eval_rewards/accuracies": 0.6453673839569092, | |
| "eval_rewards/chosen": -0.2918897271156311, | |
| "eval_rewards/margins": 0.18083742260932922, | |
| "eval_rewards/rejected": -0.47272711992263794, | |
| "eval_runtime": 307.0181, | |
| "eval_samples_per_second": 65.143, | |
| "eval_steps_per_second": 1.019, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |