commit a75cb30 (1 parent: 3f3769b)
src/llama-context.cpp
@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();
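For context, n_outputs is the value that llama_get_logits_ith checks an output index against, so the change moves the assignment up so the total output count is already set when the output mappings are built. Below is a minimal caller-side sketch, assuming a llama_context has already been created from a loaded model; the names ctx, tokens, and decode_and_read_logits are illustrative and not part of this commit.

#include "llama.h"
#include <cstdio>
#include <vector>

// Decode a small batch and read back the logits for every position that was
// marked as an output. llama_get_logits_ith relies on the output count and
// output mapping that llama_context::decode sets up internally.
static void decode_and_read_logits(llama_context * ctx, const std::vector<llama_token> & tokens) {
    llama_batch batch = llama_batch_init((int32_t) tokens.size(), /*embd*/ 0, /*n_seq_max*/ 1);

    for (size_t i = 0; i < tokens.size(); ++i) {
        batch.token   [i] = tokens[i];
        batch.pos     [i] = (llama_pos) i;
        batch.n_seq_id[i] = 1;
        batch.seq_id  [i][0] = 0;
        batch.logits  [i] = true; // request logits for every token in this example
    }
    batch.n_tokens = (int32_t) tokens.size();

    if (llama_decode(ctx, batch) != 0) {
        fprintf(stderr, "llama_decode failed\n");
        llama_batch_free(batch);
        return;
    }

    // each index below refers to a token that was marked as an output in the batch;
    // the lookups resolve through the context's output mapping
    for (size_t i = 0; i < tokens.size(); ++i) {
        const float * logits = llama_get_logits_ith(ctx, (int32_t) i);
        printf("output %zu -> first logit %f\n", i, logits ? logits[0] : 0.0f);
    }

    llama_batch_free(batch);
}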