diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 21c843c0d69..85cccf99feb 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1520,7 +1520,6 @@ llama_tokens format_prompt_infill( tokens_suffix.resize(n_suffix_take); tokens_prefix.insert(tokens_prefix.begin(), llama_vocab_fim_pre(vocab)); - tokens_prefix.insert(tokens_prefix.end(), tokens_prompt.begin(), tokens_prompt.end()); tokens_suffix.insert(tokens_suffix.begin(), llama_vocab_fim_suf(vocab)); auto embd_inp = spm_infill ? tokens_suffix : tokens_prefix; @@ -1530,14 +1529,20 @@ llama_tokens format_prompt_infill( embd_inp.insert(embd_inp.begin(), llama_vocab_bos(vocab)); } - SRV_DBG("extra: n_ctx = %d, n_extra_take = %d, n_extra = %d\n", n_ctx, n_extra_take, (int) extra_tokens.size()); + //Insert extra context (Repo-level tokens) at the very beginning + if (n_extra_take > 0) { + embd_inp.insert(embd_inp.begin(), extra_tokens.end() - n_extra_take, extra_tokens.end()); + } - // put the extra context before the FIM prefix - embd_inp.insert(embd_inp.begin(), extra_tokens.end() - n_extra_take, extra_tokens.end()); + SRV_DBG("extra: n_ctx = %d, n_extra_take = %d, n_extra = %d\n", n_ctx, n_extra_take, (int) extra_tokens.size()); embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end()); embd_inp.push_back(llama_vocab_fim_mid(vocab)); + if (!tokens_prompt.empty()) { + embd_inp.insert(embd_inp.end(), tokens_prompt.begin(), tokens_prompt.end()); + } + return embd_inp; }