Count file diff by token, not by length of string (#63)

* 1.1.23

* 1.1.24

* feat(package.json): add @dqbd/tiktoken dependency

refactor(generateCommitMessageFromGitDiff.ts): add tokenCount function to count the number of tokens in a string
refactor(generateCommitMessageFromGitDiff.ts): change the way the length of INIT_MESSAGES_PROMPT is calculated to use tokenCount function
refactor(generateCommitMessageFromGitDiff.ts): change the way the length of diff is calculated to use tokenCount function

refactor(generateCommitMessageFromGitDiff.ts): rename function parameter from diff to fileDiff and update function calls accordingly
feat(generateCommitMessageFromGitDiff.ts): add tokenCount function to count tokens in fileDiff and use it to check if fileDiff is bigger than MAX_REQ_TOKENS

feat(utils): add tokenCount function to count the number of tokens in a string
refactor(utils/mergeStrings.ts): use tokenCount function to count the number of tokens in a string instead of checking the length of the concatenated string

---------

Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
This commit is contained in:
Raymond
2023-03-28 18:43:02 +08:00
committed by GitHub
parent 7c9feba3ba
commit 3103ae18b8
5 changed files with 33 additions and 12 deletions

14
src/utils/tokenCount.ts Normal file
View File

@@ -0,0 +1,14 @@
import { Tiktoken } from "@dqbd/tiktoken/lite"
import cl100k_base from "@dqbd/tiktoken/encoders/cl100k_base.json" assert{type: "json"}
/**
 * Counts how many tokens `content` encodes to under the `cl100k_base` encoding.
 *
 * A new `Tiktoken` encoder is built from the imported `cl100k_base` data on
 * every call and freed again before the function exits.
 *
 * @param content - Text to tokenize.
 * @returns The number of tokens produced by encoding `content`.
 * @throws Propagates any error raised by `Tiktoken#encode`; the encoder is
 *   freed before the error reaches the caller.
 */
export function tokenCount(content: string): number {
  const encoding = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );

  try {
    return encoding.encode(content).length;
  } finally {
    // Run free() on the failure path too: without this, an exception from
    // encode() would skip the cleanup and leave the encoder unreleased.
    encoding.free();
  }
}