mirror of
https://github.com/di-sukharev/opencommit.git
synced 2026-01-31 08:28:29 -05:00
Count file diff by token, not by length of string (#63)
* 1.1.23
* 1.1.24
* feat(package.json): add @dqbd/tiktoken dependency
  refactor(generateCommitMessageFromGitDiff.ts): add tokenCount function to count the number of tokens in a string
  refactor(generateCommitMessageFromGitDiff.ts): change the way the length of INIT_MESSAGES_PROMPT is calculated to use tokenCount function
  refactor(generateCommitMessageFromGitDiff.ts): change the way the length of diff is calculated to use tokenCount function
  refactor(generateCommitMessageFromGitDiff.ts): rename function parameter from diff to fileDiff and update function calls accordingly
  feat(generateCommitMessageFromGitDiff.ts): add tokenCount function to count tokens in fileDiff and use it to check if fileDiff is bigger than MAX_REQ_TOKENS
  feat(utils): add tokenCount function to count the number of tokens in a string
  refactor(utils/mergeStrings.ts): use tokenCount function to count the number of tokens in a string instead of checking the length of the concatenated string
---------
Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
This commit is contained in:
14
src/utils/tokenCount.ts
Normal file
14
src/utils/tokenCount.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { Tiktoken } from "@dqbd/tiktoken/lite"
|
||||
import cl100k_base from "@dqbd/tiktoken/encoders/cl100k_base.json" assert{type: "json"}
|
||||
|
||||
/**
 * Counts how many tokens `content` occupies under the `cl100k_base` encoding.
 *
 * A fresh `Tiktoken` encoder is built from the bundled `cl100k_base` ranks,
 * special tokens and pattern string on every call, and is released before
 * the function returns.
 *
 * @param content - Text to tokenize.
 * @returns The number of tokens produced by encoding `content`.
 * @throws Whatever `Tiktoken#encode` throws for the given input; the encoder
 *   is still freed in that case.
 */
export function tokenCount(content: string): number {
  const encoding = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );

  try {
    return encoding.encode(content).length;
  } finally {
    // Always release the encoder, even when encode() throws; otherwise the
    // instance would never be freed on the error path.
    encoding.free();
  }
}
|
||||
Reference in New Issue
Block a user