diff --git a/package-lock.json b/package-lock.json index 0cc093e..d799cb4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^0.6.1", + "@dqbd/tiktoken": "^1.0.2", "axios": "^1.3.4", "chalk": "^5.2.0", "cleye": "^1.3.2", @@ -83,6 +84,11 @@ "node": ">=12" } }, + "node_modules/@dqbd/tiktoken": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.2.tgz", + "integrity": "sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw==" + }, "node_modules/@esbuild/android-arm": { "version": "0.15.18", "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.15.18.tgz", diff --git a/package.json b/package.json index 760b0c1..0ce4593 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,7 @@ }, "dependencies": { "@clack/prompts": "^0.6.1", + "@dqbd/tiktoken": "^1.0.2", "axios": "^1.3.4", "chalk": "^5.2.0", "cleye": "^1.3.2", diff --git a/src/generateCommitMessageFromGitDiff.ts b/src/generateCommitMessageFromGitDiff.ts index 69100b0..af26956 100644 --- a/src/generateCommitMessageFromGitDiff.ts +++ b/src/generateCommitMessageFromGitDiff.ts @@ -6,6 +6,7 @@ import { api } from './api'; import { getConfig } from './commands/config'; import { mergeStrings } from './utils/mergeStrings'; import { i18n, I18nLocals } from './i18n'; +import { tokenCount } from './utils/tokenCount'; const config = getConfig(); const translation = i18n[(config?.language as I18nLocals) || 'en']; @@ -13,12 +14,10 @@ const translation = i18n[(config?.language as I18nLocals) || 'en']; const INIT_MESSAGES_PROMPT: Array = [ { role: ChatCompletionRequestMessageRoleEnum.System, - content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${ - config?.emoji - ? 'Use Gitmoji convention to preface the commit' - : 'Do not preface the commit with anything' - }, use the present tense. ${ - config?.description + content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${config?.emoji + ? 'Use Gitmoji convention to preface the commit' + : 'Do not preface the commit with anything' + }, use the present tense. ${config?.description ? 'Add a short description of what commit is about after the commit message. Don\'t start it with "This commit", just describe the changes.' : "Don't add any descriptions to the commit, only commit message." } Use ${translation.localLanguage} to answer.` @@ -80,16 +79,16 @@ interface GenerateCommitMessageError { } const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map( - (msg) => msg.content -).join('').length; + (msg) => tokenCount(msg.content) + 4 +).reduce((a, b) => a + b, 0); const MAX_REQ_TOKENS = 3900 - INIT_MESSAGES_PROMPT_LENGTH; export const generateCommitMessageWithChatCompletion = async ( diff: string ): Promise => { - try { - if (diff.length >= MAX_REQ_TOKENS) { + try { + if (tokenCount(diff) >= MAX_REQ_TOKENS) { const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(diff); const commitMessages = await Promise.all(commitMessagePromises); @@ -144,7 +143,7 @@ function getCommitMsgsPromisesFromFileDiffs(diff: string) { const commitMessagePromises = []; for (const fileDiff of mergedFilesDiffs) { - if (fileDiff.length >= MAX_REQ_TOKENS) { + if (tokenCount(fileDiff) >= MAX_REQ_TOKENS) { // if file-diff is bigger than gpt context — split fileDiff into lineDiff const messagesPromises = getMessagesPromisesByLines(fileDiff, separator); diff --git a/src/utils/mergeStrings.ts b/src/utils/mergeStrings.ts index 7b55a99..ee35f50 100644 --- a/src/utils/mergeStrings.ts +++ b/src/utils/mergeStrings.ts @@ -1,8 +1,9 @@ +import { tokenCount } from './tokenCount' export function mergeStrings(arr: string[], maxStringLength: number): string[] { const mergedArr: string[] = []; let currentItem: string = arr[0]; for (const item of arr.slice(1)) { - if (currentItem.length + item.length <= maxStringLength) { + if (tokenCount(currentItem + item) <= maxStringLength) { currentItem += item; } else { mergedArr.push(currentItem); diff --git a/src/utils/tokenCount.ts b/src/utils/tokenCount.ts new file mode 100644 index 0000000..84e4f23 --- /dev/null +++ b/src/utils/tokenCount.ts @@ -0,0 +1,14 @@ +import { Tiktoken } from "@dqbd/tiktoken/lite" +import cl100k_base from "@dqbd/tiktoken/encoders/cl100k_base.json" assert{type: "json"} + +export function tokenCount(content: string): number { + const encoding = new Tiktoken( + cl100k_base.bpe_ranks, + cl100k_base.special_tokens, + cl100k_base.pat_str + ); + const tokens = encoding.encode(content); + encoding.free(); + + return tokens.length; +}