mirror of
https://github.com/di-sukharev/opencommit.git
synced 2026-04-20 03:02:51 -04:00
Count file diff by token, not by length of string (#63)
* 1.1.23
* 1.1.24
* feat(package.json): add @dqbd/tiktoken dependency
  refactor(generateCommitMessageFromGitDiff.ts): add tokenCount function to count the number of tokens in a string
  refactor(generateCommitMessageFromGitDiff.ts): change the way the length of INIT_MESSAGES_PROMPT is calculated to use tokenCount function
  refactor(generateCommitMessageFromGitDiff.ts): change the way the length of diff is calculated to use tokenCount function
  refactor(generateCommitMessageFromGitDiff.ts): rename function parameter from diff to fileDiff and update function calls accordingly
  feat(generateCommitMessageFromGitDiff.ts): add tokenCount function to count tokens in fileDiff and use it to check if fileDiff is bigger than MAX_REQ_TOKENS
  feat(utils): add tokenCount function to count the number of tokens in a string
  refactor(utils/mergeStrings.ts): use tokenCount function to count the number of tokens in a string instead of checking the length of the concatenated string
---------
Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
This commit is contained in:
6
package-lock.json
generated
6
package-lock.json
generated
@@ -10,6 +10,7 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@clack/prompts": "^0.6.1",
|
"@clack/prompts": "^0.6.1",
|
||||||
|
"@dqbd/tiktoken": "^1.0.2",
|
||||||
"axios": "^1.3.4",
|
"axios": "^1.3.4",
|
||||||
"chalk": "^5.2.0",
|
"chalk": "^5.2.0",
|
||||||
"cleye": "^1.3.2",
|
"cleye": "^1.3.2",
|
||||||
@@ -83,6 +84,11 @@
|
|||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@dqbd/tiktoken": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw=="
|
||||||
|
},
|
||||||
"node_modules/@esbuild/android-arm": {
|
"node_modules/@esbuild/android-arm": {
|
||||||
"version": "0.15.18",
|
"version": "0.15.18",
|
||||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.15.18.tgz",
|
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.15.18.tgz",
|
||||||
|
|||||||
@@ -60,6 +60,7 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@clack/prompts": "^0.6.1",
|
"@clack/prompts": "^0.6.1",
|
||||||
|
"@dqbd/tiktoken": "^1.0.2",
|
||||||
"axios": "^1.3.4",
|
"axios": "^1.3.4",
|
||||||
"chalk": "^5.2.0",
|
"chalk": "^5.2.0",
|
||||||
"cleye": "^1.3.2",
|
"cleye": "^1.3.2",
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { api } from './api';
|
|||||||
import { getConfig } from './commands/config';
|
import { getConfig } from './commands/config';
|
||||||
import { mergeStrings } from './utils/mergeStrings';
|
import { mergeStrings } from './utils/mergeStrings';
|
||||||
import { i18n, I18nLocals } from './i18n';
|
import { i18n, I18nLocals } from './i18n';
|
||||||
|
import { tokenCount } from './utils/tokenCount';
|
||||||
|
|
||||||
const config = getConfig();
|
const config = getConfig();
|
||||||
const translation = i18n[(config?.language as I18nLocals) || 'en'];
|
const translation = i18n[(config?.language as I18nLocals) || 'en'];
|
||||||
@@ -13,12 +14,10 @@ const translation = i18n[(config?.language as I18nLocals) || 'en'];
|
|||||||
const INIT_MESSAGES_PROMPT: Array<ChatCompletionRequestMessage> = [
|
const INIT_MESSAGES_PROMPT: Array<ChatCompletionRequestMessage> = [
|
||||||
{
|
{
|
||||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||||
content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${
|
content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${config?.emoji
|
||||||
config?.emoji
|
? 'Use Gitmoji convention to preface the commit'
|
||||||
? 'Use Gitmoji convention to preface the commit'
|
: 'Do not preface the commit with anything'
|
||||||
: 'Do not preface the commit with anything'
|
}, use the present tense. ${config?.description
|
||||||
}, use the present tense. ${
|
|
||||||
config?.description
|
|
||||||
? 'Add a short description of what commit is about after the commit message. Don\'t start it with "This commit", just describe the changes.'
|
? 'Add a short description of what commit is about after the commit message. Don\'t start it with "This commit", just describe the changes.'
|
||||||
: "Don't add any descriptions to the commit, only commit message."
|
: "Don't add any descriptions to the commit, only commit message."
|
||||||
} Use ${translation.localLanguage} to answer.`
|
} Use ${translation.localLanguage} to answer.`
|
||||||
@@ -80,16 +79,16 @@ interface GenerateCommitMessageError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map(
|
const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map(
|
||||||
(msg) => msg.content
|
(msg) => tokenCount(msg.content) + 4
|
||||||
).join('').length;
|
).reduce((a, b) => a + b, 0);
|
||||||
|
|
||||||
const MAX_REQ_TOKENS = 3900 - INIT_MESSAGES_PROMPT_LENGTH;
|
const MAX_REQ_TOKENS = 3900 - INIT_MESSAGES_PROMPT_LENGTH;
|
||||||
|
|
||||||
export const generateCommitMessageWithChatCompletion = async (
|
export const generateCommitMessageWithChatCompletion = async (
|
||||||
diff: string
|
diff: string
|
||||||
): Promise<string | GenerateCommitMessageError> => {
|
): Promise<string | GenerateCommitMessageError> => {
|
||||||
try {
|
try {
|
||||||
if (diff.length >= MAX_REQ_TOKENS) {
|
if (tokenCount(diff) >= MAX_REQ_TOKENS) {
|
||||||
const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(diff);
|
const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(diff);
|
||||||
|
|
||||||
const commitMessages = await Promise.all(commitMessagePromises);
|
const commitMessages = await Promise.all(commitMessagePromises);
|
||||||
@@ -144,7 +143,7 @@ function getCommitMsgsPromisesFromFileDiffs(diff: string) {
|
|||||||
const commitMessagePromises = [];
|
const commitMessagePromises = [];
|
||||||
|
|
||||||
for (const fileDiff of mergedFilesDiffs) {
|
for (const fileDiff of mergedFilesDiffs) {
|
||||||
if (fileDiff.length >= MAX_REQ_TOKENS) {
|
if (tokenCount(fileDiff) >= MAX_REQ_TOKENS) {
|
||||||
// if file-diff is bigger than gpt context — split fileDiff into lineDiff
|
// if file-diff is bigger than gpt context — split fileDiff into lineDiff
|
||||||
const messagesPromises = getMessagesPromisesByLines(fileDiff, separator);
|
const messagesPromises = getMessagesPromisesByLines(fileDiff, separator);
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
|
import { tokenCount } from './tokenCount'
|
||||||
export function mergeStrings(arr: string[], maxStringLength: number): string[] {
|
export function mergeStrings(arr: string[], maxStringLength: number): string[] {
|
||||||
const mergedArr: string[] = [];
|
const mergedArr: string[] = [];
|
||||||
let currentItem: string = arr[0];
|
let currentItem: string = arr[0];
|
||||||
for (const item of arr.slice(1)) {
|
for (const item of arr.slice(1)) {
|
||||||
if (currentItem.length + item.length <= maxStringLength) {
|
if (tokenCount(currentItem + item) <= maxStringLength) {
|
||||||
currentItem += item;
|
currentItem += item;
|
||||||
} else {
|
} else {
|
||||||
mergedArr.push(currentItem);
|
mergedArr.push(currentItem);
|
||||||
|
|||||||
14
src/utils/tokenCount.ts
Normal file
14
src/utils/tokenCount.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
import { Tiktoken } from "@dqbd/tiktoken/lite"
|
||||||
|
import cl100k_base from "@dqbd/tiktoken/encoders/cl100k_base.json" assert{type: "json"}
|
||||||
|
|
||||||
|
/**
 * Counts how many tokens `content` encodes to under the `cl100k_base` encoding.
 *
 * A new `Tiktoken` encoder is built from the bundled `cl100k_base` ranks,
 * special tokens and pattern string on every call, and is released again
 * before the function returns.
 *
 * @param content - Text to tokenize.
 * @returns The length of the token array produced by `encoding.encode(content)`.
 */
export function tokenCount(content: string): number {
  const encoding = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str
  );

  try {
    return encoding.encode(content).length;
  } finally {
    // Release the encoder on every path: without `finally`, an exception from
    // `encode` would skip `free()` and leak the encoder instance.
    encoding.free();
  }
}
|
||||||
Reference in New Issue
Block a user