Count file diff size in tokens, not in string length (#63)

* 1.1.23

* 1.1.24

* feat(package.json): add @dqbd/tiktoken dependency

refactor(generateCommitMessageFromGitDiff.ts): add tokenCount function to count the number of tokens in a string
refactor(generateCommitMessageFromGitDiff.ts): calculate the length of INIT_MESSAGES_PROMPT with the tokenCount function
refactor(generateCommitMessageFromGitDiff.ts): calculate the length of the diff with the tokenCount function

refactor(generateCommitMessageFromGitDiff.ts): rename function parameter from diff to fileDiff and update function calls accordingly
feat(generateCommitMessageFromGitDiff.ts): add tokenCount function to count tokens in fileDiff and use it to check if fileDiff is bigger than MAX_REQ_TOKENS

feat(utils): add tokenCount function to count the number of tokens in a string
refactor(utils/mergeStrings.ts): use tokenCount function to count the number of tokens in a string instead of checking the length of the concatenated string

---------

Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
This commit is contained in:
Raymond
2023-03-28 18:43:02 +08:00
committed by GitHub
parent 7c9feba3ba
commit 3103ae18b8
5 changed files with 33 additions and 12 deletions

View File

@@ -6,6 +6,7 @@ import { api } from './api';
import { getConfig } from './commands/config';
import { mergeStrings } from './utils/mergeStrings';
import { i18n, I18nLocals } from './i18n';
import { tokenCount } from './utils/tokenCount';
const config = getConfig();
const translation = i18n[(config?.language as I18nLocals) || 'en'];
@@ -13,12 +14,10 @@ const translation = i18n[(config?.language as I18nLocals) || 'en'];
const INIT_MESSAGES_PROMPT: Array<ChatCompletionRequestMessage> = [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${
config?.emoji
? 'Use Gitmoji convention to preface the commit'
: 'Do not preface the commit with anything'
}, use the present tense. ${
config?.description
content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${config?.emoji
? 'Use Gitmoji convention to preface the commit'
: 'Do not preface the commit with anything'
}, use the present tense. ${config?.description
? 'Add a short description of what commit is about after the commit message. Don\'t start it with "This commit", just describe the changes.'
: "Don't add any descriptions to the commit, only commit message."
} Use ${translation.localLanguage} to answer.`
@@ -80,16 +79,16 @@ interface GenerateCommitMessageError {
}
const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map(
(msg) => msg.content
).join('').length;
(msg) => tokenCount(msg.content) + 4
).reduce((a, b) => a + b, 0);
const MAX_REQ_TOKENS = 3900 - INIT_MESSAGES_PROMPT_LENGTH;
export const generateCommitMessageWithChatCompletion = async (
diff: string
): Promise<string | GenerateCommitMessageError> => {
try {
if (diff.length >= MAX_REQ_TOKENS) {
try {
if (tokenCount(diff) >= MAX_REQ_TOKENS) {
const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(diff);
const commitMessages = await Promise.all(commitMessagePromises);
@@ -144,7 +143,7 @@ function getCommitMsgsPromisesFromFileDiffs(diff: string) {
const commitMessagePromises = [];
for (const fileDiff of mergedFilesDiffs) {
if (fileDiff.length >= MAX_REQ_TOKENS) {
if (tokenCount(fileDiff) >= MAX_REQ_TOKENS) {
// if file-diff is bigger than gpt context — split fileDiff into lineDiff
const messagesPromises = getMessagesPromisesByLines(fileDiff, separator);