mirror of
https://github.com/FoxxMD/context-mod.git
synced 2026-01-12 23:28:08 -05:00
* Implement separate language detection functionality
* Clearer/simpler sentiment processing
* Add languageHints to help coerce low-confidence language detection
* Add test cases for language detection, sentiment detection, and sentiment tests
* Fix neutral range -- it was not using the normalized score range
343 lines
15 KiB
TypeScript
343 lines
15 KiB
TypeScript
import {describe, it} from 'mocha';
|
||
import chai,{assert} from 'chai';
|
||
import chaiAsPromised from 'chai-as-promised';
|
||
import {
|
||
getContentLanguage,
|
||
getLanguageTypeFromValue,
|
||
getStringSentiment, parseTextToNumberComparison,
|
||
testSentiment
|
||
} from "../src/Common/LangaugeProcessing";
|
||
import {GenericComparison, RangedComparison} from "../src/Common/Infrastructure/Comparisons";
|
||
|
||
chai.use(chaiAsPromised);
|
||
|
||
// Text fixtures shared by the language-detection and sentiment suites below.
// Names encode length (short/long), intended sentiment and language.

// English, longer sentences.
const longNeutralEnglish = 'This is a normal english sentence without emotion';
const longNeutralEnglish2 = 'I am neutral on the current subject';
const longNeutralEnglish3 = 'The midterms were an election that happened';
const longNegativeEnglish = 'I hate when idiots drive their bad cars terribly. 😡';
const longPositiveEnglish = 'We love to be happy and laugh on this wonderful, amazing day';

// English, short fragments with no distinctive sentiment words.
const shortIndistinctEnglish = 'metal gear';
const shortIndistinctEnglish2 = 'idk hole ref';

// English, short fragments carrying sentiment (plain words and slang).
const shortPositiveEnglish = 'haha fun';
const shortNegativeEnglish = 'fuck you';
const shortSlangPositiveEnglish = 'lol lmao';
const shortSlangNegativeEnglish = 'get fuked';

// Other languages.
const longIndonesian = 'setiap kali scroll mesti nampak dia nie haih';
const shortIndonesian = 'Saya bangga saya rasis';
const shortPolish = 'Dobry wieczór';
const longRussian = 'Чит на золото для аватарии без скачивания бесплатно';
const longItalian = 'Sembra ormai passato un secolo, visto che gli anime sono praticamente scomparsi dalla televisione.';

const shortRomanian = 'Tu știi unde sta?';
const longRomanian = 'Deci , daca aveti chef de un mic protest , va astept la aceste coordonate';

const longFrench = 'J’approuve et à ce moment là ça se soigne plus malheureusement';

const longSpanish = 'La segunda parece una mezcla entre una convención de fanáticos de los monster truck y un vertedero.';
const longPositiveSpanish = 'me encanta esta hermosa cancion';
const longPositiveSpanish2 = 'Increíble muy divertido gracias por compartir';

const longGerman = 'bin mir auch sicher, dass zb mein 65er halb so viel wiegt wie ein kasten Bier';

// Mostly-emoji content; sentiment comes from the emoji/emoticons.
const shortEmojiNegative = 'France 😫 😞 :(';
const shortEmojiPositive = 'France 😂 😄 😁';
|
||
|
||
describe('Language Detection', function () {
|
||
|
||
// Exercises getLanguageTypeFromValue with user-supplied language identifiers.
// The suite callback is synchronous: mocha registers tests while the callback runs
// and does not wait on a promise returned from describe().
describe('Derives language from user input', function () {
    it('gets from valid, case-insensitive alpha2', async function () {
        const lang = await getLanguageTypeFromValue('eN');
        assert.equal(lang.alpha2, 'en');
    });

    it('gets from valid, case-insensitive alpha3', async function () {
        const lang = await getLanguageTypeFromValue('eNg');
        assert.equal(lang.alpha2, 'en');
    });

    it('gets from valid, case-insensitive language name', async function () {
        const lang = await getLanguageTypeFromValue('EnGlIsH');
        assert.equal(lang.alpha2, 'en');
    });

    it('throws on invalid value', async function () {
        // isRejected returns a promise; it must be awaited (or returned) or the
        // test finishes before the assertion settles and can never fail.
        await assert.isRejected(getLanguageTypeFromValue('pofdsfa'));
    });
});
|
||
|
||
// For each longer sample, getContentLanguage must report the expected alpha2 code,
// must not have fallen back to the default language, and must score at least 0.9.
describe('Recognizes the language in moderately long content well', function () {
    // [language name used in the test title, sample text, expected alpha2 code]
    const expectations: [string, string, string][] = [
        ['english', longPositiveEnglish, 'en'],
        ['french', longFrench, 'fr'],
        ['spanish', longSpanish, 'es'],
        ['german', longGerman, 'de'],
        ['indonesian', longIndonesian, 'id'],
    ];

    for (const [languageName, sample, expectedAlpha2] of expectations) {
        it(`should recognize ${languageName}`, async function () {
            const detected = await getContentLanguage(sample);
            assert.equal(detected.language.alpha2, expectedAlpha2);
            assert.isFalse(detected.usedDefault);
            assert.isAtLeast(detected.bestGuess.score, 0.9);
        });
    }
});
|
||
|
||
// Short samples: checks the default-language fallback, and that it is not used
// when the caller disables it with `defaultLanguage: false`.
describe('Correctly handles short content classification', function () {
    it('uses default language', async function () {
        for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortIndonesian]) {
            const lang = await getContentLanguage(content);
            // The sample text is passed as the message so a failure names the offending input.
            assert.equal(lang.language.alpha2, 'en', content);
            assert.isTrue(lang.usedDefault, content);
        }
    });

    it('uses best guess when default language is not provided', async function () {
        for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortIndonesian]) {
            const lang = await getContentLanguage(content, {defaultLanguage: false});
            // Same as above: include the sample so a failing iteration is identifiable.
            assert.isFalse(lang.usedDefault, content);
        }
    });
});
|
||
});
|
||
|
||
describe('Sentiment', function() {
|
||
|
||
// With `defaultLanguage: false`, short samples are expected to yield a result
// whose `usableScore` flag is false.
describe('Is conservative when no default language is used for short content', function() {

    it('should return unusable result for short, ambiguous english content', async function() {
        const ambiguousSamples = [shortIndistinctEnglish, shortIndistinctEnglish2];
        for (const sample of ambiguousSamples) {
            const {usableScore} = await getStringSentiment(sample, {defaultLanguage: false});
            assert.isFalse(usableScore);
        }
    });

    it('should return unusable result for short, non-english content', async function() {
        const foreignSamples = [shortIndonesian, shortPolish, shortRomanian];
        for (const sample of foreignSamples) {
            const {usableScore} = await getStringSentiment(sample, {defaultLanguage: false});
            assert.isFalse(usableScore);
        }
    });

});
|
||
|
||
// Longer non-English samples, analysed with default options, are expected to
// yield a result whose `usableScore` flag is false.
describe('Is conservative when language confidence is high for unsupported languages', function() {

    it('should return unusable result for long, non-english content', async function() {
        const unsupportedSamples = [longIndonesian, longRussian, longItalian, longRomanian];
        for (const sample of unsupportedSamples) {
            const sentiment = await getStringSentiment(sample);
            // The sample is the assertion message so a failure names the input.
            assert.isFalse(sentiment.usableScore, sample);
        }
    });
});
|
||
|
||
// Even with `defaultLanguage: false`, short samples containing recognisable
// sentiment words, slang or emoji are expected to produce a usable score.
describe('vader/wink supersedes low confidence language guess', function() {

    it('should return usable result when valid words found by vader/wink', async function() {
        const wordSamples = [shortPositiveEnglish, shortNegativeEnglish];
        for (const sample of wordSamples) {
            const {usableScore} = await getStringSentiment(sample, {defaultLanguage: false});
            assert.isTrue(usableScore);
        }
    });

    it('should return usable result when valid slang found by vader/wink', async function() {
        const slangSamples = [shortSlangPositiveEnglish, shortSlangNegativeEnglish];
        for (const sample of slangSamples) {
            const {usableScore} = await getStringSentiment(sample, {defaultLanguage: false});
            assert.isTrue(usableScore);
        }
    });

    it('should return usable result when valid emojis found by vader/wink', async function() {
        const emojiSamples = [shortEmojiPositive, shortEmojiNegative];
        for (const sample of emojiSamples) {
            const {usableScore} = await getStringSentiment(sample, {defaultLanguage: false});
            assert.isTrue(usableScore);
        }
    });
});
|
||
|
||
describe('Detects correct sentiment', function() {
|
||
|
||
// Sentiment polarity checks with default options. The thresholds used here are
// +/-0.1 on `scoreWeighted`: >= 0.1 positive, <= -0.1 negative, in between neutral.
describe('In English', function() {

    it('should detect positive sentiment', async function() {
        const positiveSamples = [shortEmojiPositive, longPositiveEnglish, shortPositiveEnglish, shortSlangPositiveEnglish];
        for (const sample of positiveSamples) {
            const sentiment = await getStringSentiment(sample);
            assert.isTrue(sentiment.usableScore);
            assert.isAtLeast(sentiment.scoreWeighted, 0.1);
        }
    });

    it('should detect negative sentiment', async function() {
        const negativeSamples = [shortEmojiNegative, longNegativeEnglish, shortNegativeEnglish, shortSlangNegativeEnglish];
        for (const sample of negativeSamples) {
            const sentiment = await getStringSentiment(sample);
            assert.isTrue(sentiment.usableScore);
            assert.isAtMost(sentiment.scoreWeighted, -0.1);
        }
    });

    it('should detect neutral sentiment', async function() {
        const neutralSamples = [longNeutralEnglish, longNeutralEnglish2, longNeutralEnglish3];
        for (const sample of neutralSamples) {
            const sentiment = await getStringSentiment(sample);
            assert.isTrue(sentiment.usableScore, sample);
            assert.isAtMost(sentiment.scoreWeighted, 0.1, sample);
            assert.isAtLeast(sentiment.scoreWeighted, -0.1, sample);
        }
    });

    it('should detect neutral sentiment for short content when english is default language', async function() {
        const shortSamples = [shortIndistinctEnglish, shortIndistinctEnglish2, shortPolish];
        for (const sample of shortSamples) {
            const sentiment = await getStringSentiment(sample);
            assert.isTrue(sentiment.usableScore);
            assert.isAtMost(sentiment.scoreWeighted, 0.1, sample);
            assert.isAtLeast(sentiment.scoreWeighted, -0.1, sample);
        }
    });
});
|
||
|
||
// Positive-sentiment check for the Spanish samples, using default options and
// the same 0.1 `scoreWeighted` threshold as the English positive test.
describe('In Spanish', function() {
    it('should detect positive ', async function() {
        for(const content of [longPositiveSpanish, longPositiveSpanish2]) {
            const res = await getStringSentiment(content);
            // Report the sample actually under test; the message was previously
            // hard-coded to longPositiveSpanish2 for every iteration.
            assert.isTrue(res.usableScore, content);
            assert.isAtLeast(res.scoreWeighted, 0.1, content);
        }
    });
});
|
||
});
|
||
|
||
describe('Testing', function () {
|
||
|
||
// Checks how parseTextToNumberComparison translates sentiment phrases:
// "neutral" phrases become a [-0.1, 0.1] range (optionally negated), every
// other phrase becomes an operator/value pair.
describe('Parsing user input to comparison', function() {

    it(`parses 'is neutral'`, function() {
        const parsed = parseTextToNumberComparison('is neutral') as RangedComparison;
        assert.deepEqual(parsed.range, [-0.1, 0.1]);
        assert.isFalse(parsed.not);
    });

    it(`parses 'is not neutral'`, function() {
        const parsed = parseTextToNumberComparison('is not neutral') as RangedComparison;
        assert.deepEqual(parsed.range, [-0.1, 0.1]);
        assert.isTrue(parsed.not);
    });

    // Phrases that parse to a single operator/value comparison. Negating a
    // phrase flips the operator to its strict opposite and keeps the threshold.
    const genericCases: {title: string, text: string, operator: string, value: number}[] = [
        {title: `parses 'is positive'`, text: 'is positive', operator: '>=', value: 0.1},
        {title: `parses 'is very positive'`, text: 'is very positive', operator: '>=', value: 0.3},
        {title: `parses 'is extremely positive'`, text: 'is extremely positive', operator: '>=', value: 0.6},
        {title: `parses 'is negative'`, text: 'is negative', operator: '<=', value: -0.1},
        {title: `parses 'is very negative'`, text: 'is very negative', operator: '<=', value: -0.3},
        {title: `parses 'is extremely negative'`, text: 'is extremely negative', operator: '<=', value: -0.6},
        {title: `parses negative negations`, text: 'is not extremely negative', operator: '>', value: -0.6},
        {title: `parses positive negations`, text: 'is not positive', operator: '<', value: 0.1},
    ];

    for (const {title, text, operator, value} of genericCases) {
        it(title, function() {
            const parsed = parseTextToNumberComparison(text) as GenericComparison;
            assert.equal(parsed.operator, operator);
            assert.equal(parsed.value, value);
        });
    }

});
|
||
|
||
// Samples analysed with `defaultLanguage: false` here must not pass an
// 'is positive' comparison when run through testSentiment.
it('should fail test if score is unusable', async function() {
    const isPositive = parseTextToNumberComparison('is positive');
    const unusableSamples = [shortIndistinctEnglish, shortIndistinctEnglish2, shortPolish, longRomanian];

    for (const sample of unusableSamples) {
        const sentiment = await getStringSentiment(sample, {defaultLanguage: false});
        const {passes} = testSentiment(sentiment, isPositive);
        assert.isFalse(passes);
    }
});
|
||
|
||
// Positive samples, analysed with `defaultLanguage: false`, must pass the
// operator/value comparison parsed from 'is positive'.
it('should handle generic comparisons', async function() {
    const isPositive = parseTextToNumberComparison('is positive');
    const positiveSamples = [shortEmojiPositive, longPositiveEnglish, shortPositiveEnglish, shortSlangPositiveEnglish];

    for (const sample of positiveSamples) {
        const sentiment = await getStringSentiment(sample, {defaultLanguage: false});
        const {passes} = testSentiment(sentiment, isPositive);
        assert.isTrue(passes);
    }
});
|
||
|
||
// Neutral samples, analysed with `defaultLanguage: false`, must pass the
// ranged comparison parsed from 'is neutral'.
it('should handle ranged comparisons', async function() {
    const isNeutral = parseTextToNumberComparison('is neutral');
    const neutralSamples = [longNeutralEnglish, longNeutralEnglish2, longNeutralEnglish3];

    for (const sample of neutralSamples) {
        const sentiment = await getStringSentiment(sample, {defaultLanguage: false});
        const {passes} = testSentiment(sentiment, isNeutral);
        assert.isTrue(passes);
    }
});
|
||
|
||
// Clearly positive or negative samples, analysed with `defaultLanguage: false`,
// must pass the negated ranged comparison parsed from 'is not neutral'.
it('should handle negated ranged comparisons', async function() {
    const isNotNeutral = parseTextToNumberComparison('is not neutral');
    const polarSamples = [longPositiveEnglish, longPositiveSpanish, longNegativeEnglish];

    for (const sample of polarSamples) {
        const sentiment = await getStringSentiment(sample, {defaultLanguage: false});
        const {passes} = testSentiment(sentiment, isNotNeutral);
        // The sample is the assertion message so a failure names the input.
        assert.isTrue(passes, sample);
    }
});
|
||
});
|
||
});
|
||
|
||
|