import {describe, it} from 'mocha';
import chai,{assert} from 'chai';
import chaiAsPromised from 'chai-as-promised';
import {
    getContentLanguage,
    getLanguageTypeFromValue,
    getStringSentiment,
    parseTextToNumberComparison,
    testSentiment
} from "../src/Common/LangaugeProcessing";
import {GenericComparison, RangedComparison} from "../src/Common/Infrastructure/Comparisons";

// Adds promise-aware assertions (e.g. assert.isRejected) to chai.
chai.use(chaiAsPromised);

// ---------------------------------------------------------------------------
// Fixtures. Naming convention: <length><Tone/Kind><Language>.
// ---------------------------------------------------------------------------

// English, neutral tone
const longNeutralEnglish = "This is a normal english sentence without emotion";
const longNeutralEnglish2 = 'I am neutral on the current subject';
const longNeutralEnglish3 = 'The midterms were an election that happened';

// English, clear tone
const longNegativeEnglish = "I hate when idiots drive their bad cars terribly. 😡";
const longPositiveEnglish = "We love to be happy and laugh on this wonderful, amazing day";

// English, short content
const shortIndistinctEnglish = "metal gear";
const shortIndistinctEnglish2 = "idk hole ref";
const shortPositiveEnglish = "haha fun";
const shortNegativeEnglish = "fuck you";
const shortSlangPositiveEnglish = "lol lmao";
const shortSlangNegativeEnglish = "get fuked";

// Other languages
const longIndonesian = "setiap kali scroll mesti nampak dia nie haih";
const shortIndonesian = "Saya bangga saya rasis";
const shortPolish = 'Dobry wieczór';
const longRussian = 'Чит на золото для аватарии без скачивания бесплатно';
const longItalian = 'Sembra ormai passato un secolo, visto che gli anime sono praticamente scomparsi dalla televisione.';
const shortRomanian = 'Tu știi unde sta?';
const longRomanian = 'Deci , daca aveti chef de un mic protest , va astept la aceste coordonate';
const longFrench = "J’approuve et à ce moment là ça se soigne plus malheureusement";
const longSpanish = "La segunda parece una mezcla entre una convención de fanáticos de los monster truck y un vertedero.";
const longPositiveSpanish = 'me encanta esta hermosa cancion';
const longPositiveSpanish2 = 'Increíble muy divertido gracias por compartir';
const longGerman = "bin mir auch sicher, dass zb mein 65er halb so viel wiegt wie ein kasten Bier";

// Emoji-driven tone
const shortEmojiNegative = "France 😫 😞 :(";
const shortEmojiPositive = "France 😂 😄 😁";

describe('Language Detection', function () {

    // NOTE: suite (describe) callbacks must be synchronous; only test (it) callbacks may be async.
    describe('Derives language from user input', function () {

        it('gets from valid, case-insensitive alpha2', async function () {
            const lang = await getLanguageTypeFromValue('eN');
            assert.equal(lang.alpha2, 'en');
        });

        it('gets from valid, case-insensitive alpha3', async function () {
            const lang = await getLanguageTypeFromValue('eNg');
            assert.equal(lang.alpha2, 'en');
        });

        it('gets from valid, case-insensitive language name', async function () {
            const lang = await getLanguageTypeFromValue('EnGlIsH');
            assert.equal(lang.alpha2, 'en');
        });

        it('throws on invalid value', async function () {
            // The assertion itself is a promise: it must be awaited (or returned),
            // otherwise the test finishes before the assertion is evaluated.
            await assert.isRejected(getLanguageTypeFromValue('pofdsfa'));
        });
    });

    describe('Recognizes the language in moderately long content well', function () {

        it('should recognize english', async function () {
            const lang = await getContentLanguage(longPositiveEnglish);
            assert.equal(lang.language.alpha2, 'en');
            assert.isFalse(lang.usedDefault);
            assert.isAtLeast(lang.bestGuess.score, 0.9);
        });

        it('should recognize french', async function () {
            const lang = await getContentLanguage(longFrench);
            assert.equal(lang.language.alpha2, 'fr');
            assert.isFalse(lang.usedDefault);
            assert.isAtLeast(lang.bestGuess.score, 0.9);
        });

        it('should recognize spanish', async function () {
            const lang = await getContentLanguage(longSpanish);
            assert.equal(lang.language.alpha2, 'es');
            assert.isFalse(lang.usedDefault);
            assert.isAtLeast(lang.bestGuess.score, 0.9);
        });

        it('should recognize german', async function () {
            const lang = await getContentLanguage(longGerman);
            assert.equal(lang.language.alpha2, 'de');
            assert.isFalse(lang.usedDefault);
            assert.isAtLeast(lang.bestGuess.score, 0.9);
        });

        it('should recognize indonesian', async function () {
            const lang = await getContentLanguage(longIndonesian);
            assert.equal(lang.language.alpha2, 'id');
            assert.isFalse(lang.usedDefault);
            assert.isAtLeast(lang.bestGuess.score, 0.9);
        });
    });

    describe('Correctly handles short content classification', function () {

        it('uses default language', async function () {
            for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortIndonesian]) {
                const lang = await getContentLanguage(content);
                assert.equal(lang.language.alpha2, 'en', content);
                assert.isTrue(lang.usedDefault, content);
            }
        });

        it('uses best guess when default language is not provided', async function () {
            for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortIndonesian]) {
                const lang = await getContentLanguage(content, {defaultLanguage: false});
                assert.isFalse(lang.usedDefault, content);
            }
        });
    });
});

describe('Sentiment', function () {

    describe('Is conservative when no default language is used for short content', function () {

        it('should return unusable result for short, ambiguous english content', async function () {
            for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2]) {
                const res = await getStringSentiment(content, {defaultLanguage: false});
                assert.isFalse(res.usableScore, content);
            }
        });

        it('should return unusable result for short, non-english content', async function () {
            for (const content of [shortIndonesian, shortPolish, shortRomanian]) {
                const res = await getStringSentiment(content, {defaultLanguage: false});
                assert.isFalse(res.usableScore, content);
            }
        });
    });

    describe('Is conservative when language confidence is high for unsupported languages', function () {

        it('should return unusable result for long, non-english content', async function () {
            for (const content of [longIndonesian, longRussian, longItalian, longRomanian]) {
                const res = await getStringSentiment(content);
                assert.isFalse(res.usableScore, content);
            }
        });
    });

    describe('vader/wink supersedes low confidence language guess', function () {

        it('should return usable result when valid words found by vader/wink', async function () {
            for (const content of [shortPositiveEnglish, shortNegativeEnglish]) {
                const res = await getStringSentiment(content, {defaultLanguage: false});
                assert.isTrue(res.usableScore, content);
            }
        });

        it('should return usable result when valid slang found by vader/wink', async function () {
            for (const content of [shortSlangPositiveEnglish, shortSlangNegativeEnglish]) {
                const res = await getStringSentiment(content, {defaultLanguage: false});
                assert.isTrue(res.usableScore, content);
            }
        });

        it('should return usable result when valid emojis found by vader/wink', async function () {
            for (const content of [shortEmojiPositive, shortEmojiNegative]) {
                const res = await getStringSentiment(content, {defaultLanguage: false});
                assert.isTrue(res.usableScore, content);
            }
        });
    });

    describe('Detects correct sentiment', function () {

        describe('In English', function () {

            it('should detect positive sentiment', async function () {
                for (const content of [shortEmojiPositive, longPositiveEnglish, shortPositiveEnglish, shortSlangPositiveEnglish]) {
                    const res = await getStringSentiment(content);
                    assert.isTrue(res.usableScore, content);
                    assert.isAtLeast(res.scoreWeighted, 0.1, content);
                }
            });

            it('should detect negative sentiment', async function () {
                for (const content of [shortEmojiNegative, longNegativeEnglish, shortNegativeEnglish, shortSlangNegativeEnglish]) {
                    const res = await getStringSentiment(content);
                    assert.isTrue(res.usableScore, content);
                    assert.isAtMost(res.scoreWeighted, -0.1, content);
                }
            });

            it('should detect neutral sentiment', async function () {
                for (const content of [longNeutralEnglish, longNeutralEnglish2, longNeutralEnglish3]) {
                    const res = await getStringSentiment(content);
                    assert.isTrue(res.usableScore, content);
                    assert.isAtMost(res.scoreWeighted, 0.1, content);
                    assert.isAtLeast(res.scoreWeighted, -0.1, content);
                }
            });

            it('should detect neutral sentiment for short content when english is default language', async function () {
                for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortPolish]) {
                    const res = await getStringSentiment(content);
                    assert.isTrue(res.usableScore, content);
                    assert.isAtMost(res.scoreWeighted, 0.1, content);
                    assert.isAtLeast(res.scoreWeighted, -0.1, content);
                }
            });
        });

        describe('In Spanish', function () {

            it('should detect positive ', async function () {
                for (const content of [longPositiveSpanish, longPositiveSpanish2]) {
                    const res = await getStringSentiment(content);
                    // Report the fixture actually under test, not a fixed one.
                    assert.isTrue(res.usableScore, content);
                    assert.isAtLeast(res.scoreWeighted, 0.1, content);
                }
            });
        });
    });

    describe('Testing', function () {

        describe('Parsing user input to comparison', function () {

            it(`parses 'is neutral'`, function () {
                const res = parseTextToNumberComparison('is neutral') as RangedComparison;
                assert.deepEqual(res.range, [-0.1, 0.1]);
                assert.isFalse(res.not);
            });

            it(`parses 'is not neutral'`, function () {
                const res = parseTextToNumberComparison('is not neutral') as RangedComparison;
                assert.deepEqual(res.range, [-0.1, 0.1]);
                assert.isTrue(res.not);
            });

            it(`parses 'is positive'`, function () {
                const res = parseTextToNumberComparison('is positive') as GenericComparison;
                assert.equal(res.operator, '>=');
                assert.equal(res.value, 0.1);
            });

            it(`parses 'is very positive'`, function () {
                const res = parseTextToNumberComparison('is very positive') as GenericComparison;
                assert.equal(res.operator, '>=');
                assert.equal(res.value, 0.3);
            });

            it(`parses 'is extremely positive'`, function () {
                const res = parseTextToNumberComparison('is extremely positive') as GenericComparison;
                assert.equal(res.operator, '>=');
                assert.equal(res.value, 0.6);
            });

            it(`parses 'is negative'`, function () {
                const res = parseTextToNumberComparison('is negative') as GenericComparison;
                assert.equal(res.operator, '<=');
                assert.equal(res.value, -0.1);
            });

            it(`parses 'is very negative'`, function () {
                const res = parseTextToNumberComparison('is very negative') as GenericComparison;
                assert.equal(res.operator, '<=');
                assert.equal(res.value, -0.3);
            });

            it(`parses 'is extremely negative'`, function () {
                const res = parseTextToNumberComparison('is extremely negative') as GenericComparison;
                assert.equal(res.operator, '<=');
                assert.equal(res.value, -0.6);
            });

            it(`parses negative negations`, function () {
                const res = parseTextToNumberComparison('is not extremely negative') as GenericComparison;
                assert.equal(res.operator, '>');
                assert.equal(res.value, -0.6);
            });

            it(`parses positive negations`, function () {
                const res = parseTextToNumberComparison('is not positive') as GenericComparison;
                assert.equal(res.operator, '<');
                assert.equal(res.value, 0.1);
            });
        });

        it('should fail test if score is unusable', async function () {
            const comparison = parseTextToNumberComparison('is positive');

            for (const content of [shortIndistinctEnglish, shortIndistinctEnglish2, shortPolish, longRomanian]) {
                const sentimentResult = await getStringSentiment(content, {defaultLanguage: false});
                const testResult = testSentiment(sentimentResult, comparison);
                assert.isFalse(testResult.passes, content);
            }
        });

        it('should handle generic comparisons', async function () {
            const comparison = parseTextToNumberComparison('is positive');

            for (const content of [shortEmojiPositive, longPositiveEnglish, shortPositiveEnglish, shortSlangPositiveEnglish]) {
                const sentimentResult = await getStringSentiment(content, {defaultLanguage: false});
                const testResult = testSentiment(sentimentResult, comparison);
                assert.isTrue(testResult.passes, content);
            }
        });

        it('should handle ranged comparisons', async function () {
            const comparison = parseTextToNumberComparison('is neutral');

            for (const content of [longNeutralEnglish, longNeutralEnglish2, longNeutralEnglish3]) {
                const sentimentResult = await getStringSentiment(content, {defaultLanguage: false});
                const testResult = testSentiment(sentimentResult, comparison);
                assert.isTrue(testResult.passes, content);
            }
        });

        it('should handle negated ranged comparisons', async function () {
            const comparison = parseTextToNumberComparison('is not neutral');

            for (const content of [longPositiveEnglish, longPositiveSpanish, longNegativeEnglish]) {
                const sentimentResult = await getStringSentiment(content, {defaultLanguage: false});
                const testResult = testSentiment(sentimentResult, comparison);
                assert.isTrue(testResult.passes, content);
            }
        });
    });
});