thisandagain · jharrilim · May 19, 2019 · May 19, 2019 · May 19, 2019 · May 19, 2019
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,6 @@
+node_modules
+Dockerfile
+.circleci
+.vscode
+docs
+lib
diff --git a/.eslintrc b/.eslintrc
@@ -4,13 +4,17 @@
         "eol-last": [2],
         "indent": [2, 4],
         "quotes": [2, "single"],
-        "linebreak-style": [2, "unix"],
-        "max-len": [2, 80, 4],
         "semi": [2, "always"],
-        "strict": [2, "never"]
+        "strict": [2, "never"],
+        "no-console": "off"
     },
     "env": {
-        "node": true
+        "node": true,
+        "es6": true,
+        "jest": true
+    },
+    "parserOptions": {
+        "ecmaVersion": 2018
     },
     "extends": "eslint:recommended"
 }
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,6 @@ coverage
 
 /* Other */
 .notes.txt
+
+docs
+lib
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,21 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "node",
+            "request": "launch",
+            "name": "Run Unit Tests",
+            "program": "${workspaceFolder}/node_modules/jest/bin/jest",
+            "args": [
+                "-c",
+                "jest.config.unit.js"
+            ],
+            "console": "integratedTerminal",
+            "internalConsoleOptions": "neverOpen",
+            "protocol": "inspector"
+        }
+    ]
+}
diff --git a/README.md b/README.md
@@ -13,121 +13,229 @@ Sentiment is a Node.js module that uses the [AFINN-165](http://www2.imm.dtu.dk/p
 
 ## Table of contents
 
-- [Installation](#installation)
-- [Usage example](#usage-example)
-- [Adding new languages](#adding-new-languages)
-- [Adding and overwriting words](#adding-and-overwriting-words)
-- [API Reference](#api-reference)
-- [How it works](#how-it-works)
-- [Benchmarks](#benchmarks)
-- [Validation](#validation)
-- [Testing](#testing)
+- [sentiment](#sentiment)
+    - [AFINN-based sentiment analysis for Node.js](#afinn-based-sentiment-analysis-for-nodejs)
+  - [Table of contents](#table-of-contents)
+  - [Installation](#installation)
+  - [Usage example](#usage-example)
+  - [Adding new languages](#adding-new-languages)
+  - [Adding and overwriting words](#adding-and-overwriting-words)
+  - [API Reference](#api-reference)
+      - [`sentiment.analyze(phrase, [options], [callback])`](#sentimentanalyzephrase-options-callback)
+      - [`AnalyzeOptions`](#analyzeoptions)
+      - [`sentiment.registerLanguage(languageCode, language)`](#sentimentregisterlanguagelanguagecode-language)
+      - [`Language`](#language)
+      - [`ScoringStrategy`](#scoringstrategy)
+  - [How it works](#how-it-works)
+    - [AFINN](#afinn)
+    - [Tokenization](#tokenization)
+  - [Benchmarks](#benchmarks)
+  - [Validation](#validation)
+    - [Rand Accuracy (AFINN Only)](#rand-accuracy-afinn-only)
+    - [Rand Accuracy (AFINN + Additions)](#rand-accuracy-afinn--additions)
+  - [Testing](#testing)
 
 ## Installation
+
 ```bash
 npm install sentiment
 ```
 
 ## Usage example
+
+JavaScript:
 ```js
-var Sentiment = require('sentiment');
-var sentiment = new Sentiment();
-var result = sentiment.analyze('Cats are stupid.');
-console.dir(result);    // Score: -2, Comparative: -0.666
+const Sentiment = require('sentiment');
+const sentiment = new Sentiment();
+const result = sentiment.analyze('Cats are stupid.');
+console.table(result);    // Score: -2, Comparative: -0.666
+```
+
+TypeScript:
+```ts
+import { Sentiment } from 'sentiment';
+const sentiment = new Sentiment();
+const result = sentiment.analyze('Cats are stupid.');
+console.table(result);    // Score: -2, Comparative: -0.666
 ```
 
 ## Adding new languages
+
 You can add support for a new language by registering it using the `registerLanguage` method:
 
+JavaScript:
 ```js
-var frLanguage = {
+const Sentiment = require('sentiment');
+const sentiment = new Sentiment(); 
+
+const frLanguage = {
+  labels: { 'stupide': -2 }
+};
+sentiment.registerLanguage('fr', frLanguage);
+
+const result = sentiment.analyze('Le chat est stupide.', { languageCode: 'fr' });
+console.dir(result);    // Score: -2, Comparative: -0.5
+```
+
+TypeScript:
+```ts
+import { Sentiment, LanguageInput } from 'sentiment';
+
+const sentiment = new Sentiment();
+const frLanguage: LanguageInput = {
   labels: { 'stupide': -2 }
 };
 sentiment.registerLanguage('fr', frLanguage);
 
-var result = sentiment.analyze('Le chat est stupide.', { language: 'fr' });
+const result = sentiment.analyze('Le chat est stupide.', { languageCode: 'fr' });
 console.dir(result);    // Score: -2, Comparative: -0.5
+
 ```
 
 You can also define custom scoring strategies to handle things like negation and emphasis on a per-language basis:
+
+JavaScript:
 ```js
-var frLanguage = {
+const Sentiment = require('sentiment');
+const sentiment = new Sentiment();
+
+const frLanguage = {
   labels: { 'stupide': -2 },
-  scoringStrategy: {
-    apply: function(tokens, cursor, tokenScore) {
-      if (cursor > 0) {
-        var prevtoken = tokens[cursor - 1];
-        if (prevtoken === 'pas') {
-          tokenScore = -tokenScore;
-        }
+  scoringStrategy: function(tokens, cursor, tokenScore) {
+    if (cursor > 0) {
+      const prevtoken = tokens[cursor - 1];
+      if (prevtoken === 'pas') {
+        tokenScore = -tokenScore;
       }
-      return tokenScore;
     }
+    return tokenScore;
   }
 };
 sentiment.registerLanguage('fr', frLanguage);
 
-var result = sentiment.analyze('Le chat n\'est pas stupide', { language: 'fr' });
+const result = sentiment.analyze('Le chat n\'est pas stupide', { languageCode: 'fr' });
 console.dir(result);    // Score: 2, Comparative: 0.4
 ```
 
+TypeScript:
+```ts
+import { Sentiment, LanguageInput } from 'sentiment';
+
+const frLanguage: LanguageInput = {
+  labels: { 'stupide': -2 },
+  scoringStrategy: (tokens, cursor, tokenScore) => {
+    if (cursor > 0) {
+      const prevtoken = tokens[cursor - 1];
+      if (prevtoken === 'pas') {
+        tokenScore = -tokenScore;
+      }
+    }
+    return tokenScore;
+  }
+};
+
+const sentiment = new Sentiment();
+
+sentiment.registerLanguage('fr', frLanguage);
+
+const result = sentiment.analyze('Le chat n\'est pas stupide', { languageCode: 'fr' });
+console.dir(result);    // Score: 2, Comparative: 0.4
+
+```
+
 ## Adding and overwriting words
+
 You can append and/or overwrite values from AFINN by simply injecting key/value pairs into a sentiment method call:
-```javascript
-var options = {
+
+JavaScript:
+```js
+const Sentiment = require('sentiment');
+const sentiment = new Sentiment();
+
+const options = {
   extras: {
-    'cats': 5,
-    'amazing': 2
+    cats: 5,
+    amazing: 2
   }
 };
-var result = sentiment.analyze('Cats are totally amazing!', options);
+const result = sentiment.analyze('Cats are totally amazing!', options);
 console.dir(result);    // Score: 7, Comparative: 1.75
 ```
 
-## API Reference
+TypeScript:
+```ts
+import { Sentiment, AnalyzeOptions } from 'sentiment';
 
-#### `var sentiment = new Sentiment([options])`
+const options: AnalyzeOptions = {
+  extras: {
+    cats: 5,
+    amazing: 2
+  }
+};
 
-| Argument | Type       | Required | Description                                                |
-|----------|------------|----------|------------------------------------------------------------|
-| options  | `object`   | `false`  | Configuration options _(no options supported currently)_   |
+const result = sentiment.analyze('Cats are totally amazing!', options);
+console.dir(result);    // Score: 7, Comparative: 1.75
 
----
+```
+## API Reference
 
 #### `sentiment.analyze(phrase, [options], [callback])`
 
-| Argument | Type       | Required | Description             |
-|----------|------------|----------|-------------------------|
-| phrase   | `string`   | `true`   | Input phrase to analyze |
-| options  | `object`   | `false`  | Options _(see below)_   |
-| callback | `function` | `false`  | If specified, the result is returned using this callback function |
+| Argument | Type             | Required | Description                  |
+| -------- | ---------------- | -------- | ---------------------------- |
+| phrase   | `string`         | `true`   | Input phrase to analyze      |
+| options  | `AnalyzeOptions` | `false`  | AnalyzeOptions _(see below)_ |
 
+---
 
-`options` object properties:
+#### `AnalyzeOptions`
 
-| Property | Type      | Default | Description                                                   |
-|----------|-----------|---------|---------------------------------------------------------------|
-| language | `string`  | `'en'`  | Language to use for sentiment analysis                        |
-| extras   | `object`  | `{}`    | Set of labels and their associated values to add or overwrite |
+| Property     | Type     | Default | Description                                                   |
+| ------------ | -------- | ------- | ------------------------------------------------------------- |
+| languageCode | `string` | `'en'`  | Language to use for sentiment analysis                        |
+| extras       | `object` | `{}`    | Set of labels and their associated values to add or overwrite |
 
 ---
 
 #### `sentiment.registerLanguage(languageCode, language)`
 
-| Argument     | Type     | Required | Description                                                         |
-|--------------|----------|----------|---------------------------------------------------------------------|
-| languageCode | `string` | `true`   | International two-digit code for the language to add                |
-| language     | `object` | `true`   | Language module (see [Adding new languages](#adding-new-languages)) |
+| Argument     | Type            | Required | Description                                                         |
+| ------------ | --------------- | -------- | ------------------------------------------------------------------- |
+| languageCode | `string`        | `true`   | International two-letter code for the language to add               |
+| language     | `LanguageInput` | `true`   | Language module (see [Adding new languages](#adding-new-languages)) |
 
 ---
 
+#### `Language`
+
+| Property        | Type                       | Default                  | Description                                                                                            |
+| --------------- | -------------------------- | ------------------------ | ------------------------------------------------------------------------------------------------------ |
+| labels          | `{[word: string]: number}` | `'en'`                   | Set of labels and their associated values                                                              |
+| scoringStrategy | `ScoringStrategy`          | `defaultScoringStrategy` | A function used to calculate the score for a word. The default function simply returns the tokenScore. |
+
+---
+
+#### `ScoringStrategy`
+
+| Argument   | Type     | Description                                        |
+| ---------- | -------- | -------------------------------------------------- |
+| tokens     | string[] | A list of tokens used for analysis                 |
+| cursor     | number   | An index that points to the current word in tokens |
+| tokenScore | number   | The score of the current word                      |
+
+| Returns | Description                                      |
+| ------- | ------------------------------------------------ |
+| number  | A numeric value representing the score of a word |
+
 ## How it works
+
 ### AFINN
 AFINN is a list of words rated for valence with an integer between minus five (negative) and plus five (positive). Sentiment analysis is performed by cross-checking the string tokens (words, emojis) with the AFINN list and getting their respective scores. The comparative score is simply: `sum of each token / number of tokens`. So for example let's take the following:
 
 `I love cats, but I am allergic to them.`
 
 That string results in the following:
+
 ```javascript
 {
     score: 1,
@@ -180,14 +288,24 @@ Tokenization works by splitting the lines of input string, then removing the spe
 ---
 
 ## Benchmarks
-A primary motivation for designing `sentiment` was performance. As such, it includes a benchmark script within the test directory that compares it against the [Sentimental](https://github.com/thinkroth/Sentimental) module which provides a nearly equivalent interface and approach. Based on these benchmarks, running on a MacBook Pro with Node v6.9.1, `sentiment` is nearly twice as fast as alternative implementations:
+A primary motivation for designing `sentiment` was performance. As such, it includes a benchmark script within the test directory that compares it against the [Sentimental](https://github.com/thinkroth/Sentimental) module which provides a nearly equivalent interface and approach. Based on these benchmarks using Node v11.10.1, `sentiment` is nearly twice as fast as alternative implementations.
+
+Bench specs:
+  - i5-8400 @ 2.80GHz 6 core
+  - 16 GB (8GB x 2) RAM DDR4 2666 MT/s
+  - WD Blue 3D NAND SSD - SATA III 6 Gb/s M.2
+
 
 ```bash
-sentiment (Latest) x 861,312 ops/sec ±0.87% (89 runs sampled)
-Sentimental (1.0.1) x 451,066 ops/sec ±0.99% (92 runs sampled)
+sentiment (Latest)  - Short: x 979,943 ops/sec ±2.01% (90 runs sampled)
+sentiment (Latest)  - Long : x 4,370 ops/sec ±1.04% (90 runs sampled)
+
+Sentimental (1.0.1) - Short: x 573,312 ops/sec ±1.17% (90 runs sampled)
+Sentimental (1.0.1) - Long : x 2,143 ops/sec ±0.37% (92 runs sampled)
 ```
 
 To run the benchmarks yourself:
+
 ```bash
 npm run test:benchmark
 ```
@@ -198,18 +316,21 @@ npm run test:benchmark
 While the accuracy provided by AFINN is quite good considering its computational performance (see above) there is always room for improvement. Therefore the `sentiment` module is open to accepting PRs which modify or amend the AFINN / Emoji datasets or implementation given that they improve accuracy and maintain similar performance characteristics. In order to establish this, we test the `sentiment` module against [three labelled datasets provided by UCI](https://archive.ics.uci.edu/ml/datasets/Sentiment+Labelled+Sentences).
 
 To run the validation tests yourself:
+
 ```bash
 npm run test:validate
 ```
 
 ### Rand Accuracy (AFINN Only)
+
 ```
 Amazon:  0.70
 IMDB:    0.76
 Yelp:    0.67
 ```
 
 ### Rand Accuracy (AFINN + Additions)
+
 ```
 Amazon:  0.72 (+2%)
 IMDB:    0.76 (+0%)
@@ -219,6 +340,7 @@ Yelp:    0.69 (+2%)
 ---
 
 ## Testing
+
 ```bash
 npm test
 ```
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,3 +11,6 @@ coverage @@
     /* Other */
     .notes.txt
+    docs
+    lib