# adaptiveSentimentClassifier.py — from the SkyReNewed/AdaptiveSentimentClassifier repository (main branch).

import torch
import torch.nn as nn
import json
import os

# Initial training data: 100 short movie-review snippets used to build the
# vocabulary and train the classifier when no saved checkpoint can be loaded.
# Order matters: initialLabels below is positional, so the first 50 entries
# must stay positive and the last 50 negative.
initialTexts = [
    # Positive (50)
    "i love this movie", "great film amazing", "wonderful experience loved it",
    "best movie ever", "fantastic acting great story", "really enjoyed this",
    "absolutely brilliant film", "loved every minute", "perfect movie night choice",
    "highly recommend this", "amazing cinematography beautiful", "outstanding performance",
    "incredible story telling", "masterpiece of cinema", "thoroughly entertaining watch",
    "exceeded my expectations", "heartwarming and funny", "beautifully crafted film",
    "superb acting throughout", "captivating from start", "delightful and charming",
    "must see movie", "wonderful cast performance", "engaging and thrilling",
    "loved the characters", "excellent screenplay writing", "phenomenal direction style",
    "touching and emotional", "fun entertaining movie", "gripping intense drama",
    "stellar performances overall", "impressive visual effects", "compelling narrative arc",
    "genuinely funny comedy", "moving powerful story", "expertly directed film",
    "flawless execution throughout", "mesmerizing beautiful cinematography", "unforgettable viewing experience",
    "brilliant script writing", "amazing talent displayed", "thoroughly impressed satisfied",
    "pure cinematic joy", "remarkable achievement film", "stunning visual masterpiece",
    "exceptional quality production", "riveting start finish", "absolute delight watch",
    "pure entertainment gold", "magnificent epic film",
    # Negative (50)
    "terrible movie awful", "worst film ever", "hated it completely boring",
    "bad acting poor story", "waste of time", "disappointing and dull",
    "absolutely horrible film", "complete disaster movie", "unwatchable garbage trash",
    "painfully boring slow", "terrible waste money", "awful script writing",
    "dreadful acting throughout", "plot makes no sense", "horrible cinematography terrible",
    "worst movie year", "extremely disappointing film", "total letdown disaster",
    "unbearable to watch", "poorly executed concept", "lazy filmmaking evident",
    "cringeworthy dialogue throughout", "forgettable and bland", "mind numbingly boring",
    "weak plot development", "atrocious special effects", "insufferable characters annoying",
    "predictable and cliche", "lacks any originality", "painful viewing experience",
    "amateurish production quality", "fails on levels", "frustrating to watch",
    "incoherent mess film", "tedious and tiresome", "laughably bad movie",
    "cheap looking production", "nonsensical story plot", "irritating and annoying",
    "sloppy direction editing", "hollow empty film", "regret watching this",
    "skip this one", "not worth time", "complete failure movie",
    "avoid at costs", "utterly forgettable film", "dismal and depressing",
    "horrendous acting script", "trainwreck of film"
]
# Labels aligned by index with initialTexts: 1 = positive, 0 = negative.
initialLabels = [1]*50 + [0]*50

class adaptiveSentimentClassifier:
    """Binary sentiment classifier that abstains when unsure and learns from labels.

    Text is lower-cased and split on whitespace; each word maps to an integer id
    through ``self.vocab`` (id 0 is ``<pad>``, id 1 is ``<unknown>``). The ids are
    fed to an ``LSTMClassifier`` that emits one logit, trained with
    ``BCEWithLogitsLoss`` (target 1 = positive, 0 = negative).

    On construction a checkpoint is loaded from ``modelDir`` if present;
    otherwise the vocabulary and model are built from ``initialTexts`` and
    trained on ``initialTexts`` / ``initialLabels``.
    """

    def __init__(self, confidenceThreshold=0.4, unknownThreshold=0.5, modelDir="model"):
        """Load a saved model from ``modelDir`` or train a fresh one.

        Args:
            confidenceThreshold: Predictions whose confidence
                (``abs(prob - 0.5) * 2``) is below this value are withheld.
            unknownThreshold: Predictions are withheld when the fraction of
                out-of-vocabulary words exceeds this value.
            modelDir: Directory holding ``classifier.pt`` and ``newLabels.json``.
        """
        self.confidenceThreshold = confidenceThreshold
        self.unknownThreshold = unknownThreshold
        self.modelDir = modelDir
        self.vocab = {"<pad>": 0, "<unknown>": 1}
        self.model = None
        self.optimizer = None
        self.criterion = nn.BCEWithLogitsLoss()

        if not self.loadModel():
            self._buildVocab(initialTexts)
            self._buildModel()
            self._initialTrain()

    def _buildVocab(self, texts):
        """Assign the next free id to every not-yet-known word in ``texts``."""
        for text in texts:
            for word in text.lower().split():
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)

    def _buildModel(self):
        """Create a fresh model sized to the current vocabulary, plus a new Adam optimizer."""
        self.model = LSTMClassifier(len(self.vocab), embedDim=16, hiddenDim=32)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.01)

    def _encode(self, text):
        """Convert ``text`` to a tensor of word ids.

        Returns:
            ``(ids, unknownRatio)`` where ``ids`` is a 1-D int64 tensor and
            ``unknownRatio`` is the fraction of words mapped to ``<unknown>``
            (``1.0`` when the text contains no words).
        """
        unknownId = self.vocab["<unknown>"]
        tokens = [self.vocab.get(word, unknownId) for word in text.lower().split()]
        unknownCount = sum(1 for token in tokens if token == unknownId)
        unknownRatio = unknownCount / len(tokens) if tokens else 1.0
        # Pin the dtype: torch.tensor([]) would otherwise be float32, which
        # nn.Embedding rejects as an index tensor.
        return torch.tensor(tokens, dtype=torch.long), unknownRatio

    def _initialTrain(self, epochs=50):
        """Train on the built-in dataset, one sample per optimizer step."""
        # Encoding does not change between epochs, so do it once up front.
        samples = [
            (self._encode(text)[0].unsqueeze(0), torch.tensor(float(label)))
            for text, label in zip(initialTexts, initialLabels)
        ]
        self.model.train()
        for _ in range(epochs):
            for batch, target in samples:
                self.optimizer.zero_grad()
                loss = self.criterion(self.model(batch), target)
                loss.backward()
                self.optimizer.step()

    def saveModel(self):
        """Write the model weights and vocabulary to ``<modelDir>/classifier.pt``."""
        os.makedirs(self.modelDir, exist_ok=True)
        torch.save({
            'modelState': self.model.state_dict(),
            'vocab': self.vocab
        }, f"{self.modelDir}/classifier.pt")

    def loadModel(self):
        """Restore weights and vocabulary from ``<modelDir>/classifier.pt``.

        Returns:
            ``True`` if a checkpoint file existed and was loaded, else ``False``.
        """
        path = f"{self.modelDir}/classifier.pt"
        if not os.path.exists(path):
            return False

        # The checkpoint written by saveModel holds only tensors and a plain
        # str -> int dict, so the restricted unpickler is sufficient and avoids
        # executing arbitrary pickled code from a tampered file.
        checkpoint = torch.load(path, weights_only=True)
        self.vocab = checkpoint['vocab']
        # The model must be rebuilt after the vocabulary is restored so the
        # embedding table matches the saved weights.
        self._buildModel()
        self.model.load_state_dict(checkpoint['modelState'])
        return True

    def predictWithConfidence(self, text):
        """Classify ``text``, abstaining when the model is unsure.

        Returns:
            ``(sentiment, prob, unknownRatio)``. ``sentiment`` is ``"Positive"``,
            ``"Negative"``, or ``None`` when the text has no words, the
            confidence ``abs(prob - 0.5) * 2`` is below ``confidenceThreshold``,
            or ``unknownRatio`` exceeds ``unknownThreshold``. ``prob`` is the
            sigmoid of the model's logit (``0.5`` for empty text).
        """
        encoded, unknownRatio = self._encode(text)
        if encoded.numel() == 0:
            return None, 0.5, 1.0

        self.model.eval()
        with torch.no_grad():
            output = self.model(encoded.unsqueeze(0))
            prob = torch.sigmoid(output).item()

        confidence = abs(prob - 0.5) * 2
        isUncertain = confidence < self.confidenceThreshold or unknownRatio > self.unknownThreshold
        if isUncertain:
            return None, prob, unknownRatio

        sentiment = "Positive" if prob > 0.5 else "Negative"
        return sentiment, prob, unknownRatio

    def learnFromLabel(self, text, label):
        """Add the words of ``text`` to the vocabulary and fine-tune on the example.

        The model is rebuilt for the (possibly larger) vocabulary, existing
        weights are carried over, and 10 optimizer steps are taken on this
        single example. Text without any words is ignored.

        Args:
            text: The example text.
            label: ``1`` for positive, ``0`` for negative.
        """
        if not text.split():
            # An empty sequence cannot be passed through the embedding/LSTM,
            # so there is nothing to learn from.
            return

        self._buildVocab([text])

        oldState = self.model.state_dict()
        self._buildModel()  # also resets the optimizer for the new parameters

        newState = self.model.state_dict()
        for key, oldTensor in oldState.items():
            if key not in newState:
                continue
            if oldTensor.shape == newState[key].shape:
                newState[key] = oldTensor
            elif 'embedding' in key:
                # The vocabulary grew: keep the learned rows and leave the
                # freshly initialised rows for the new words.
                newState[key][:oldTensor.shape[0]] = oldTensor
        self.model.load_state_dict(newState)

        encoded, _ = self._encode(text)
        batch = encoded.unsqueeze(0)
        target = torch.tensor(float(label))

        self.model.train()
        for _ in range(10):
            self.optimizer.zero_grad()
            loss = self.criterion(self.model(batch), target)
            loss.backward()
            self.optimizer.step()

    def saveLabel(self, text, label):
        """Append ``{"text": text, "label": label}`` to ``<modelDir>/newLabels.json``."""
        filepath = f"{self.modelDir}/newLabels.json"
        os.makedirs(self.modelDir, exist_ok=True)
        data = []
        if os.path.exists(filepath):
            with open(filepath, 'r', encoding="utf-8") as f:
                data = json.load(f)
        data.append({"text": text, "label": label})
        with open(filepath, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)

class LSTMClassifier(nn.Module):
    """Embedding -> single-layer LSTM -> linear head producing one logit per sequence."""

    def __init__(self, vocabSize, embedDim, hiddenDim):
        """Build the layers.

        Args:
            vocabSize: Number of rows in the embedding table.
            embedDim: Size of each word embedding.
            hiddenDim: Size of the LSTM hidden state.
        """
        super().__init__()
        # Index 0 is the padding id.
        self.embedding = nn.Embedding(
            num_embeddings=vocabSize,
            embedding_dim=embedDim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embedDim,
            hidden_size=hiddenDim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hiddenDim, out_features=1)

    def forward(self, x):
        """Return the logit(s) for a batch-first tensor of word ids.

        All singleton dimensions are squeezed from the result, so a batch of
        one sequence yields a 0-dim tensor.
        """
        embedded = self.embedding(x)
        _, finalStates = self.lstm(embedded)
        lastHidden = finalStates[0]
        logits = self.fc(lastHidden.squeeze(0))
        return logits.squeeze()

def _promptForLabel():
    """Ask the user for a label; return 1, 0, or None if skipped or stdin is closed."""
    while True:
        try:
            labelInput = input("Positive (1), Negative (0), or 'skip': ").strip().lower()
        except EOFError:
            return None
        if labelInput in ('0', '1'):
            return int(labelInput)
        if labelInput == 'skip':
            return None
        print("Please enter 0, 1, or 'skip'")


def interactiveSession():
    """Run the console loop: classify each input and ask for a label when unsure.

    The model is saved when the user types 'quit' or when stdin is closed.
    Each label the user supplies is appended to the label file and used for an
    immediate fine-tuning step.
    """
    print("Loading model...")
    classifier = adaptiveSentimentClassifier()
    print("Ready!\n")

    while True:
        try:
            text = input("\nEnter text (or 'quit'): ").strip()
        except EOFError:
            # stdin closed: treat it as 'quit' so learned weights are not lost.
            text = 'quit'

        if text.lower() == 'quit':
            classifier.saveModel()
            print("Model saved.")
            break

        if not text:
            # Blank input has nothing to classify or learn from.
            print("Please enter some text.")
            continue

        sentiment, prob, unknownRatio = classifier.predictWithConfidence(text)
        # Same confidence measure the classifier thresholds on; the raw
        # probability would read as ~0.00 for a strongly negative prediction.
        confidence = abs(prob - 0.5) * 2

        if sentiment is not None:
            print(f"Prediction: {sentiment} (confidence: {confidence:.2f})")
            continue

        print(f"I'm not sure about: '{text}'")
        print(f"  (confidence: {confidence:.2f}, unknown words: {unknownRatio:.0%})")

        label = _promptForLabel()
        if label is None:
            print("Skipped.")
            continue

        classifier.saveLabel(text, label)
        classifier.learnFromLabel(text, label)
        print(f"Learned! '{text}' -> {'Positive' if label else 'Negative'}")

# Start the interactive console session only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    interactiveSession()