# Language: Python (this line was a leftover markdown code-fence tag)
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
# Download the NLTK tokenizer data.
# NOTE(review): nothing in the visible script calls into nltk after this
# download; the pipeline below relies on CountVectorizer's own tokenization.
# Kept to preserve the original module-level side effect -- confirm whether
# it is still needed.
nltk.download('punkt')

# Example dataset: (text, sentiment label) pairs.
data = [
    ("I love this place!", "positive"),
    ("This is the worst thing I have ever experienced.", "negative"),
    ("Absolutely fantastic!", "positive"),
    ("Terrible service and poor quality.", "negative"),
    ("I am so happy with my purchase.", "positive"),
    ("I can't stand this product.", "negative"),
]

# Separate the features (texts) from the labels.
texts, labels = zip(*data)

# Build a bag-of-words vectorizer followed by a multinomial Naive Bayes
# classifier, chained into a single pipeline.
model = make_pipeline(CountVectorizer(), MultinomialNB())

# The pipeline must be fitted before it can predict; without this step
# every call to model.predict() would fail.
model.fit(texts, labels)


def predict_sentiment(text: str) -> str:
    """Return the sentiment label predicted for a single piece of text.

    Args:
        text: The raw sentence to classify.

    Returns:
        One of the labels present in the training data
        ("positive" or "negative").
    """
    # The pipeline expects an iterable of documents, so wrap the single
    # text in a list and unwrap the single prediction.
    return str(model.predict([text])[0])


# Try a few examples.
# NOTE(review): the "expected" labels below are the original author's
# expectations; with only six training sentences the actual predictions
# may differ.
print(predict_sentiment("I really enjoyed this movie!"))  # expected: positive
print(predict_sentiment("This is absolutely terrible."))  # expected: negative
print(predict_sentiment("I am not sure how I feel about it."))  # may be: negative or positive