【机器学习实战】使用sklearn中的朴素贝叶斯方法实现新闻文本分类
目录：1. 数据集；2. 实现（2.1 代码，2.2 结果）。本文使用 sklearn 提供的 fetch_20newsgroups、train_test_split、TfidfVectorizer 和 MultinomialNB，实现基于朴素贝叶斯的新闻文本分类。
·
1. 数据集

2. 实现
2.1 代码
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
def bayesian_demo(test_size=0.2, random_state=None):
    """Classify 20 Newsgroups posts with TF-IDF features and multinomial naive Bayes.

    Loads the corpus through ``fetch_20newsgroups(subset='all')``, splits it
    into training and test parts, vectorizes the raw text with
    ``TfidfVectorizer``, fits a ``MultinomialNB`` classifier, and prints the
    predictions, an element-wise comparison against the true labels, and the
    accuracy on the held-out part.

    :param test_size: passed to ``train_test_split``; with the default ``0.2``
        one fifth of the documents are held out for evaluation.
    :param random_state: passed to ``train_test_split``. The default ``None``
        keeps the split different on every run; pass an integer to make the
        split (and therefore the reported accuracy) reproducible.
    :return: the value of ``estimator.score`` on the held-out part
        (mean accuracy).
    """
    # 1. Load the data (scikit-learn may download the corpus on first use).
    news = fetch_20newsgroups(subset='all')

    # 2. Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        news.data,
        news.target,
        test_size=test_size,
        random_state=random_state,
    )

    # 3. Feature engineering: learn vocabulary and IDF weights on the training
    #    text only, then reuse them for the test text so that no information
    #    from the test set leaks into the features.
    transfer = TfidfVectorizer()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4. Fit the naive Bayes estimator.
    estimator = MultinomialNB()
    estimator.fit(x_train, y_train)

    # 5. Evaluate the model.
    # 5.1 Compare predictions with the true labels directly.
    y_predict = estimator.predict(x_test)
    print('y_predic:\n', y_predict)
    print('直接对比真实值与预测值:\n', y_test == y_predict)

    # 5.2 Compute the accuracy.
    score = estimator.score(x_test, y_test)
    print('准确率为:\n', score)

    # Hand the accuracy back so callers can use it programmatically.
    return score
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    bayesian_demo()
2.2 结果

更多推荐



所有评论(0)