Python中scikit-learn机器学习代码实例


Posted in Python on August 05, 2018

我们给大家带来了关于学习Python中scikit-learn机器学习代码的相关具体实例,以下就是全部代码内容:

# -*- coding: utf-8 -*-
 
import numpy
from sklearn import metrics
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn import linear_model
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn import cross_validation
from sklearn import preprocessing
#import iris_data
 
def load_data():
  """Load the iris dataset and split it into training and test portions.

  Returns:
    A tuple ``(x_train, y_train, x_test, y_test)``. 20% of the samples are
    held out for testing and the split is seeded with ``random_state=42``.
  """
  dataset = load_iris()
  features = dataset.data
  labels = dataset.target
  split = train_test_split(features, labels, test_size=0.20, random_state=42)
  train_x, test_x, train_y, test_y = split
  # Callers unpack both training arrays first, then both test arrays.
  return train_x, train_y, test_x, test_y
 
def train_clf3(train_data, train_tags):
  """Fit a linear support vector classifier on the training set.

  Args:
    train_data: Feature matrix handed to ``LinearSVC.fit``.
    train_tags: Target labels, one per row of ``train_data``.

  Returns:
    The fitted ``LinearSVC`` instance.
  """
  # LinearSVC is a linear-kernel model (it has no 'rbf' option); ``fit``
  # returns the estimator itself, so the result can be returned directly.
  return LinearSVC(C=1100.0).fit(train_data, train_tags)
 
def train_clf(train_data, train_tags):
  """Fit a multinomial naive Bayes classifier and return it.

  The label array is echoed to stdout before fitting, as in the original
  script.

  Args:
    train_data: Feature matrix handed to ``MultinomialNB.fit``.
    train_tags: Target labels; converted once with ``numpy.asarray``.

  Returns:
    The fitted ``MultinomialNB`` instance (``alpha=0.01``).
  """
  tags = numpy.asarray(train_tags)
  # A parenthesized single-argument print behaves identically on Python 2
  # and Python 3, whereas the bare print statement is Python 2 only.
  print(tags)
  clf = MultinomialNB(alpha=0.01)
  clf.fit(train_data, tags)
  return clf
 
def evaluate(actual, pred, average=None):
  """Print precision, recall and F1 score for a set of predictions.

  Args:
    actual: Ground-truth labels (1-D sequence or array).
    pred: Predicted labels, aligned with ``actual``.
    average: Averaging mode forwarded to the scikit-learn metric functions.
      When ``None`` (the default), ``'binary'`` is used if at most two
      distinct labels occur in ``actual`` and ``pred``, and ``'weighted'``
      otherwise.
  """
  if average is None:
    # scikit-learn's own default, average='binary', is only valid for
    # two-class targets and is rejected for multiclass data such as the
    # three iris classes, so pick a multiclass-safe mode automatically.
    distinct_labels = set(actual).union(pred)
    average = 'binary' if len(distinct_labels) <= 2 else 'weighted'
  m_precision = metrics.precision_score(actual, pred, average=average)
  m_recall = metrics.recall_score(actual, pred, average=average)
  m_f1 = metrics.f1_score(actual, pred, average=average)
  # Parenthesized single-argument print works on both Python 2 and 3.
  print('precision:{0:.3f}'.format(m_precision))
  print('recall:{0:0.3f}'.format(m_recall))
  print('f1-score:{0:.8f}'.format(m_f1))
 
# Script driver: split iris, train the naive Bayes model, then report its
# quality on the held-out samples.
x_train, y_train, x_test, y_test = load_data()

clf = train_clf(x_train, y_train)

pred = clf.predict(x_test)
evaluate(numpy.asarray(y_test), pred)
# Parenthesized single-argument print works on both Python 2 and 3.
print(metrics.classification_report(y_test, pred))
 
 
使用自定义数据
# coding: utf-8
 
import numpy
from sklearn import metrics
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
import codecs
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn import linear_model
 
# Small inline sample corpora: each entry is one sentence whose tokens are
# already separated by single spaces.
train_corpus = [
    '我们 我们 好孩子 认证 。 就是',
    '我们 好孩子 认证 。 中国',
    '我们 好孩子 认证 。 孤独',
    '我们 好孩子 认证 。',
]

test_corpus = [
    '我 菲律宾 韩国',
    '我们 好孩子 认证 。 中国',
]
 
def _read_tagged_file(path):
  """Parse one ``<tag>:<payload>`` file into parallel word and tag lists."""
  words = []
  tags = []
  # The context manager closes the file even if a line fails to parse.
  with codecs.open(path, 'r', 'utf-8', 'ignore') as handle:
    for line in handle:
      if not line.strip():
        # Skip blank lines (for example a trailing empty line at end of
        # file) instead of failing on the missing ':' separator.
        continue
      tks = line.split(':', 1)
      word_list = tks[1]
      # Drop the first character and the last three of the payload before
      # splitting on ", ".
      # NOTE(review): presumably these are '[' and ']' + CRLF -- confirm
      # against the real data files (an LF-only file would lose one char).
      word_array = word_list[1:(len(word_list) - 3)].split(", ")
      words.append(" ".join(word_array))
      tags.append(tks[0])
  return words, tags


def input_data(train_file, test_file):
  """Read the training and test corpora from two UTF-8 text files.

  Every non-blank line is split at its first ':'; the text before it is the
  tag and the text after it is turned into a space-joined word string.

  Args:
    train_file: Path of the training file.
    test_file: Path of the test file.

  Returns:
    A tuple ``(train_words, train_tags, test_words, test_tags)`` of lists.

  Raises:
    IndexError: If a non-blank line contains no ':' separator.
  """
  train_words, train_tags = _read_tagged_file(train_file)
  test_words, test_tags = _read_tagged_file(test_file)
  return train_words, train_tags, test_words, test_tags
 
 
def vectorize(train_words, test_words):
  """Convert both corpora to hashed feature matrices.

  Args:
    train_words: Iterable of training documents (strings).
    test_words: Iterable of test documents (strings).

  Returns:
    A tuple ``(train_data, test_data)`` produced by the same
    ``HashingVectorizer`` instance.
  """
  # Alternatives tried earlier: HashingVectorizer(n_features=25000,
  # non_negative=True) and CountVectorizer(min_df=1).
  # NOTE(review): ``non_negative`` was removed from later scikit-learn
  # releases -- confirm the installed version still accepts it.
  hasher = HashingVectorizer(non_negative=True)
  hashed_train = hasher.fit_transform(train_words)
  hashed_test = hasher.fit_transform(test_words)
  return hashed_train, hashed_test
 
def vectorize1(train_words, test_words):
  """Convert both corpora to TF-IDF matrices fitted on the training set.

  Args:
    train_words: Iterable of training documents (strings).
    test_words: Iterable of test documents (strings).

  Returns:
    A tuple ``(train_data, test_data)`` of TF-IDF matrices that share the
    vocabulary and IDF weights learned from ``train_words``.
  """
  tv = TfidfVectorizer(sublinear_tf=False, use_idf=True)
  train_data = tv.fit_transform(train_words)
  # Reuse the fitted vectorizer for the test set. Fitting a second
  # vectorizer on the test documents would derive the IDF weights from the
  # test data, so its features would not match what the classifier saw.
  test_data = tv.transform(test_words)
  return train_data, test_data
  
def vectorize2(train_words, test_words):
  """Convert both corpora to TF-IDF matrices via explicit term counts.

  Args:
    train_words: Iterable of training documents (strings).
    test_words: Iterable of test documents (strings).

  Returns:
    A tuple ``(train_data, test_data)`` of TF-IDF matrices that share the
    vocabulary and IDF weights learned from ``train_words``.
  """
  count_v1 = CountVectorizer(stop_words='english', max_df=0.5)
  counts_train = count_v1.fit_transform(train_words)
  # The fitted vectorizer already holds the training vocabulary, so the test
  # set is only transformed.
  counts_test = count_v1.transform(test_words)

  tfidftransformer = TfidfTransformer()
  train_data = tfidftransformer.fit_transform(counts_train)
  # Apply the training IDF weights to the test counts. Re-fitting on the
  # test counts would weight the test features differently from the
  # features the classifier is trained on.
  test_data = tfidftransformer.transform(counts_test)
  return train_data, test_data
 
def evaluate(actual, pred, average=None):
  """Print precision, recall and F1 score for a set of predictions.

  Args:
    actual: Ground-truth labels (1-D sequence or array).
    pred: Predicted labels, aligned with ``actual``.
    average: Averaging mode forwarded to the scikit-learn metric functions.
      When ``None`` (the default), ``'binary'`` is used if at most two
      distinct labels occur in ``actual`` and ``pred``, and ``'weighted'``
      otherwise.
  """
  if average is None:
    # scikit-learn's own default, average='binary', is only valid for
    # two-class targets and is rejected for multiclass data, so pick a
    # multiclass-safe mode automatically.
    distinct_labels = set(actual).union(pred)
    average = 'binary' if len(distinct_labels) <= 2 else 'weighted'
  m_precision = metrics.precision_score(actual, pred, average=average)
  m_recall = metrics.recall_score(actual, pred, average=average)
  m_f1 = metrics.f1_score(actual, pred, average=average)
  # Parenthesized single-argument print works on both Python 2 and 3.
  print('precision:{0:.3f}'.format(m_precision))
  print('recall:{0:0.3f}'.format(m_recall))
  print('f1-score:{0:.8f}'.format(m_f1))
 
 
def train_clf(train_data, train_tags):
  """Fit a multinomial naive Bayes classifier (``alpha=0.01``).

  Args:
    train_data: Feature matrix handed to ``MultinomialNB.fit``.
    train_tags: Target labels; converted with ``numpy.asarray``.

  Returns:
    The fitted ``MultinomialNB`` instance.
  """
  label_array = numpy.asarray(train_tags)
  # ``fit`` returns the estimator itself, so it can be returned directly.
  return MultinomialNB(alpha=0.01).fit(train_data, label_array)
 
 
def train_clf1(train_data, train_tags):
  """Fit a k-nearest-neighbours classifier with default settings.

  Args:
    train_data: Feature matrix handed to ``KNeighborsClassifier.fit``.
    train_tags: Target labels; converted with ``numpy.asarray``.

  Returns:
    The fitted ``KNeighborsClassifier`` instance.
  """
  # No arguments are passed, so the library default of k=5 applies.
  knn = KNeighborsClassifier()
  labels = numpy.asarray(train_tags)
  return knn.fit(train_data, labels)
 
def train_clf2(train_data, train_tags):
  """Fit a logistic regression classifier with ``C=1e5``.

  Args:
    train_data: Feature matrix handed to ``LogisticRegression.fit``.
    train_tags: Target labels, one per row of ``train_data``.

  Returns:
    The fitted ``LogisticRegression`` instance.
  """
  logreg = linear_model.LogisticRegression(C=1e5)
  return logreg.fit(train_data, train_tags)
 
def train_clf3(train_data, train_tags):
  """Fit a linear support vector classifier with ``C=1100.0``.

  Args:
    train_data: Feature matrix handed to ``LinearSVC.fit``.
    train_tags: Target labels, one per row of ``train_data``.

  Returns:
    The fitted ``LinearSVC`` instance.
  """
  # LinearSVC is a linear-kernel model; it has no 'rbf' option.
  linear_svm = LinearSVC(C=1100.0)
  return linear_svm.fit(train_data, train_tags)
 
def train_clf4(train_data, train_tags):
  """Fit a random forest on a densified copy of the feature matrix.

  The original author's note says the random forest cannot be used with a
  sparse matrix, so the input is converted to a dense array first.

  Args:
    train_data: Sparse feature matrix (must provide ``toarray()``).
    train_tags: Target labels, one per row of ``train_data``.

  Returns:
    The fitted ``RandomForestClassifier`` (``n_estimators=10``).
  """
  clf = RandomForestClassifier(n_estimators=10)
  # ``toarray()`` yields a plain ndarray. ``todense()`` yields a
  # ``numpy.matrix``, which newer scikit-learn releases no longer accept.
  clf.fit(train_data.toarray(), train_tags)
  return clf
 
# Read a UTF-8 text file line by line with codecs.
def codecs_read_label_line(filename):
  """Return the lines of ``filename`` without their trailing newline.

  Args:
    filename: Path of a UTF-8 text file; undecodable bytes are ignored.

  Returns:
    A list with one string per line, in file order.
  """
  label_list = []
  # The context manager closes the file even if reading fails.
  with codecs.open(filename, 'r', 'utf-8', 'ignore') as f:
    for line in f:
      # Drop only the terminating '\n'. Blindly cutting the last character
      # would truncate a final line that has no newline. A '\r' preceding
      # the '\n' is kept, exactly as before.
      label_list.append(line[:-1] if line.endswith('\n') else line)
  return label_list
 
def save_test_features(test_url, test_label):
  """Write ``feature<TAB>label`` lines to 'test_labeded.dat'.

  The features are read from 'test.dat' and paired with ``test_label`` in
  order; pairing stops at the shorter of the two sequences.

  Args:
    test_url: Not used by the function body.
      NOTE(review): the features are always re-read from 'test.dat';
      confirm whether this argument was meant to supply them instead.
    test_label: Sequence of label strings, one per line of 'test.dat'.
  """
  test_feature_list = codecs_read_label_line('test.dat')
  # The context manager guarantees the output file is closed even if a
  # write raises (for example when a label is not a string).
  with open('test_labeded.dat', "w+") as fw:
    for url, label in zip(test_feature_list, test_label):
      fw.write(url + '\t' + label)
      fw.write('\n')
 
def main():
  """Train on the corpus files, cross-validate, and report test errors.

  Reads the training and test files, vectorizes them with ``vectorize1``,
  fits the classifier from ``train_clf3``, prints 5-fold ``f1_weighted``
  cross-validation scores, then prints every misclassified test sample,
  the number of errors, and the precision/recall/F1 summary.
  """
  train_file = u'..\\file\\py_train.txt'
  test_file = u'..\\file\\py_test.txt'
  train_words, train_tags, test_words, test_tags = input_data(train_file, test_file)

  train_data, test_data = vectorize1(train_words, test_words)
  # Parenthesized single-argument print works on both Python 2 and 3.
  print(type(train_data))
  print(train_data.shape)
  print(test_data.shape)
  print(test_data[0].shape)
  print(numpy.asarray(test_data[0]))

  clf = train_clf3(train_data, train_tags)

  scores = cross_validation.cross_val_score(
      clf, train_data, train_tags, cv=5, scoring="f1_weighted")
  print(scores)

  pred = clf.predict(test_data)
  error_list = []
  for true_tag, predict_tag in zip(test_tags, pred):
    if true_tag != predict_tag:
      # Build the "true predicted" pair once and reuse it for both the
      # console output and the error list.
      mismatch = true_tag + ' ' + predict_tag
      print(mismatch)
      error_list.append(mismatch)
  print(len(error_list))
  evaluate(numpy.asarray(test_tags), pred)
  # To also write out the predicted labels:
  #   test_feature_list = codecs_read_label_line('test.dat')
  #   save_test_features(test_feature_list, pred)
 
# Run the training/evaluation pipeline only when executed as a script.
if __name__ == '__main__':
  main()
Python 相关文章推荐
Python原始字符串(raw strings)用法实例
Oct 13 Python
Python使用dis模块把Python反编译为字节码的用法详解
Jun 14 Python
django使用图片延时加载引起后台404错误
Apr 18 Python
Python利用operator模块实现对象的多级排序详解
May 09 Python
python做量化投资系列之比特币初始配置
Jan 23 Python
利用Python进行数据可视化常见的9种方法!超实用!
Jul 11 Python
Python修改文件往指定行插入内容的实例
Jan 30 Python
Django实现学生管理系统
Feb 26 Python
python实现从ftp上下载文件的实例方法
Jul 19 Python
Pycharm安装python库的方法
Nov 24 Python
Python实现疫情地图可视化
Feb 05 Python
Python Matplotlib绘制两个Y轴图像
Apr 13 Python
解决使用pycharm提交代码时冲突之后文件丢失找回的方法
Aug 05 #Python
Python字符串、整数、和浮点型数相互转换实例
Aug 04 #Python
python与caffe改变通道顺序的方法
Aug 04 #Python
Python爬虫PyQuery库基本用法入门教程
Aug 04 #Python
python list转矩阵的实例讲解
Aug 04 #Python
Python 生成 -1~1 之间的随机数矩阵方法
Aug 04 #Python
Python爬虫框架scrapy实现downloader_middleware设置proxy代理功能示例
Aug 04 #Python
You might like
PHP变量的定义、可变变量、变量引用、销毁方法
2013/12/20 PHP
PHP使用PDO创建MySQL数据库、表及插入多条数据操作示例
2019/05/30 PHP
Javascript学习笔记1 数据类型
2010/01/11 Javascript
JS上传图片前的限制包括(jpg jpg gif及大小高宽)等
2012/12/19 Javascript
JavaScript在XHTML中的用法详解
2013/04/11 Javascript
nodejs教程之环境安装及运行
2014/11/21 NodeJs
JavaScript实现的一个日期格式化函数分享
2014/12/06 Javascript
js实现可得到不同颜色值的颜色选择器实例
2015/02/28 Javascript
引用jquery框架后出错的解决方法
2016/08/09 Javascript
jQuery实现的选择商品飞入文本框动画效果完整实例
2016/08/10 Javascript
Vuejs第十三篇之组件——杂项
2016/09/09 Javascript
原生js实现手风琴功能(支持横纵向调用)
2017/01/13 Javascript
Bootstrap3下拉菜单的实现
2017/02/22 Javascript
Vue之Watcher源码解析(2)
2017/07/19 Javascript
详解Vue如何支持JSX语法
2017/11/10 Javascript
Vue组件创建和传值的方法
2018/08/17 Javascript
浅析js实现网页截图的两种方式
2019/11/01 Javascript
vue el-tree 默认展开第一个节点的实现代码
2020/05/15 Javascript
js+canvas实现图片格式webp/png/jpeg在线转换
2020/08/22 Javascript
[03:49]DOTA2 2015国际邀请赛中国区预选赛第二日现场百态
2015/05/27 DOTA
Python通过解析网页实现看报程序的方法
2014/08/04 Python
python中enumerate函数遍历元素用法分析
2016/03/11 Python
Centos Python2 升级到Python3的简单实现
2016/06/21 Python
python 调用HBase的简单实例
2016/12/18 Python
Python实现的摇骰子猜大小功能小游戏示例
2017/12/18 Python
django使用xlwt导出excel文件实例代码
2018/02/06 Python
Python学习笔记之读取文件、OS模块、异常处理、with as语法示例
2019/06/04 Python
OpenCV读取与写入图片的实现
2020/10/13 Python
纯CSS绘制漂亮的圆形图案效果
2014/05/07 HTML / CSS
css3实现wifi信号逐渐增强效果实例
2017/08/09 HTML / CSS
印度尼西亚值得信赖的第一家网店:Bhinneka
2018/07/16 全球购物
运动会解说词100字
2014/01/31 职场文书
销售助理岗位职责
2014/02/21 职场文书
学校勤俭节约倡议书
2015/04/29 职场文书
小学总务工作总结
2015/08/13 职场文书
教师节主题班会教案
2015/08/17 职场文书