Python SVM(支持向量机)实现方法完整示例


Posted in Python onJune 19, 2018

本文实例讲述了Python SVM(支持向量机)实现方法。分享给大家供大家参考,具体如下:

运行环境

  • Pyhton3
  • numpy(科学计算包)
  • matplotlib(画图所需,不画图可不必)

计算过程

st=>start: 开始
e=>end: 结束
op1=>operation: 读入数据
op2=>operation: 格式化数据
cond=>condition: 是否达到迭代次数
op3=>operation: 寻找超平面分割最小间隔
ccond=>conditon: 数据是否改变
op4=>operation: 输出结果
st->op1->op2->cond
cond(yes)->op4->e
cond(no)->op3

啊,这markdown flow好难用,我决定就画到这吧=。=

输入样例

/* testSet.txt */
3.542485 1.977398 -1
3.018896 2.556416 -1
7.551510 -1.580030 1
2.114999 -0.004466 -1
8.127113 1.274372 1
7.108772 -0.986906 1
8.610639 2.046708 1
2.326297 0.265213 -1
3.634009 1.730537 -1
0.341367 -0.894998 -1
3.125951 0.293251 -1
2.123252 -0.783563 -1
0.887835 -2.797792 -1
7.139979 -2.329896 1
1.696414 -1.212496 -1
8.117032 0.623493 1
8.497162 -0.266649 1
4.658191 3.507396 -1
8.197181 1.545132 1
1.208047 0.213100 -1
1.928486 -0.321870 -1
2.175808 -0.014527 -1
7.886608 0.461755 1
3.223038 -0.552392 -1
3.628502 2.190585 -1
7.407860 -0.121961 1
7.286357 0.251077 1
2.301095 -0.533988 -1
-0.232542 -0.547690 -1
3.457096 -0.082216 -1
3.023938 -0.057392 -1
8.015003 0.885325 1
8.991748 0.923154 1
7.916831 -1.781735 1
7.616862 -0.217958 1
2.450939 0.744967 -1
7.270337 -2.507834 1
1.749721 -0.961902 -1
1.803111 -0.176349 -1
8.804461 3.044301 1
1.231257 -0.568573 -1
2.074915 1.410550 -1
-0.743036 -1.736103 -1
3.536555 3.964960 -1
8.410143 0.025606 1
7.382988 -0.478764 1
6.960661 -0.245353 1
8.234460 0.701868 1
8.168618 -0.903835 1
1.534187 -0.622492 -1
9.229518 2.066088 1
7.886242 0.191813 1
2.893743 -1.643468 -1
1.870457 -1.040420 -1
5.286862 -2.358286 1
6.080573 0.418886 1
2.544314 1.714165 -1
6.016004 -3.753712 1
0.926310 -0.564359 -1
0.870296 -0.109952 -1
2.369345 1.375695 -1
1.363782 -0.254082 -1
7.279460 -0.189572 1
1.896005 0.515080 -1
8.102154 -0.603875 1
2.529893 0.662657 -1
1.963874 -0.365233 -1
8.132048 0.785914 1
8.245938 0.372366 1
6.543888 0.433164 1
-0.236713 -5.766721 -1
8.112593 0.295839 1
9.803425 1.495167 1
1.497407 -0.552916 -1
1.336267 -1.632889 -1
9.205805 -0.586480 1
1.966279 -1.840439 -1
8.398012 1.584918 1
7.239953 -1.764292 1
7.556201 0.241185 1
9.015509 0.345019 1
8.266085 -0.230977 1
8.545620 2.788799 1
9.295969 1.346332 1
2.404234 0.570278 -1
2.037772 0.021919 -1
1.727631 -0.453143 -1
1.979395 -0.050773 -1
8.092288 -1.372433 1
1.667645 0.239204 -1
9.854303 1.365116 1
7.921057 -1.327587 1
8.500757 1.492372 1
1.339746 -0.291183 -1
3.107511 0.758367 -1
2.609525 0.902979 -1
3.263585 1.367898 -1
2.912122 -0.202359 -1
1.731786 0.589096 -1
2.387003 1.573131 -1

代码实现

# -*- coding:utf-8 -*-
#!python3
__author__ = 'Wsine'
from numpy import *
import matplotlib.pyplot as plt
import operator
import time
def loadDataSet(fileName):
  dataMat = []
  labelMat = []
  with open(fileName) as fr:
    for line in fr.readlines():
      lineArr = line.strip().split('\t')
      dataMat.append([float(lineArr[0]), float(lineArr[1])])
      labelMat.append(float(lineArr[2]))
  return dataMat, labelMat
def selectJrand(i, m):
  j = i
  while (j == i):
    j = int(random.uniform(0, m))
  return j
def clipAlpha(aj, H, L):
  if aj > H:
    aj = H
  if L > aj:
    aj = L
  return aj
class optStruct:
  def __init__(self, dataMatIn, classLabels, C, toler):
    self.X = dataMatIn
    self.labelMat = classLabels
    self.C = C
    self.tol = toler
    self.m = shape(dataMatIn)[0]
    self.alphas = mat(zeros((self.m, 1)))
    self.b = 0
    self.eCache = mat(zeros((self.m, 2)))
def calcEk(oS, k):
  fXk = float(multiply(oS.alphas, oS.labelMat).T * (oS.X * oS.X[k, :].T)) + oS.b
  Ek = fXk - float(oS.labelMat[k])
  return Ek
def selectJ(i, oS, Ei):
  maxK = -1
  maxDeltaE = 0
  Ej = 0
  oS.eCache[i] = [1, Ei]
  validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
  if (len(validEcacheList)) > 1:
    for k in validEcacheList:
      if k == i:
        continue
      Ek = calcEk(oS, k)
      deltaE = abs(Ei - Ek)
      if (deltaE > maxDeltaE):
        maxK = k
        maxDeltaE = deltaE
        Ej = Ek
    return maxK, Ej
  else:
    j = selectJrand(i, oS.m)
    Ej = calcEk(oS, j)
  return j, Ej
def updateEk(oS, k):
  Ek = calcEk(oS, k)
  oS.eCache[k] = [1, Ek]
def innerL(i, oS):
  Ei = calcEk(oS, i)
  if ((oS.labelMat[i] * Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i] * Ei > oS.tol) and (oS.alphas[i] > 0)):
    j, Ej = selectJ(i, oS, Ei)
    alphaIold = oS.alphas[i].copy()
    alphaJold = oS.alphas[j].copy()
    if (oS.labelMat[i] != oS.labelMat[j]):
      L = max(0, oS.alphas[j] - oS.alphas[i])
      H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])
    else:
      L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)
      H = min(oS.C, oS.alphas[j] + oS.alphas[i])
    if (L == H):
      # print("L == H")
      return 0
    eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - oS.X[j, :] * oS.X[j, :].T
    if eta >= 0:
      # print("eta >= 0")
      return 0
    oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej) / eta
    oS.alphas[j] = clipAlpha(oS.alphas[j], H, L)
    updateEk(oS, j)
    if (abs(oS.alphas[j] - alphaJold) < 0.00001):
      # print("j not moving enough")
      return 0
    oS.alphas[i] += oS.labelMat[j] * oS.labelMat[i] * (alphaJold - oS.alphas[j])
    updateEk(oS, i)
    b1 = oS.b - Ei - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.X[i, :] * oS.X[i, :].T - oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.X[i, :] * oS.X[j, :].T
    b2 = oS.b - Ei - oS.labelMat[i] * (oS.alphas[i] - alphaIold) * oS.X[i, :] * oS.X[j, :].T - oS.labelMat[j] * (oS.alphas[j] - alphaJold) * oS.X[j, :] * oS.X[j, :].T
    if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]):
      oS.b = b1
    elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]):
      oS.b = b2
    else:
      oS.b = (b1 + b2) / 2.0
    return 1
  else:
    return 0
def smoP(dataMatIn, classLabels, C, toler, maxIter, kTup=('lin', 0)):
  """
  输入:数据集, 类别标签, 常数C, 容错率, 最大循环次数
  输出:目标b, 参数alphas
  """
  oS = optStruct(mat(dataMatIn), mat(classLabels).transpose(), C, toler)
  iterr = 0
  entireSet = True
  alphaPairsChanged = 0
  while (iterr < maxIter) and ((alphaPairsChanged > 0) or (entireSet)):
    alphaPairsChanged = 0
    if entireSet:
      for i in range(oS.m):
        alphaPairsChanged += innerL(i, oS)
      # print("fullSet, iter: %d i:%d, pairs changed %d" % (iterr, i, alphaPairsChanged))
      iterr += 1
    else:
      nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0]
      for i in nonBoundIs:
        alphaPairsChanged += innerL(i, oS)
        # print("non-bound, iter: %d i:%d, pairs changed %d" % (iterr, i, alphaPairsChanged))
      iterr += 1
    if entireSet:
      entireSet = False
    elif (alphaPairsChanged == 0):
      entireSet = True
    # print("iteration number: %d" % iterr)
  return oS.b, oS.alphas
def calcWs(alphas, dataArr, classLabels):
  """
  输入:alphas, 数据集, 类别标签
  输出:目标w
  """
  X = mat(dataArr)
  labelMat = mat(classLabels).transpose()
  m, n = shape(X)
  w = zeros((n, 1))
  for i in range(m):
    w += multiply(alphas[i] * labelMat[i], X[i, :].T)
  return w
def plotFeature(dataMat, labelMat, weights, b):
  dataArr = array(dataMat)
  n = shape(dataArr)[0]
  xcord1 = []; ycord1 = []
  xcord2 = []; ycord2 = []
  for i in range(n):
    if int(labelMat[i]) == 1:
      xcord1.append(dataArr[i, 0])
      ycord1.append(dataArr[i, 1])
    else:
      xcord2.append(dataArr[i, 0])
      ycord2.append(dataArr[i, 1])
  fig = plt.figure()
  ax = fig.add_subplot(111)
  ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
  ax.scatter(xcord2, ycord2, s=30, c='green')
  x = arange(2, 7.0, 0.1)
  y = (-b[0, 0] * x) - 10 / linalg.norm(weights)
  ax.plot(x, y)
  plt.xlabel('X1'); plt.ylabel('X2')
  plt.show()
def main():
  trainDataSet, trainLabel = loadDataSet('testSet.txt')
  b, alphas = smoP(trainDataSet, trainLabel, 0.6, 0.0001, 40)
  ws = calcWs(alphas, trainDataSet, trainLabel)
  print("ws = \n", ws)
  print("b = \n", b)
  plotFeature(trainDataSet, trainLabel, ws, b)
if __name__ == '__main__':
  start = time.clock()
  main()
  end = time.clock()
  print('finish all in %s' % str(end - start))

输出样例

ws =
 [[ 0.65307162]
 [-0.17196128]]
b =
 [[-2.89901748]]
finish all in 2.5683854014099112

Python SVM(支持向量机)实现方法完整示例

绘图方面还存在一些bug。

希望本文所述对大家Python程序设计有所帮助。

Python 相关文章推荐
使用python提取html文件中的特定数据的实现代码
Mar 24 Python
python添加模块搜索路径方法
Sep 11 Python
Python3实现发送QQ邮件功能(附件)
Dec 23 Python
Python类的继承用法示例
Jan 31 Python
举例讲解Python常用模块
Mar 08 Python
django中使用POST方法获取POST数据
Aug 20 Python
pytorch 状态字典:state_dict使用详解
Jan 17 Python
Django serializer优化类视图的实现示例
Jul 16 Python
python tqdm实现进度条的示例代码
Nov 10 Python
python空元组在all中返回结果详解
Dec 15 Python
Python pygame实现中国象棋单机版源码
Jun 20 Python
使用pandas生成/读取csv文件的方法实例
Jul 09 Python
Tensorflow使用tfrecord输入数据格式
Jun 19 #Python
Tensorflow 训练自己的数据集将数据直接导入到内存
Jun 19 #Python
python如何爬取个性签名
Jun 19 #Python
详解TensorFlow查看ckpt中变量的几种方法
Jun 19 #Python
TensorFlow 滑动平均的示例代码
Jun 19 #Python
python3个性签名设计实现代码
Jun 19 #Python
TensorFlow 模型载入方法汇总(小结)
Jun 19 #Python
You might like
php获取网页内容方法总结
2008/12/04 PHP
ThinkPHP使用smarty模板引擎的方法
2014/07/01 PHP
php使用wordwrap格式化文本段落的方法
2015/03/17 PHP
PHP中的类型提示(type hinting)功能介绍
2015/07/01 PHP
php验证邮箱和ip地址最简单方法汇总
2015/10/30 PHP
PHP可变变量学习小结
2015/11/29 PHP
PHP排序算法之简单选择排序(Simple Selection Sort)实例分析
2018/04/20 PHP
php接口实现拖拽排序功能
2018/04/23 PHP
php实现简易计算器
2020/08/28 PHP
6个常见的 PHP 安全性攻击实例和阻止方法
2020/12/16 PHP
动态创建的表格单元格中的事件实现代码
2008/12/30 Javascript
JQuery最佳实践之精妙的自定义事件
2010/08/11 Javascript
js 删除数组的几种方法小结
2014/02/21 Javascript
js脚本实现数据去重
2014/11/27 Javascript
javascript实现动态改变层大小的方法
2015/05/14 Javascript
Javascript函数的参数
2015/07/16 Javascript
js window对象属性和方法相关资料整理
2015/11/11 Javascript
js中 计算两个日期间的工作日的简单实例
2016/08/08 Javascript
Vue实现调节窗口大小时触发事件动态调节更新组件尺寸的方法
2018/09/15 Javascript
深入理解JavaScript的值传递和引用传递
2018/10/24 Javascript
微信小程序的tab选项卡的实现效果
2019/05/15 Javascript
vue实现页面滚动到底部刷新
2019/08/16 Javascript
Python中一行和多行import模块问题
2018/04/01 Python
Python使用scipy模块实现一维卷积运算示例
2019/09/05 Python
Python爬虫 urllib2的使用方法详解
2019/09/23 Python
Python 继承,重写,super()调用父类方法操作示例
2019/09/29 Python
Pycharm连接远程服务器过程图解
2020/04/30 Python
scrapy-splash简单使用详解
2021/02/21 Python
服装公司总经理岗位职责
2013/11/30 职场文书
初婚未育未抱养证明
2014/01/12 职场文书
经贸日语专业个人求职信范文
2014/04/29 职场文书
保证书格式
2015/01/16 职场文书
销售经理工作检讨书
2015/02/19 职场文书
中国式结婚:司仪主持词(范文)
2019/07/25 职场文书
销区经理年终述职报告模板
2019/11/28 职场文书
Kubernetes中Deployment的升级与回滚
2022/04/01 Servers