当前位置：首页 > news >正文

python实现混淆矩阵_Python - 混淆矩阵可视化实现

news 来源：原创 2024/4/20 10:58:01

主要是汇总几种关于多分类问题中的混淆矩阵可视化Python 实现.

最简单的一种是直接在终端打印混淆矩阵结果，如：

import sys

def confusion_matrix(gt_labels, pred_labels, num_labels):
    """Print a confusion matrix with per-class accuracy to stdout.

    Args:
        gt_labels: ground-truth class indices.
        pred_labels: predicted class indices.
        num_labels: number of classes; the matrix is built for the labels
            ``0 .. num_labels - 1``.

    Returns:
        None. The table is written to ``sys.stdout``.
    """
    import sys

    # Imported under an alias because this function has the same name as
    # the scikit-learn helper it wraps.
    from sklearn.metrics import confusion_matrix as sk_confusion_matrix

    conf_matrix = sk_confusion_matrix(
        gt_labels, pred_labels, labels=list(range(num_labels)))

    write = sys.stdout.write
    write('\n\nConfusion Matrix')
    write('\t' * (num_labels - 2) + '| Accuracy')
    write('\n' + '-' * 8 * (num_labels + 1))
    write('\n')

    for i, row in enumerate(conf_matrix):
        for count in row:
            # int() replaces the original ``.astype(np.int)``: the ``np.int``
            # alias was removed in NumPy 1.24 and ``np`` is not imported in
            # this snippet.
            write(str(int(count)) + '\t')
        row_total = row.sum()
        # A class without any ground-truth sample has an all-zero row;
        # report 0 % instead of dividing by zero.
        accuracy = row[i] * 100 / row_total if row_total else 0.0
        write('| %3.2f %%' % accuracy)
        write('\n')

    write('Number of test samples: %i \n\n' % conf_matrix.sum())

1. 示例1

from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

# NOTE: y_test (true labels) and pred (predicted labels) are not defined in
# this snippet; they must be provided by the surrounding code.
labels = ['business', 'health']

# `labels` is keyword-only in current scikit-learn releases.
cm = confusion_matrix(y_test, pred, labels=labels)
print(cm)

fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(cm)
plt.title('Confusion matrix of the classifier')
fig.colorbar(cax)

# Fix the tick positions explicitly so every class name lands on its own
# row/column instead of relying on the automatic tick locator.
tick_positions = range(len(labels))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)

plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# 2. Example 2
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """Plot a confusion matrix as an annotated heat map.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low'];
                  pass None to leave the default numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see:
                  http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum)

    Usage
    -----
    plot_confusion_matrix(cm           = cm,
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of classes names
                          title        = best_estimator_name) # title of graph
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    cm = np.asarray(cm)

    # Accuracy is computed from the raw counts, before any normalization.
    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without samples are divided by 1 so they show 0.0 rather
        # than NaN.
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)

    # Draw the image after normalizing so that cell colours, cell text and
    # the text-colour threshold all refer to the same values.
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells above the threshold are dark, so their text is drawn in white.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Run the layout pass once the axis labels exist so it accounts for them.
    plt.tight_layout()
    plt.show()

可视化图类似于：

3. 示例3

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.metrics import confusion_matrix

def cm_analysis(y_true, y_pred, labels, ymap=None, figsize=(10,10), filename=None):
    """Plot a confusion matrix as a seaborn heat map with count/percentage annotations.

    Diagonal cells are annotated with "<row %>\\n<count>/<row total>",
    non-empty off-diagonal cells with "<row %>\\n<count>", and empty
    off-diagonal cells are left blank.

    args:
      y_true:    true label of the data, with shape (nsamples,)
      y_pred:    prediction of the data, with shape (nsamples,)
      labels:    string array, name the order of class labels in the
                 confusion matrix. use `clf.classes_` if using scikit-learn
                 models. with shape (nclass,).
      ymap:      dict: any -> string, length == nclass.
                 if not None, map the labels & ys to more understandable
                 strings. Caution: original y_true, y_pred and labels must
                 align.
      figsize:   the size of the figure plotted.
      filename:  optional filename of the figure file to save; nothing is
                 written to disk when it is None (the default).
    """
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    # Classes without true samples have a zero row total; divide by 1 there
    # so the percentage is 0.0 instead of NaN.
    safe_sum = np.where(cm_sum == 0, 1, cm_sum)
    cm_perc = cm / safe_sum.astype(float) * 100

    # Build the annotations as plain Python strings first: a fixed-width
    # string array derived from the integer matrix could truncate long cells.
    nrows, ncols = cm.shape
    annot_rows = []
    for i in range(nrows):
        row_annot = []
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                # Index the scalar row total; formatting a 1-element array
                # with %d is deprecated in recent NumPy versions.
                s = cm_sum[i, 0]
                row_annot.append('%.1f%%\n%d/%d' % (p, c, s))
            elif c == 0:
                row_annot.append('')
            else:
                row_annot.append('%.1f%%\n%d' % (p, c))
        annot_rows.append(row_annot)
    annot = np.array(annot_rows, dtype=str)

    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'

    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(cm, annot=annot, fmt='', ax=ax)
    if filename is not None:
        # Save before show(): the figure may be released once it is closed.
        plt.savefig(filename)
    plt.show()

# Example call. y_test, y_pred and model are not defined in this snippet and
# must come from the surrounding code (model is presumably a fitted
# scikit-learn classifier exposing `classes_` -- confirm against the caller).
cm_analysis(y_test, y_pred, model.classes_, ymap=None, figsize=(10,10))

可视化输出如：

4. 示例4

可视化图如：

5. 示例5

import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix

from sklearn.metrics import classification_report

# Define sample labels
true_labels = [2, 0, 0, 2, 4, 4, 1, 0, 3, 3, 3]
pred_labels = [2, 1, 0, 2, 4, 3, 1, 0, 1, 3, 3]

# Derive the class ids from the data instead of hard-coding five classes, so
# ticks and report names stay correct when the sample labels are edited.
class_ids = np.unique(np.concatenate([true_labels, pred_labels]))

# Create confusion matrix (rows/columns follow the sorted class ids)
confusion_mat = confusion_matrix(true_labels, pred_labels, labels=class_ids)

# Visualize confusion matrix
plt.imshow(confusion_mat, interpolation='nearest', cmap=plt.cm.gray)
plt.title('Confusion matrix')
plt.colorbar()
ticks = np.arange(len(class_ids))
plt.xticks(ticks, class_ids)
plt.yticks(ticks, class_ids)
plt.ylabel('True labels')
plt.xlabel('Predicted labels')
plt.show()

# Classification report
targets = ['Class-%d' % class_id for class_id in class_ids]
print('\n', classification_report(true_labels, pred_labels,
                                  labels=class_ids, target_names=targets))

6. 示例6

from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

import numpy as np

def plot_confusion_matrix(cm, savename=None, title='Confusion Matrix', class_names=None):
    """Plot a confusion matrix with per-cell values and a cell grid.

    Args:
        cm: square confusion matrix, indexed as ``cm[row][col]``.
        savename: if truthy, path the figure is saved to (PNG format)
            before it is shown.
        title: title drawn above the plot.
        class_names: names used for the tick labels, one per class. When
            None, the module-level ``classes`` sequence is used, which is
            what the function originally relied on.
    """
    if class_names is None:
        # Backward compatibility: fall back to the global the original
        # implementation read implicitly.
        class_names = classes

    plt.figure(figsize=(12, 8), dpi=100)
    # Kept from the original; note that this changes NumPy's global print
    # precision as a side effect.
    np.set_printoptions(precision=2)

    # Write the value of every non-empty cell at the centre of the cell.
    ind_array = np.arange(len(class_names))
    x, y = np.meshgrid(ind_array, ind_array)
    for x_val, y_val in zip(x.flatten(), y.flatten()):
        c = cm[y_val][x_val]
        if c > 0.001:
            plt.text(x_val, y_val, "%0.2f" % (c,), color='red', fontsize=15, va='center', ha='center')

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.binary)
    plt.title(title)
    plt.colorbar()

    xlocations = np.array(range(len(class_names)))
    plt.xticks(xlocations, class_names, rotation=90)
    plt.yticks(xlocations, class_names)
    plt.ylabel('Actual label')
    plt.xlabel('Predict label')

    # Minor ticks sit on the cell borders (offset by 0.5) and only drive the
    # grid lines; the tick marks themselves are hidden.
    tick_marks = np.array(range(len(class_names))) + 0.5
    plt.gca().set_xticks(tick_marks, minor=True)
    plt.gca().set_yticks(tick_marks, minor=True)
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')
    plt.grid(True, which='minor', linestyle='-')
    plt.gcf().subplots_adjust(bottom=0.15)

    # Save first, then show the confusion matrix.
    if savename:
        plt.savefig(savename, format='png')
    plt.show()

# `classes` holds the names of the classes; there are six of them here.
classes = ['A', 'B', 'C', 'D', 'E', 'F']
num_classes = len(classes)

random_numbers = np.random.randint(num_classes, size=50)  # 50 random samples over the classes
y_true = random_numbers.copy()  # ground-truth labels
random_numbers[:10] = np.random.randint(num_classes, size=10)  # randomly change the first 10 samples
y_pred = random_numbers  # predicted labels

# Build the confusion matrix. Passing `labels` explicitly keeps it
# len(classes) x len(classes) even if a class happens to be missing from the
# random sample.
cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
plot_confusion_matrix(cm, 'confusion_matrix.png', title='confusion matrix')

可视化结果如图：

比如类别A，预测结果和实际标签都为A的有12个样本，把A样本预测为其他类别的有3个样本(同一行的其他样本)，而把其他类别预测为A样本的有1个样本(同一列的其他样本).其他类别也同样这样分析.

7. 示例7 - 中文标注

#!--*-- coding=utf-8 --*--

import matplotlib.pyplot as plt

import numpy as np

# These two rcParams settings let matplotlib render the Chinese labels; they
# are set before any text is created so they are certainly in effect.
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False

confusion = np.array(([91,0,0],[0,92,1],[0,0,95]))

# Heat map; the second argument selects the colour map and can be changed.
plt.imshow(confusion, cmap=plt.cm.Blues)

# ticks: positions of the tick marks on each axis
# label: descriptive text of the axis
indices = range(len(confusion))

# First argument: tick positions in display order; second: the tick labels.
#plt.xticks(indices, [0, 1, 2])
#plt.yticks(indices, [0, 1, 2])
plt.xticks(indices, ['圆形', '三角形', '方形'])
plt.yticks(indices, ['圆形', '三角形', '方形'])

plt.colorbar()

plt.xlabel('预测值')
plt.ylabel('真实值')
plt.title('混淆矩阵')

# Annotate every cell with its value. plt.text takes (x, y), i.e.
# (column, row); the original passed (row, column), which drew the values
# transposed.
for row_index in range(len(confusion)):
    for col_index in range(len(confusion[row_index])):
        plt.text(col_index, row_index, confusion[row_index][col_index])

plt.show()

# 8. Example 8
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of ``y_true`` vs ``y_pred``.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
        classes: tick labels, one per row/column of the matrix.
        normalize: if True, every row is divided by its sum so the cells
            hold proportions instead of counts.
        title: plot title; a default depending on ``normalize`` is used
            when it is empty or None.
        cmap: matplotlib colour map used for the image.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without samples are divided by 1 so they show 0.0 rather
        # than NaN.
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    # The docstring promises the matrix is printed; the original printed
    # only the caption.
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the respective list entries.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations; cells above
    # the threshold are dark, so their text is drawn in white.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

python虚拟环境中安装diango_ubuntu下的虚拟环境中安装Django的操作方法

sql行列转换_SQL试题六行列转换

python周末吃什么_Python学员作品-今天吃什么

python bootstrap container宽度_边做边学，PythonDjango实战教程-08-使用Bootstrap样式

mac 思科链路聚合_思科实验：STP生成树实验

python案例教程钱毅湘_Python案例教程清华大学出版社钱毅湘等高等学校通识教育系列教材软件工具程序设计Python...

python赋值法例子_Python学习笔记第五天

边缘计算架构_KubeEdge架构解读：云原生的边缘计算平台

网络摄像头监控软件_一个交换机能带动多少个网络监控摄像头？

unity3d显示c4d材质_学习笔记分享如何学好C4D

数据窗口中的ole控件 pb_工作表数据与UserForm窗口的交互过程中如何实现数据的精确查找...

商业方向的大数据专业_数学专业大数据方向的本科生，考研时该选择大数据还是管理...

高德地图自动生成轨迹_揭秘高德高精度地图，无人驾驶就靠它了

python反序列化总结_Python 反序列化安全问题（二）

python可以用于工业机器人编程与操作_如何实现工业机器人编程抓取

【跃迁之路】【477天】刻意练习系列236（2018.05.28）

C++11: atomic 头文件

js数组之filter

关于使用markdown的方法（引自CSDN教程）

汉诺塔算法

猫头鹰的深夜翻译：Java 2D Graphics, 简单的仿射变换

如何在GitHub上创建个人博客

吐槽Javascript系列二：数组中的splice和slice方法

想写好前端，先练好内功

一道面试题引发的“血案”

一文看透浏览器架构

linux 淘宝开源监控工具tsar

MySQL主从复制一致性检测

学习一下，什么是预包装食品？

#android不同版本废弃api，新api。

（k8s中）docker netty OOM问题记录

（Redis使用系列） Springboot 整合Redisson 实现分布式锁七

（含react-draggable库以及相关BUG如何解决）固定在左上方某盒子内（如按钮）添加可拖动功能，使用react hook语法实现

（机器学习-深度学习快速入门）第一章第一节：Python环境和数据分析

（蓝桥杯每日一题）平方末尾及补充（常用的字符串函数功能）

（四）模仿学习-完成后台管理页面查询

（学习日记）2024.04.04：UCOSIII第三十二节：计数信号量实验

(转载)深入super，看Python如何解决钻石继承难题

.describe() python_Python-Win32com-Excel

.Net Attribute详解(上)-Attribute本质以及一个简单示例

.net core使用RPC方式进行高效的HTTP服务访问

.NET和.COM和.CN域名区别

.net连接MySQL的方法

/使用匿名内部类来复写Handler当中的handlerMessage()方法

@Pointcut 使用

[20171102]视图v$session中process字段含义

[ARM]ldr 和 adr 伪指令的区别

[AUTOSAR][诊断管理][ECU][$37] 请求退出传输。终止数据传输的（上传/下载）

[AutoSar]BSW_Memory_Stack_004 创建一个简单NV block并调试

[Big Data - Kafka] kafka学习笔记：知识点整理

[C# 开发技巧]实现属于自己的截图工具

[C#]扩展方法

[C/C++]数据结构深入挖掘环形链表问题

[HCTF 2018]WarmUp （代码审计）

[HXPCTF 2021]includer‘s revenge

相关文章：