当前位置: 首页 > news >正文

使用isolation forest进行dns网络流量异常检测

代码如下,测试发现,是否对输入数据进行归一化/标准化对于结果没有影响:

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler


def parse_line(s):
    s = s.replace("u'", "").replace("'", "").replace("(", "").replace(")", "").replace("[", "").replace("]", "")
    s2 = s.split(",")
    dat = [float(_) for _ in s2[1:]]
    return (s2[0], dat)


def get_data():
    with open("feature.dat") as f:
      lines = f.readlines()
      return [parse_line(line) for line in lines]


def train(collected_data):
    input_data = [c[1] for c in collected_data]
    #scaler = StandardScaler().fit(input_data)
    #input_data = scaler.transform(input_data)


    #min_max_scaler = MinMaxScaler()
    #input_data = min_max_scaler.fit_transform(input_data)
    #print input_data

    rng = np.random.RandomState(42)
    #clf = IsolationForest(max_samples=10*2, random_state=rng)
    #clf = IsolationForest(max_features=5)
    clf = IsolationForest(max_samples="auto", random_state=rng)
    clf.fit(input_data)
    pred_y = clf.predict(input_data)

    bad_domains = set()
    for i,y in enumerate(pred_y):
        if y == -1:
            print "bad domains:", collected_data[i]
            bad_domains.add(collected_data[i][0])


if __name__ == "__main__":
    dat = get_data()
    train(dat)

输出样例:

bad domains: ('openvpn.', [81.0, 5.0, 3.0, 14.0, 0.1728395061728395, 27.493827160493826, 32.76543209876543, 3.2857142857142856, 18.214285714285715, 3.0714285714285716, 3.255427209766844, 0.04938271604938271, 0.0, 0.3950617283950617, 0.12345679012345678, 0.00224517287831163])
bad domains: ('mobily.com.sa', [16.0, 1.0, 4.0, 12.0, 0.75, 47.3125, 108.8125, 1.0, 5.333333333333333, 0.0, 1.9166666666666667, 0.6875, 0.0, 0.375, 0.375, 0.0066050198150594455])
bad domains: ('vcl2728.com', [40.0, 2.0, 10.0, 27.0, 0.675, 67.125, 462.85, 3.3333333333333335, 28.555555555555557, 3.3703703703703702, 3.111111111111111, 0.025, 0.0, 0.0, 0.0, 0.00186219739292365])
bad domains: ('vkcache.com', [598.0, 1.0, 2.0, 528.0, 0.882943143812709, 47.0, 161.65886287625418, 1.0, 6.0, 0.005681818181818182, 2.453875312427234, 0.22909698996655517, 0.0, 0.11371237458193979, 0.0033444816053511705, 0.00017789795773144525])
bad domains: ('nsconcreteblock.info', [18.0, 2.0, 4.0, 18.0, 1.0, 87.0, 43.5, 1.0, 37.0, 5.0, 3.823329582775343, 1.0, 0.0, 0.0, 0.0, 0.0031928480204342275])
bad domains: ('topcdn.org', [52.0, 2.0, 4.0, 13.0, 0.25, 80.92307692307692, 56.38461538461539, 1.0, 40.92307692307692, 0.0, 4.176988788169356, 0.5, 0.0, 0.28846153846153844, 0.21153846153846154, 0.001188212927756654])
bad domains: ('bilibiligame.net', [6472.0, 165.0, 17.0, 32.0, 0.004944375772558714, 46.542954264524106, 88.28522867737948, 1.0, 18.65625, 2.84375, 3.4818361348887463, 0.9610630407911002, 0.0, 0.2376390605686032, 0.0004635352286773795, 1.659883277007961e-05])
bad domains: ('vip.', [2183.0, 386.0, 30.0, 32.0, 0.014658726523133303, 34.78515803939533, 23.834631241410904, 1.9375, 9.6875, 0.0, 2.83937270784057, 0.9436555199267064, 0.0, 0.09894640403114979, 0.011452130096197893, 6.58449220396123e-05])
bad domains: ('ixigua.com', [2707.0, 133.0, 29.0, 17.0, 0.006280014776505356, 33.71222755818249, 123.10749907646841, 1.0, 4.647058823529412, 0.8823529411764706, 1.9781718484300252, 0.9759881787957149, 0.0, 0.28075360177318065, 0.01699298115995567, 5.478911668986072e-05])
bad domains: ('expressvpn.', [890.0, 31.0, 36.0, 165.0, 0.1853932584269663, 41.89887640449438, 0.0, 1.0363636363636364, 11.224242424242425, 0.05454545454545454, 3.0592421535372565, 0.5325842696629214, 0.0, 0.0, 0.0, 0.00013408420488066506])

 


输入数据样例(已经提取了特征):

(u'abfxsc.com', (24, 1, 4, 11, 0.4583333333333333, 48.0, 56.041666666666664, 1.0, 8.0, 0.0, 3.0, 0.5, 0.0, 0.20833333333333334, 0.08333333333333333, 0.004340277777777778))
(u'dqdkws.cn', (71, 2, 7, 50, 0.704225352112676, 45.0, 79.859154929577471, 1.0, 6.0, 0.0, 2.4132632507067329, 0.5915492957746479, 0.0, 0.0, 0.0, 0.0015649452269170579))
(u'tcdnvod.com', (701, 51, 17, 40, 0.05706134094151213, 55.266761768901567, 56.370898716119832, 3.1749999999999998, 17.399999999999999, 0.125, 3.4810606143066232, 0.9714693295292439, 0.0, 0.39514978601997147, 0.0442225392296719, 0.00012905890248309329))
(u'0937jyg.com', (68, 4, 7, 19, 0.27941176470588236, 46.25, 67.529411764705884, 1.0, 5.3684210526315788, 0.0, 2.2469056830015672, 0.6323529411764706, 0.0, 0.0, 0.0, 0.001589825119236884))
(u'jcloud-cdn.com', (61, 3, 3, 11, 0.18032786885245902, 67.278688524590166, 66.311475409836063, 4.5454545454545459, 24.363636363636363, 0.18181818181818182, 3.5244668708659161, 0.4262295081967213, 0.0, 0.08196721311475409, 0.03278688524590164, 0.0012183235867446393))
(u'omacloud.com', (545, 8, 20, 29, 0.05321100917431193, 46.315596330275227, 30.722935779816513, 1.9655172413793103, 17.793103448275861, 0.0, 3.3836270422458083, 1.0, 0.0, 0.10825688073394496, 0.022018348623853212, 0.00019808256081134618))
(u'serverss.top', (144, 1, 15, 22, 0.1527777777777778, 46.604166666666664, 50.145833333333336, 1.0, 4.5909090909090908, 0.0, 2.1594720075625, 0.5277777777777778, 0.0, 0.2777777777777778, 0.06944444444444445, 0.00074504544777231408))
(u'ctripgslb.com', (601, 9, 10, 34, 0.056572379367720464, 60.512479201331118, 157.12479201331115, 3.0588235294117645, 17.911764705882351, 0.91176470588235292, 3.3912394967901913, 0.8585690515806988, 0.0, 0.3594009983361065, 0.016638935108153077, 0.00013748350197976243))
(u'kas-labs.com', (54, 2, 8, 15, 0.2777777777777778, 55.888888888888886, 142.37037037037038, 1.0, 12.466666666666667, 1.6000000000000001, 3.0989151803147923, 0.5, 0.0, 0.09259259259259259, 0.09259259259259259, 0.0016567263088137839))
(u'mccdnglb.com', (365, 4, 6, 21, 0.057534246575342465, 51.161643835616438, 98.161643835616445, 3.5238095238095237, 18.428571428571427, 0.19047619047619047, 3.4116298602195974, 0.989041095890411, 0.0, 0.16164383561643836, 0.01643835616438356, 0.00026775195458926852))
(u'localhost.', (28, 4, 3, 10, 0.35714285714285715, 41.142857142857146, 172.35714285714286, 1.8999999999999999, 10.9, 1.8999999999999999, 2.3999999999999999, 0.14285714285714285, 0.0, 0.0, 0.0, 0.004340277777777778))
(u'xdy-cdn.cn', (473, 5, 2, 50, 0.10570824524312897, 54.780126849894295, 46.545454545454547, 3.0, 14.74, 0.0, 3.1343677127142864, 0.5750528541226215, 0.0, 0.0, 0.0, 0.00019296823742811933))
(u'labkas.com', (24, 2, 6, 10, 0.4166666666666667, 56.666666666666664, 66.833333333333329, 2.0, 17.399999999999999, 1.7, 3.6751008468322333, 0.08333333333333333, 0.0, 0.0, 0.0, 0.0036764705882352941))
(u'site.', (62, 5, 22, 14, 0.22580645161290322, 43.322580645161288, 50.774193548387096, 1.9285714285714286, 11.785714285714286, 0.21428571428571427, 3.0365341332026929, 0.5806451612903226, 0.0, 0.11290322580645161, 0.06451612903225806, 0.0018615040953090098))
(u'ft25882.com', (39, 2, 5, 20, 0.5128205128205128, 49.0, 92.871794871794876, 1.0, 8.0, 0.0, 3.0, 0.5384615384615384, 0.0, 0.3076923076923077, 0.05128205128205128, 0.0026164311878597592))
(u'douyuyuba.com', (232, 4, 7, 115, 0.4956896551724138, 62.650862068965516, 97.504310344827587, 2.0, 21.530434782608694, 0.97391304347826091, 3.4599350912323117, 0.5560344827586207, 0.0, 0.25, 0.008620689655172414, 0.00034399724802201581))
(u'win.', (334, 7, 39, 23, 0.0688622754491018, 42.604790419161674, 60.008982035928142, 1.8695652173913044, 13.217391304347826, 0.21739130434782608, 2.9398183078690807, 0.7904191616766467, 0.0, 0.3772455089820359, 0.041916167664670656, 0.00035137034434293746))
(u'affise.com', (73, 3, 10, 10, 0.136986301369863, 49.246575342465754, 146.56164383561645, 1.0, 8.5, 0.0, 2.5368841208873407, 0.6027397260273972, 0.0, 0.273972602739726, 0.0547945205479452, 0.0013908205841446453))
(u'stripcdn.com', (46, 3, 8, 17, 0.3695652173913043, 44.043478260869563, 160.54347826086956, 1.0, 3.8823529411764706, 0.52941176470588236, 1.8718920798583554, 0.391304347826087, 0.0, 0.10869565217391304, 0.10869565217391304, 0.0024679170779861796))
(u'doonoo.cn', (198, 1, 8, 19, 0.09595959595959595, 42.111111111111114, 66.060606060606062, 1.0, 3.1052631578947367, 0.0, 1.6286506585399816, 0.5, 0.0, 0.2222222222222222, 0.025252525252525252, 0.00059966418805468941))
(u'nii.ac.jp', (34, 3, 8, 16, 0.47058823529411764, 43.029411764705884, 34.529411764705884, 1.3125, 7.3125, 0.1875, 2.4667777025215347, 0.4411764705882353, 0.0, 0.08823529411764706, 0.08823529411764706, 0.0034176349965823649))
(u'78dm.net', (41, 5, 6, 11, 0.2682926829268293, 39.146341463414636, 66.634146341463421, 1.0, 3.3636363636363638, 0.18181818181818182, 1.3510446035661767, 0.7317073170731707, 0.0, 0.3170731707317073, 0.04878048780487805, 0.0031152647975077881))
(u'gosuncdn.com', (587, 5, 36, 40, 0.06814310051107325, 53.325383304940374, 204.61328790459967, 3.25, 15.699999999999999, 0.0, 3.3370338393801235, 0.5724020442930153, 0.0, 0.09540034071550256, 0.010221465076660987, 0.00015973420228739378))
(u'gfnormal04aj.com', (68, 2, 2, 33, 0.4852941176470588, 62.0, 58.970588235294116, 1.0, 16.0, 0.0, 3.4444634232339926, 0.5147058823529411, 0.0, 0.25, 0.058823529411764705, 0.0011859582542694497))
(u'mediatoday.co.kr', (13, 1, 3, 12, 0.9230769230769231, 50.46153846153846, 100.61538461538461, 1.0, 4.583333333333333, 0.0, 1.7623953076615158, 1.0, 0.0, 0.23076923076923078, 0.23076923076923078, 0.007621951219512195))
(u'qinsx.cn', (127, 4, 8, 14, 0.11023622047244094, 29.811023622047244, 51.362204724409452, 1.0, 1.9285714285714286, 0.0, 0.9285714285714286, 0.5905511811023622, 0.0, 0.30708661417322836, 0.06299212598425197, 0.0013206550449022716))

参考:http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest

相关文章:

  • nginx 开机自动启动
  • Python监控服务器利器--psutil
  • 主线程退出,子线程会退出么?
  • apue读书笔记之apue.h的设置
  • rpm 安装中的问题
  • 编程学习初体验(3. 语言的选择)
  • oracle12C—RMAN表级恢复
  • 黑客的思想
  • RPC协议
  • 命令行工具软件
  • 腾讯云ubuntu安装tensorflow
  • Python垃圾回收机制:gc模块
  • Silverlight Client←→Server数据同步备忘代码
  • Expression Blend实例中文教程(12) - 样式和模板快速入门Style,Template
  • rails实现“事务”的方法
  • [js高手之路]搞清楚面向对象,必须要理解对象在创建过程中的内存表示
  • [nginx文档翻译系列] 控制nginx
  • 【402天】跃迁之路——程序员高效学习方法论探索系列(实验阶段159-2018.03.14)...
  • Akka系列(七):Actor持久化之Akka persistence
  • android 一些 utils
  • exports和module.exports
  • nginx 配置多 域名 + 多 https
  • PhantomJS 安装
  • 对象引论
  • 基于HAProxy的高性能缓存服务器nuster
  • 入职第二天:使用koa搭建node server是种怎样的体验
  • 适配iPhoneX、iPhoneXs、iPhoneXs Max、iPhoneXr 屏幕尺寸及安全区域
  • 体验javascript之美-第五课 匿名函数自执行和闭包是一回事儿吗?
  • 微信公众号开发小记——5.python微信红包
  • 主流的CSS水平和垂直居中技术大全
  • ​sqlite3 --- SQLite 数据库 DB-API 2.0 接口模块​
  • ###STL(标准模板库)
  • #stm32整理(一)flash读写
  • #每天一道面试题# 什么是MySQL的回表查询
  • ()、[]、{}、(())、[[]]命令替换
  • (4)logging(日志模块)
  • (板子)A* astar算法,AcWing第k短路+八数码 带注释
  • (附源码)spring boot基于Java的电影院售票与管理系统毕业设计 011449
  • (附源码)ssm经济信息门户网站 毕业设计 141634
  • (深度全面解析)ChatGPT的重大更新给创业者带来了哪些红利机会
  • (十五)devops持续集成开发——jenkins流水线构建策略配置及触发器的使用
  • (一)RocketMQ初步认识
  • ..thread“main“ com.fasterxml.jackson.databind.JsonMappingException: Jackson version is too old 2.3.1
  • .360、.halo勒索病毒的最新威胁:如何恢复您的数据?
  • .helper勒索病毒的最新威胁:如何恢复您的数据?
  • .MyFile@waifu.club.wis.mkp勒索病毒数据怎么处理|数据解密恢复
  • .net core IResultFilter 的 OnResultExecuted和OnResultExecuting的区别
  • .NET Core实战项目之CMS 第十二章 开发篇-Dapper封装CURD及仓储代码生成器实现
  • .Net(C#)自定义WinForm控件之小结篇
  • .NET开发不可不知、不可不用的辅助类(一)
  • .net中的Queue和Stack
  • [ 代码审计篇 ] 代码审计案例详解(一) SQL注入代码审计案例
  • [BUAA软工]第一次博客作业---阅读《构建之法》
  • [BUUCTF]-PWN:[极客大挑战 2019]Not Bad解析
  • [BZOJ 4598][Sdoi2016]模式字符串