#!/bin/bash

#function:check band from squid log

#author:zhanglejie

#date:2014/7/3

# Clean up: make sure the work directory exists and remove leftovers from an
# earlier run.
# mkdir -p succeeds when the directory is already there, so genuine failures
# (for example a permission problem) are reported instead of being hidden
# behind a blanket "&> /dev/null". Nothing below can work without the
# directory, so stop right away if it cannot be created.
if ! mkdir -p /tmp/domain; then
  echo "cannot create /tmp/domain" >&2
  exit 1
fi

rm -f /tmp/domain/domain_all.log
rm -f "/tmp/domain/pbl_report$(date +%Y%m%d).csv"
rm -f /tmp/domain/domain_name.log

#rm -f /tmp/domain/domain_name_tmp.log

# Filter the log.
# With no argument and a terminal on stdin there is nothing to decompress, so
# stop with a usage message instead of going on to build an empty report.
if [ "$#" -eq 0 ] && [ -t 0 ]; then
  echo "Usage: $0 <gzip-compressed squid log>" >&2
  exit 1
fi

# ${1:+"$1"} passes the path as a single word even when it contains spaces,
# and expands to nothing when no path was given, so zcat still reads stdin.
# Stage 1: keep records whose field 4 matches 200 and whose field 7 contains
#          no "?"; emit fields 5, 7 and 9.
# Stage 2: prepend the third "/"-separated piece of that line (the host part
#          when field 7 looks like scheme://host/...).
# Stage 3: drop the first "DIRECT/" and the first "NONE/" on each line.
zcat ${1:+"$1"} \
  | awk '$4~/200/ &&  $7!~/\?/{print $5" "$7" "$9}' \
  | awk -F "/" '{print $3" "$0}' \
  | sed -e 's/DIRECT\///' -e 's/NONE\///' \
  > /tmp/domain/domain_all.log

#zcat $1|awk '$4~/200/ &&  $7!~/\?/{print $5" "$7" "$9}' > /tmp/domain/domain_all.log

#/usr/bin/split -b 1500000 /tmp/domain/domain_all.log

#Consolidate the full log

#function out_tmp()  

#{

#while read line

#do

##d1 size; d2 url; d3 ip; d4 domain

# d1=`echo $line |awk  '{print $1}'`

# d2=`echo $line |awk  '{print $2}'`

# d3=`echo $line |awk  '{print $3}'|awk -F "/" '{print $2}'`

# d4=`echo $line |awk  '{print $2}'|awk -F "/" '{print $3}'`

 #if [ $d3 == "-" ]

 #then

 #d3=`/usr/bin/nslookup $d4|grep "Address"|tail -n 1|awk -F ":" '{print $2}'`

 #fi

# echo "$d4 $d1 $d2 $d3" >> /tmp/domain/domain_name_tmp.log

#done < $1

#}

#for i in `ls /tmp/domain/x*`

#do

#out_tmp $i &

#done

#Check whether the concurrent jobs have finished

#pid_num=0

#while [ ! $pid_num -eq 1 ]

#do

#pid_num=`ps aux|grep band_find.sh |wc -l`

#sleep 5

#done

# Deduplicate the filtered records.
sort -u /tmp/domain/domain_all.log > /tmp/domain/domain_name_all.log

## Ranking: count the deduplicated records per domain (field 1) and order the
## "count domain" lines by ascending count.
# NOTE(review): because the order is ascending, rows 1-10 ("top10") are the
# domains with the FEWEST records. Confirm this is intended before switching
# to "sort -rn".
awk '{print $1}' /tmp/domain/domain_name_all.log \
  | sort \
  | uniq -c \
  | sort -n \
  > /tmp/domain/domain_sort.log

## Take slices of the ranking by row number; only the domain column is kept.
# NOTE(review): 100-120 and 500-520 are inclusive, i.e. 21 rows each, although
# the file names say "20".
awk 'NR>=1 && NR<=10 {print $2}'    /tmp/domain/domain_sort.log > /tmp/domain/domain_top10.log
awk 'NR>=100 && NR<=120 {print $2}' /tmp/domain/domain_sort.log > /tmp/domain/domain_mid20.log
awk 'NR>=500 && NR<=520 {print $2}' /tmp/domain/domain_sort.log > /tmp/domain/domain_end20.log

### Split by size (field 2): strictly between 50000 and 100000 goes to
### domain_size.log, everything else to domain_size_other.log.
# The second filter is the exact negation of the first, so a record whose
# size is exactly 50000 or 100000 is no longer missing from both files.
awk '$2<100000 && $2>50000 {print $0}'    /tmp/domain/domain_name_all.log > /tmp/domain/domain_size.log
awk '!($2<100000 && $2>50000) {print $0}' /tmp/domain/domain_name_all.log > /tmp/domain/domain_size_other.log

## Output
#######################################
# Append up to three sample records per domain to today's report.
# For every domain listed in $1 the records are taken from $2; when $2 holds
# none for that domain they are taken from
# /tmp/domain/domain_size_other.log instead. A "**********" separator line is
# appended once the whole list has been processed.
# Arguments:
#   $1 - file with one domain per line
#   $2 - record file whose first whitespace-separated field is the domain
# Outputs:
#   Appends to /tmp/domain/pbl_report<YYYYMMDD>.csv
#######################################
out() {
  local list=$1
  local preferred=$2
  local fallback=/tmp/domain/domain_size_other.log
  local report domain rows

  # Resolve the report name once per call so a run that crosses midnight
  # cannot split one section across two files.
  report="/tmp/domain/pbl_report$(date +%Y%m%d).csv"

  # Print at most three records whose first field IS the domain. Appending ""
  # forces a string comparison. An unquoted grep pattern would treat the
  # domain as a regex and match it anywhere on the line, so "a.com" would
  # also select "ba.com" records.
  local pick='($1 "") == (d "") { print; if (++n == 3) exit }'

  while read -r domain; do
    # A blank entry selects nothing; skip it.
    [ -n "$domain" ] || continue

    rows=$(awk -v d="$domain" "$pick" "$preferred")
    if [ -z "$rows" ]; then
      rows=$(awk -v d="$domain" "$pick" "$fallback")
    fi

    if [ -n "$rows" ]; then
      printf '%s\n' "$rows" >> "$report"
    fi
  done < "$list"

  echo "**********" >> "$report"
}

# Emit one report section per ranking slice, always sampling from the
# mid-size record file first, then tell the user where the report is.
for slice in top10 mid20 end20; do
  out "/tmp/domain/domain_${slice}.log" /tmp/domain/domain_size.log
done

echo "Report is here /tmp/domain/pbl_report$(date +%Y%m%d).csv"