import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import dateutil.parser
from dateutil.parser import parse
from datetime import timedelta
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import re
import datetime
# Parse every line of the firewall log into a flat row of eight values:
# [date, ms, ufw, IN, mac_address, source_ip, DPT, protocol].

# Separator used to cut a raw line into chunks.  Written as a raw string so
# the regex escape \W is not treated as a (invalid) string escape.
_LINE_SPLIT_RE = re.compile(r'\W+ ]?')

# key=value fields pulled out of the last chunk of a line, in the order their
# values are appended to the row (MAC, SRC, DPT, PROTO).
_FIELD_RES = (
    re.compile(r"(MAC=(\d|\w|\:)*)"),
    re.compile(r"(SRC=(\d|\.)*)"),
    re.compile(r"(DPT=\d*)"),
    re.compile(r"(PROTO=\w*)"),
)


def _field_value(pattern, text):
    """Return the text after ``=`` in the first match of *pattern*, else None."""
    match = pattern.search(text)
    if match is None:
        # The field is absent from this line; report it as missing rather
        # than failing on ``None.span()``.
        return None
    return match.group(0).partition("=")[2]


lines = []
filepath = 'firewall.log'
with open(filepath) as fp:
    for line in fp:
        if not line.strip():
            # Blank lines carry no fields and would fail the indexing below.
            continue
        results = _LINE_SPLIT_RE.split(line)
        cleaned_list = [
            # Month chunk plus the second chunk minus its last 14 characters.
            # NOTE(review): presumably those 14 characters are the host and
            # process name after the time; this assumes a fixed width -- confirm.
            results[0] + " " + results[1][0:-14],
            # Third and fourth chunks with their first character dropped
            # (presumably an opening bracket -- confirm against the log).
            results[2][1:],
            results[3][1:],
            results[4],
        ]
        # The remaining fields are searched for in the final chunk only.
        key_results = results[-1]
        cleaned_list.extend(
            _field_value(pattern, key_results) for pattern in _FIELD_RES
        )
        print(cleaned_list)
        lines.append(cleaned_list)
print(lines)
# Build a DataFrame with one row per parsed log line; each row in `lines`
# supplies eight values, named here in the order they were appended.
df = pd.DataFrame(lines, columns=["date", "ms", "ufw", "IN","mac_address", "source_ip", "DPT", "protocol"])
# Bare expression: presumably a notebook cell that displays the frame.
df
# Ten most frequent values in the source_ip column.
df["source_ip"].value_counts().head(10)
# The top IP addresses represent: (TODO: this note was left unfinished)
# Ten most frequent values in the DPT column.
df["DPT"].value_counts().head(10)
# Number of rows for each distinct value in the protocol column.
df["protocol"].value_counts()
def clean_date(date, year=2019, shift_hours=5):
    """Parse a year-less log timestamp and shift it back by a fixed offset.

    Args:
        date: Timestamp text in ``'%b %d %H:%M:%S'`` form, for example
            ``"Nov 3 06:27:48"``.
        year: Year to assume, because the text itself carries none.
            Defaults to 2019.
        shift_hours: Number of hours subtracted from the parsed time.
            Defaults to 5.
            NOTE(review): presumably a time-zone correction -- confirm.

    Returns:
        A naive ``datetime.datetime`` equal to the parsed time minus
        ``shift_hours`` hours.

    Raises:
        ValueError: If *date* does not match the expected format.
    """
    # Prepend the year so strptime has a complete calendar date to parse.
    stamp = "{} {}".format(year, date)
    parsed = datetime.datetime.strptime(stamp, '%Y %b %d %H:%M:%S')
    return parsed - datetime.timedelta(hours=shift_hours)
# Quick manual check of clean_date on a sample timestamp string.
clean_date("Nov 3 06:27:48")
# Convert every 'date' string with clean_date and keep the result in a new
# 'datetime' column.
df['datetime'] = df['date'].apply(clean_date)
# Bare expression: presumably a notebook cell that displays the frame.
df
# Use the converted datetimes as the row index; the later resample() calls
# operate on this index.
df.index = df['datetime']
# Bare expression: presumably a notebook cell that displays the frame.
df
# Chart of how many log rows fall into each hour, saved to hourly_hits.png.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(1, 1, 1)
# Counting the 'ms' column per hourly bucket gives the number of rows per hour.
hourly_hits = df['ms'].resample('H').count()
hourly_hits.plot(ax=ax, legend=False)
plt.savefig("hourly_hits.png", transparent=True)
plt.show()
# Chart of hourly row counts split by protocol: UDP in green, TCP in red.
# Saved to hourly_hits_by_protocol.png.
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(1, 1, 1)
only_udp = df[df['protocol'] == 'UDP']
only_tcp = df[df['protocol'] == 'TCP']
# Draw both series on the same axes, UDP first and then TCP.
for subset, colour in ((only_udp, 'green'), (only_tcp, 'red')):
    hourly = subset['ms'].resample('H').count()
    hourly.plot(ax=ax, color=colour)
plt.savefig("hourly_hits_by_protocol.png", transparent=True)
plt.show()