#!/bin/bash
#
# Apache access-log report.
#
# Builds one large extended regex ("filter") describing requests that are
# expected, prints every log line that does NOT match it, and gathers hit
# counters per crawler (by user agent and by client IP).
#
# Usage: <script> [logfile]
#   logfile  access log to analyse (default: /var/log/apache2/access.log.1)
#
# The patterns below rely on the client address being field 1 and the HTTP
# status being field 2 of every log line.
#
# NOTE(review): ${header} and ${body} used by the final output are not
# assigned anywhere in this script, and none of the counters computed here
# are printed by it. They expand to nothing unless inherited from the
# environment -- confirm where the report body is supposed to be built.

log="${1:-/var/log/apache2/access.log.1}"

# Timestamp field, e.g. "[01/Jan/2020:00:00:00 -0700]". Only the -0700 and
# -0800 offsets are accepted. (Was "-0[7,8]00", which also accepted a comma.)
ts="\[[0-9]{1,2}/[a-zA-Z]{3}/[0-9]{4}:[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2} -0[78]00\]"

### agents
# Known browser user agents, as one parenthesised alternation that includes
# the surrounding double quotes of the log field.
agents="("
agents+="\"Mozilla/[0-9].[0-9] \(Windows NT 6.(1|3); WOW64; rv:[0-9]{1,2}.[0-9]{1,2}\) Gecko/20100101 Firefox/[0-9]{1,2}.[0-9]{1,2}\""
agents+="|\"Mozilla/5.0 \(Linux; Android 5.1; MotoE2\(4G-LTE\) Build/LPI23.29-18.9(-2|)\) AppleWebKit/537.36 \(KHTML, like Gecko\) Chrome/47.0.2526.83 Mobile Safari/537.36\""
agents+="|\"Mozilla/5.0 \(Windows NT 6.(1|3); WOW64\) AppleWebKit/537.36 \(KHTML, like Gecko\) Chrome/4(1|2).0.[0-9]{1,4}.[0-9]{1,4} Safari/[0-9]{1,3}.[0-9]{1,2}\""
agents+="|\"Mozilla/5.0 \((X11; Linux x86_64|Windows NT 6.(1|3); WOW64); rv:[0-9]{1,2}.[0-9]{1,2}\) Gecko/20100101 Firefox/[0-9]{1,2}.[0-9]{1,2}\""
agents+="|\"Mozilla/5.0 \(Windows NT 6.(1|3); Win64; x64; rv:[0-9]{1,2}.[0-9]{1,2}\) Gecko/20100101 Firefox/[0-9]{1,2}.[0-9]{1,2}\""
agents+="|\"Mozilla/5.0 \(Linux; Android 5.1; MotoE2\(4G-LTE\) Build/LPIS23.29-18.9-2\) AppleWebKit/537.36 \(KHTML, like Gecko\) Chrome/50.0.2661.89 Mobile Safari/537.36\""
agents+="|\"Mozilla/5.0 \(Macintosh; Intel Mac OS X 10_10_3\) AppleWebKit/600.5.17 \(KHTML, like Gecko\) Version/8.0.5 Safari/600.5.17\""
agents+=")"

### special agents
# These MUST be wrapped in their own group. Without the parentheses the "|"
# becomes a top-level alternative of ${filter}, so any request carrying the
# extra user agent is whitelisted no matter which host or path it hit, and
# the branch it belongs to loses its "$" anchor.
# nagios
nagios_agents="(${agents}|\"aNag/5.3.1\")"
# davdroid
davdroid_agents="(${agents}|\"DAVdroid/[0-9]{1}.[0-9]{1}(|.[0-9]{1})(|.[0-9]{1}) \([0-9]{4}/[0-9]{2}/[0-9]{2}; dav4android; okhttp3\) Android/6.0.1\")"

### filters
# Every entry is one top-level alternative of the final regex.
# nagios (check_http probe)
filter="192.168.6.210 (200|302) - somehost.com - ${ts} \"GET / HTTP/1.1\" [0-9]{1,6} \"-\" \"check_http/v2.1.1 \(monitoring-plugins 2.1.1\)\""
# nagios (web UI)
# NOTE(review): "thekeyel" differs from the "thekyel" spelling used by every
# other host below -- confirm which one is intended.
filter+="|(207.66.228.(198|231|236|237|239)|192.168.0.[0-9]{1,3}) (200|304) - nagios.thekeyel.com - ${ts} \"(POST|GET) (/cgi-bin/nagios3/(cmd|status|tac|extinfo).cgi.*|/nagios3/(js/jquery-1.7.1.min.js|stylesheets/(common.css|cmd.css|extinfo.css|status.css|tac.css)|images/.*)|/stylesheets/common.css|/images/(sblogo|sflogo|logofullsize|weblogo1).png|/js/jquery-1.7.1.min.js|/(main|side).php) HTTP/1.1\" [0-9]{1,5} \"(https://somehost.com/(cgi-bin/nagios3/(status|tac|cmd|extinfo).cgi.*|(main|side).php|)|-)\" ${nagios_agents}\$"
# local lwp
filter+="|192.168.6.[0-9]{1,3} 200 - somehost.com - ${ts} \"GET /configs/rev HTTP/1.1\" [0-9]{1,4} \"-\" \"lwp-request/6.03 libwww-perl/6.08\"\$"
# local/work (ttrss|pictures)
# NOTE(review): the second address alternative is already covered by the
# first one; possibly a different network was meant -- confirm.
filter+="|(192.168.6.[0-9]{1,3}|192.168.6.(198|232|239)) (200|302) - somehost.com - ${ts} \"(GET|POST) /.* HTTP/1.1\" [0-9]{1,10} \"https://somehost.com/.*\" ${agents}\$"
# from ttrss
filter+="|192.168.6.83 (200|301) - somehost.com - ${ts} \"GET /\?feed=rss2 HTTP/1.1\" [0-9]{1,5} \"-\" \"Tiny Tiny RSS/1.15.3.6f06040 \(http://tt-rss.org/\)\"\$"
# local/work calendar
filter+="|(207.66.228.(198|239)|192.168.0.[0-9]{1,3}) (200|201|204|207) - cal.thekyel.com (-|kyelw) ${ts} \"(GET|PUT|REPORT|OPTIONS|PROPFIND) .* HTTP/1.1\" [0-9]{1,10} \"(-|https://cal.thekyel.com/.*)\" ${agents}\$"
# todo - home
filter+="|192.168.0.[0-9]{1,3} (200|302) - nah.thekyel.com (CaptSpify|-) ${ts} \"(POST|GET) /index.php HTTP/1.1\" [0-9]{1,10} \"https://nah.thekyel.com/index.php\" ${agents}\$"
# calendar
filter+="|(207.66.228.(198|232|239)|192.168.0.[0-9]{1,3}) (200|201|207|204) - cal.thekyel.com (kyelw|-) ${ts} \"(GET|PUT|DELETE|PROPFIND|REPORT) /(cache.manifest|cal.php.*) HTTP/1.1\" [0-9]{1,6} \"-\" ${davdroid_agents}\$"
# basic home-page request
filter+="|[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} (200|401) - (-|kyelw) ${ts} \"GET / HTTP/1.1\" [0-9]{1,10} \"-\" ${agents}\$"
# favicon
filter+="|[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} 200 - - ${ts} \"GET /favicon.ico HTTP/1.1\" [0-9]{1,10} \"-\" ${agents}\$"
# internal dummy connection
filter+="|127.0.0.1 200 - (cal|code|sec|set|share|web|apache|web1).thekyel.com - ${ts} \"OPTIONS \* HTTP/1.0\" 126 \"-\" \"Apache/2.4.25 \(Debian\) SVN/1.9.5 OpenSSL/1.0.2(q|r|s|t|u) (mod_wsgi/4.5.11 Python/2.7 |)\(internal dummy connection\)\"\$"
# internal dummy connection, format 2
filter+="|[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} 200 - (cal|code|sec|set|share|web|apache|web1).thekyel.com - ${ts} \"OPTIONS \* HTTP/1.0\" [0-9]{1,3} \"-\" \"Apache/2.4.25 \(Debian\) SVN/1.9.5 OpenSSL/1.0.2t \(internal dummy connection\)\"\$"
# ical
filter+="|(192.168.0.[0-9]{1,3}|207.66.228.(231|234|239|241)) 200 - kyelw ${ts} \"GET /icalclient.php HTTP/1.1\" [0-9]{1,6} \"-\" \"ICSdroid/1.3 \(Android/6.0.1\)\"\$"
# music download
filter+="|(192.168.0.[0-9]{1,3}|207.66.228.198) (200|301) - music.thekyel.com (-|kyelw|CaptSpify) ${ts} \"(GET|POST) /download.php HTTP/1.1\" [0-9]{1,4} \"(https://music.thekyel.com/download.php|-)\" ${agents}\$"
# shared
filter+="|192.168.(2|4).[0-9]{1,3} 200 - (g|s)et.thekyel.com - ${ts} \"GET /\?file=(|configs_|wifi_|web_report_)(configs|gateway.report|syslog_list|security_pictures|backup_inbox|backup_mail|mysql_backup|zips|shared_cleanup|syslog_backup|camera|youtube|nmap.|catalog|dns|home|ganeti|ra|mysql|ampache|lights|ttrss|piwigo|lucien_test|lucien|reminders|nagios|storage|syslog|mail|web)(|[0-4])(|_heartbeat|.thekyel.com)(|&value=(free|locked|col_baby_blue|col_orange|allwhite|col_red)) HTTP/1.1\" [0-9]{1,3} \"-\" \"lwp-request/6.03 libwww-perl/6.08\"\$"

### summaries
# Number of log lines whose fields 1, 4, 7, 8 or 12 mention the keyword
# (empty when there is none).
ttrss_summary="$(awk '{print $1,$4,$7,$8,$12}' "${log}" \
  | grep ttrss | sort | uniq -c | awk '{total += $1} END {print total}')"
nagios_summary="$(awk '{print $1,$4,$7,$8,$12}' "${log}" \
  | grep nagios | sort | uniq -c | awk '{total += $1} END {print total}')"

# Look for errors: requests whose status field does not contain
# 200/301/302/304, grouped and counted, most frequent first.
errors="$(awk '$2 !~ /(200|301|302|304)/ {print $1,$2,$4,$8,$12}' "${log}" \
  | sort | uniq -c | sed 's/^ *//' | sort -n -r -k 1,2 -k 3,3 -k 4,4)"

### leftovers
# Everything that is not covered by ${filter} (matched case-insensitively).
leftovers=$(grep -ivE "${filter}" "${log}")

### known-connections
# Hits per crawler, identified by a substring of the log line.
yandexbot=$(grep -cE "Yandex(Bot|Images)" "${log}")
ahrefsbot=$(grep -c 'AhrefsBot' "${log}")
CCbot=$(grep -c 'CCBot' "${log}")
baiduspider=$(grep -c 'Baiduspider' "${log}")
bingbot=$(grep -c 'bingbot' "${log}")
googlebot=$(grep -c 'Googlebot' "${log}")
yahoobot=$(grep -c 'Slurp' "${log}")
sogoubot=$(grep -c 'sogou web spider' "${log}")
uptimebot=$(grep -c 'Uptimebot' "${log}")
rogerbot=$(grep -c 'rogerbot' "${log}")
mail_ru_bot=$(grep -c 'Mail.RU_Bot' "${log}")
linkdexbot=$(grep -c 'linkdexbot' "${log}")
netcraftbot=$(grep -c 'netcraftbot' "${log}")
majesticbot=$(grep -c 'MJ12bot' "${log}")

### IP count
# Each alternative is matched against a "<count> <ip>" line of
# ${log_summary}: the leading space and the trailing "$" pin the whole
# address.
# ahrefs
ahrefs_filters=" 164.132.161.[0-9]{1,2}\$"
ahrefs_filters+="| 51.255.65.[0-9]{1,2}\$"
ahrefs_filters+="| 151.80.31.[0-9]{3}\$"
# CC
CC_filters=" 54.92.159.129\$"
# mail.ru
mailru_filters=" 217.69.133.(69|227|228|229|230|231|232|233)\$"
# msnbot
msnbot_filters=" 178.255.215.79\$"
# exabot
exabot_filters=" 157.55.39.(42|99|125|137|164|173|182|193|197)\$"
exabot_filters+="| 207.46.13.(24|58|101)\$"
exabot_filters+="| 40.77.167.(6|27|64|80)\$"
exabot_filters+="| 65.55.213.(244|245)\$"
# seznambot
seznambot_filters=" 77.75.76.(166|101)\$"
# baidu
baidu_filters=" 180.76.15.[0-9]{1,3}\$"
baidu_filters+="| 220.181.108.(75|77|81|82|83|85|92|93|101|106|112|143|152|156|163|184|186)\$"
baidu_filters+="| 123.125.71.[0-9]{1,3}\$"
# umich
umich_filters=" 141.212.122.161\$"
# google
google_filters=" 66.249.79.[0-9]{1,3}\$"
google_filters+="| 66.249.64.145\$"
google_filters+="| 66.249.75.23\$"
google_filters+="| 66.249.65.[0-9]{1,3}\$"
google_filters+="| 66.249.66.149\$"
# yahoo
yahoo_filters=" 68.180.228.(44|225)\$"
yahoo_filters+="| 68.180.229.241\$"
# yandex
yandex_filters=" 141.8.143.196\$"
yandex_filters+="| 100.43.85.2\$"
yandex_filters+="| 100.43.81.141\$"
yandex_filters+="| 100.43.91.7\$"
yandex_filters+="| 199.21.99.203\$"
yandex_filters+="| 5.255.250.(13|20)\$"
# majestic
majestic_filters=" 62.210.170.165\$"
# roger
roger_filters=" 209.249.5.249\$"
roger_filters+="| 209.249.5.254\$"
# sogou
sogou_filters=" 106.120.173.84\$"
# netcraft
netcraft_filters=" 45.55.32.243\$"
netcraft_filters+="| 159.203.109.186\$"
netcraft_filters+="| 159.203.120.38\$"
netcraft_filters+="| 104.236.33.193\$"
netcraft_filters+="| 159.203.106.53\$"
# uptime
# TODO: no address list was ever defined for this crawler. An empty pattern
# used to match every line, which made uptime_log the grand total; sum_hits
# now ignores an empty pattern.
uptime_filters="${uptime_filters:-}"
# local
local_filters="192.168.(0|2|4).[0-9]{1,3}"
# work
work_filters="207.66.228.(198|234|236|237|239)"

# Requests per client address, as "<count> <ip>" lines, busiest first.
log_summary=$(awk '{print $1;}' "${log}" | sort | uniq -c | sort -rn)

#######################################
# Adds up the request counts of all addresses matching a pattern.
# Globals:   log_summary (read) - "<count> <ip>" lines
# Arguments: $1 - extended regex applied to each line of log_summary
# Outputs:   the sum on stdout; nothing when the pattern is empty or no
#            line matches
#######################################
sum_hits() {
  local pattern=$1
  # An empty regex matches everything; treat it as "no addresses known".
  [[ -n "${pattern}" ]] || return 0
  grep -E -- "${pattern}" <<<"${log_summary}" \
    | awk '{total += $1} END {if (NR) print total}'
}

ahrefs_log=$(sum_hits "${ahrefs_filters}")
CC_log=$(sum_hits "${CC_filters}")
mailru_log=$(sum_hits "${mailru_filters}")
exabot_log=$(sum_hits "${exabot_filters}")
seznambot_log=$(sum_hits "${seznambot_filters}")
msnbot_log=$(sum_hits "${msnbot_filters}")
baidu_log=$(sum_hits "${baidu_filters}")
umich_log=$(sum_hits "${umich_filters}")
google_log=$(sum_hits "${google_filters}")
roger_log=$(sum_hits "${roger_filters}")
uptime_log=$(sum_hits "${uptime_filters}")
sogou_log=$(sum_hits "${sogou_filters}")
netcraft_log=$(sum_hits "${netcraft_filters}")
yahoo_log=$(sum_hits "${yahoo_filters}")
yandex_log=$(sum_hits "${yandex_filters}")
majestic_log=$(sum_hits "${majestic_filters}")
local_log=$(sum_hits " ${local_filters}\$")
work_log=$(sum_hits " ${work_filters}\$")

### full
echo -e "${header}\n${body}\n\nFiltered Summary:\n-----------------"
# Unexpected requests, minus anything coming from the work or local networks.
printf '%s\n' "${leftovers}" | grep -vE "${work_filters}|${local_filters}" | sort -k2