I've occasionally had need to closely monitor a computer for network connectivity, either watching for brief interruptions or watching it over a long period of time such as overnight or over a week, with the ability to keep track of the exact times and durations of the failures.
I ran into this again this week with what we are currently attributing to a "very cranky layer 3 switch", which was causing a specific server to randomly drop incoming packets for brief durations throughout the day.
I thought some of you here might find the tool I made useful for diagnosing networking issues (particularly for watching wireless connectivity). In any event, posting it makes it googleable for anyone else out in the ether who needs it later.
The traditional PING tests only once per second, and fills the screen with many pages of text if run overnight. My script tests about 10x / second and only notes transitions, giving durations as well as dates and times, which makes it ideal for spotting very brief interruptions and for long-duration testing.
#!/bin/bash
# pingtest - probe one host in a tight loop and report only UP/DOWN transitions.
# Usage: pingtest {ipaddress}
vers=2015.05.08.A
# The target address is mandatory. A missing argument is a usage error, so the
# syntax line goes to stderr and the script exits non-zero instead of looking
# like a successful run.
if [[ -z "$1" ]] ; then
  printf 'Syntax: pingtest {ipaddress}\n\n' >&2
  exit 2
fi
# Address handed to ping by dotrans.
ip=$1
ms () {
perl -MTime::HiRes -e 'print int(1000 * Time::HiRes::gettimeofday),"\n"'
}
# waitms MILLISECONDS
# Pause for roughly the given number of milliseconds.
# The delay is handed to a single fractional `sleep` instead of polling the
# clock in a loop, so waiting costs no CPU and forks one process rather than
# one per poll.
# Arguments: $1 - delay in milliseconds; zero, negative or missing returns
#                 immediately
# Returns:   0 when nothing had to be waited for, otherwise sleep's status
waitms () {
  local got_ms secs
  got_ms=${1:-0}
  ((got_ms > 0)) || return 0
  # Render the delay as "S.mmm"; GNU and BSD/macOS sleep both accept
  # fractional seconds.
  printf -v secs '%d.%03d' "$((got_ms / 1000))" "$((got_ms % 1000))"
  sleep "$secs"
}
# datems
# Print a timestamp "YYYY/MM/DD HH:MM:SS.t" (t = tenths of a second) in the
# local time zone.
# Both the whole seconds and the tenths digit are derived from ONE reading of
# ms, so the fraction can never be paired with a later second than the one it
# was measured in.
# Outputs: the timestamp on stdout
datems () {
  local now secs tenths stamp
  now=$(ms)
  ((secs = now / 1000))
  ((tenths = now % 1000 / 100))
  # BSD/macOS date formats an epoch with -r SECONDS; GNU date treats -r as a
  # reference file and fails, in which case -d @SECONDS is used instead.
  # stderr of the first attempt is hidden because that failure is expected.
  stamp=$(date -r "$secs" "+%Y/%m/%d %H:%M:%S" 2> /dev/null) ||
    stamp=$(date -d "@$secs" "+%Y/%m/%d %H:%M:%S")
  echo "$stamp.$tenths"
}
# dotrans LABEL WAS_DOWN
# Run one monitoring step: redraw the status line, pause, probe the host once
# and refresh the elapsed time of the current state.
# Globals:
#   ip       (read)    - address to probe
#   duration (read)    - text shown on the status line
#   trans    (read)    - ms timestamp at which the current state began
#   rc       (written) - exit status of ping; 0 means the host answered
#   trans    (written) - reset to "now" when the probe contradicts the state
#   duration (written) - HH:MM:SS spent in the current state
# Arguments:
#   $1 - label to print for the current state (" UP " or "DOWN")
#   $2 - 1 if the current state is DOWN, anything else if it is UP
# Outputs: rewrites the current terminal line (leading carriage return, no
#          trailing newline)
dotrans () {
  local newtrans span is_up=0 was_down=0
  printf '\r%s %s for %s ' "$(datems)" "$1" "$duration"
  waitms 60
  # One echo request. NOTE(review): -t 1 is taken to be the one-second timeout
  # of BSD/macOS ping; on Linux iputils -t sets the TTL - confirm the platform.
  ping -t 1 -c 1 "$ip" &> /dev/null ; rc=$?
  newtrans=$(ms)
  ((span = (newtrans - trans) / 1000))
  # Guard against a clock that was stepped backwards while we were waiting.
  ((span < 0)) && span=0
  [[ $rc == 0 ]] && is_up=1
  [[ $2 == 1 ]] && was_down=1
  # The state flips when the host answers while we were DOWN, or stays silent
  # while we were UP.
  if ((is_up == was_down)) ; then
    trans=$newtrans
    # The new state has only just begun; do not carry over the old state's
    # elapsed time, or a one-probe blip would be logged with that length.
    span=0
  fi
  # Hours are computed arithmetically so they keep counting past 23 instead
  # of wrapping to 00 the way a %H clock format does.
  printf -v duration '%02d:%02d:%02d' \
    "$((span / 3600))" "$((span % 3600 / 60))" "$((span % 60))"
}
# Start on an empty screen and announce which address is being watched.
clear
printf '\n'
echo $'\r'$(date) "START testing $ip"
printf '\n'
# State shared with dotrans: the host is assumed reachable, no duration has
# been measured yet, and the current state is timed from this moment.
rc=0
duration="---"
trans=$(ms)
# Alternate forever between the UP phase and the DOWN phase. dotrans stores
# the result of each probe in rc; when a phase ends, three bells are rung and
# the newline leaves the last status line of that phase on screen.
for (( ; ; )) ; do
  until [ $rc != 0 ] ; do
    dotrans " UP " 0
  done
  printf '\a\a\a\n'
  until [ $rc == 0 ] ; do
    dotrans "DOWN" 1
  done
  printf '\a\a\a\n'
done
Example output showing me briefly yanking the Ethernet cable (it takes a while to recover, whereas a dropped ping packet would only lag one second):
Fri May 8 08:32:45 CDT 2015 START testing 10.3.100.145
2015/05/08 08:33:36.0 UP for 00:00:50
2015/05/08 08:33:46.2 DOWN for 00:00:09
2015/05/08 08:34:05.6 UP for 00:25:19