| # Copyright (C) 2007 Dejan Muhamedagic <dmuhamedagic@suse.de> |
| # Almost everything as part of hb_report |
| # Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net> |
| # Cleanups, refactoring, extensions |
| # |
| # This program is free software; you can redistribute it and/or |
| # modify it under the terms of the GNU General Public |
| # License as published by the Free Software Foundation; either |
| # version 2.1 of the License, or (at your option) any later version. |
| # |
| # This software is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| # General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public |
| # License along with this library; if not, write to the Free Software |
| # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| # |
| |
# Make sure the working directory is an absolute path: a REPORT_HOME that
# does not start with "/" is taken to be relative to $HOME.
if ! echo $REPORT_HOME | grep -qs '^/'; then
    REPORT_HOME="$HOME/$REPORT_HOME"
    debug "Canonicalizing working directory path: $REPORT_HOME"
else
    debug "Using full path to working directory: $REPORT_HOME"
fi

detect_host
| |
# findlogdcf [DIR...]
# Print the first existing logd configuration file and return 0; return 1
# if there is none.  Candidates, in order: every string containing
# "logd.cf" embedded in the ha_logd binary (only when the binary is
# executable and strings(1) is available), then logd.cf and ha_logd.cf
# in each DIR.
findlogdcf() {
    fl_candidates=""
    if test -x $CRM_DAEMON_DIR/ha_logd &&
        which strings > /dev/null 2>&1
    then
        fl_candidates=`strings $CRM_DAEMON_DIR/ha_logd | grep 'logd\.cf'`
    fi
    for d; do
        fl_candidates="$fl_candidates $d/logd.cf $d/ha_logd.cf"
    done

    for f in $fl_candidates; do
        [ -f "$f" ] || continue
        echo $f
        debug "Located logd.cf at: $f"
        return 0
    done
    debug "Could not determine logd.cf location"
    return 1
}
| |
#
# find files newer than a and older than b
# (small helpers first; find_files itself follows them)
#

# isnumber ARG...
# Succeed if the arguments, joined by single spaces, consist of one or
# more decimal digits and nothing else.
isnumber() {
    echo "$*" | grep -E -qs '^[0-9]+$'
}
| |
| touchfile() { |
| t=`mktemp` && |
| perl -e "\$file=\"$t\"; \$tm=$1;" -e 'utime $tm, $tm, $file;' && |
| echo $t |
| } |
| |
# Remove the stamp files recorded in $from_stamp and $to_stamp (if any)
# and forget them.  Used by find_files, also as its EXIT trap handler.
find_files_clean() {
    if [ -n "$from_stamp" ]; then
        rm -f "$from_stamp"
    fi
    if [ -n "$to_stamp" ]; then
        rm -f "$to_stamp"
    fi
    from_stamp=""
    to_stamp=""
}
| |
# find_files "DIR..." FROM [TO]
# Print the regular files below the existing directories of the
# whitespace-separated list in $1 that are newer than FROM and, when TO
# is a positive number, not newer than TO (both in seconds since the
# epoch).  Prints nothing if no directory exists; warns and returns if
# FROM is not a positive number or a stamp file cannot be created.
#
# The window is implemented with temporary stamp files and find -newer.
# While they exist, find_files_clean is installed as the EXIT trap;
# afterwards the EXIT trap is set to the empty action.
find_files() {
    from_time=$2
    to_time=$3
    dirs=
    for d in $1; do
        [ -d $d ] && dirs="$dirs $d"
    done
    [ x"$dirs" = x ] && return

    if ! { isnumber "$from_time" && [ "$from_time" -gt 0 ]; }; then
        warning "sorry, can't find files in [ $1 ] based on time if you don't supply time"
        return
    fi

    trap find_files_clean 0
    from_stamp=`touchfile $from_time` || {
        warning "sorry, can't create temporary file for find_files"
        return
    }
    findexp="-newer $from_stamp"

    # The upper bound is optional
    if isnumber "$to_time" && [ "$to_time" -gt 0 ]; then
        to_stamp=`touchfile $to_time` || {
            warning "sorry, can't create temporary file for find_files"
            find_files_clean
            return
        }
        findexp="$findexp ! -newer $to_stamp"
    fi

    find $dirs -type f $findexp
    find_files_clean
    trap "" 0
}
| |
| # |
| # check permissions of files/dirs |
| # |
| pl_checkperms() { |
| perl -e ' |
| # check permissions and ownership |
| # uid and gid are numeric |
| # everything must match exactly |
| # no error checking! (file should exist, etc) |
| ($filename, $perms, $in_uid, $in_gid) = @ARGV; |
| ($mode,$uid,$gid) = (stat($filename))[2,4,5]; |
| $p=sprintf("%04o", $mode & 07777); |
| $p ne $perms and exit(1); |
| $uid ne $in_uid and exit(1); |
| $gid ne $in_gid and exit(1); |
| ' $* |
| } |
| |
# num_id DATABASE NAME
# Print the numeric id (third colon-separated field) of NAME in the
# getent(1) DATABASE, e.g. "num_id passwd root".  Prints nothing if NAME
# is unknown.
num_id() {
    # Quoted on purpose: with an empty NAME an unquoted expansion would
    # vanish and getent would list the whole database.
    getent "$1" "$2" | awk -F: '{print $3}'
}
| |
# chk_id NAME NUMERIC_ID
# Succeed if NUMERIC_ID is non-empty; otherwise report on stdout that
# NAME has no id and return 1.
chk_id() {
    if [ -z "$2" ]; then
        echo "$1: id not found"
        return 1
    fi
    return 0
}
| |
# check_perms
# Read lines of the form "TYPE PATH PERMS USER GROUP" from stdin (TYPE is
# a test(1) file-type letter such as d or f) and print a complaint for
# every PATH that does not exist, has the wrong type, names an unknown
# user/group, or has different permissions or ownership.
check_perms() {
    # -r: do not let read interpret backslashes in paths
    while read -r type f p uid gid; do
        if [ ! -e "$f" ]; then
            echo "$f doesn't exist"
            continue
        elif [ ! "-$type" "$f" ]; then
            echo "$f has wrong type"
            continue
        fi
        n_uid=`num_id passwd "$uid"`
        chk_id "$uid" "$n_uid" || continue
        n_gid=`num_id group "$gid"`
        chk_id "$gid" "$n_gid" || continue
        pl_checkperms "$f" "$p" "$n_uid" "$n_gid" || {
            echo "wrong permissions or ownership for $f:"
            ls -ld "$f"
        }
    done
}
| |
| # |
| # coredumps |
| # |
# findbinary COREFILE
# Print the path of the program that produced COREFILE.  The program
# name is taken from gdb's "Core was generated by" line or, failing
# that, from the word following "from" in file(1) output.  The name is
# then resolved through PATH and, as a last resort, $CRM_DAEMON_DIR.
# Prints nothing (after a warning) if the program cannot be located.
findbinary() {
    random_binary=`which cat 2>/dev/null` # suppose we are lucky
    binary=`gdb $random_binary "$1" < /dev/null 2>/dev/null |
        grep 'Core was generated' | awk '{print $5}' |
        sed "s/^.//;s/[.':]*$//"`
    if [ x = x"$binary" ]; then
        debug "Could not detect the program name for core $1 from the gdb output; will try with file(1)"
        binary=$(file "$1" | awk '/from/{
            for( i=1; i<=NF; i++ )
                if( $i == "from" ) {
                    print $(i+1)
                    break
                }
            }')
        binary=`echo $binary | tr -d "'"`
        binary=$(echo $binary | tr -d '`')
        # Keep the bare name here: resolving it through PATH at this point
        # would empty $binary for daemons that live only in
        # $CRM_DAEMON_DIR and make the lookup below unreachable.
    fi
    if [ x = x"$binary" ]; then
        warning "Could not find the program path for core $1"
        return
    fi
    fullpath=`which $binary 2>/dev/null`
    if [ x = x"$fullpath" ]; then
        if [ -x "$CRM_DAEMON_DIR/$binary" ]; then
            echo $CRM_DAEMON_DIR/$binary
            debug "Found the program at $CRM_DAEMON_DIR/$binary for core $1"
        else
            warning "Could not find the program path for core $1"
        fi
    else
        echo $fullpath
        debug "Found the program at $fullpath for core $1"
    fi
}
| |
# getbt COREFILE...
# For every core file whose program can be located with findbinary,
# print a gdb backtrace summary followed by a detailed backtrace.
# $BT_OPTS, if set, replaces the gdb command used for both passes.
# Warns and returns if gdb is not installed.
getbt() {
    which gdb > /dev/null 2>&1 || {
        warning "Please install gdb to get backtraces"
        return
    }
    for corefile; do
        absbinpath=`findbinary "$corefile"`
        [ x = x"$absbinpath" ] && continue
        echo "====================== start backtrace ======================"
        ls -l "$corefile"
        # Summary first...
        # (the gdb command must reach -ex as ONE argument, hence the quotes)
        gdb -batch -n -quiet -ex "${BT_OPTS:-thread apply all bt}" -ex quit \
            "$absbinpath" "$corefile" 2>/dev/null
        echo "====================== start detail ======================"
        # Now the unreadable details...
        gdb -batch -n -quiet -ex "${BT_OPTS:-thread apply all bt full}" -ex quit \
            "$absbinpath" "$corefile" 2>/dev/null
        echo "======================= end backtrace ======================="
    done
}
| |
# Save the one-shot crm_mon status (without its "Last upd..." line) and
# the live CIB as reported by cibadmin below the directory in $target.
dump_status_and_config() {
    crm_mon -1 2>&1 |
        grep -v '^Last upd' > ${target}/${CRM_MON_F}
    cibadmin -Ql > ${target}/${CIB_F}.live 2>/dev/null
}
| |
# getconfig CLUSTER TARGET [PATH...]
# Copy every existing PATH into directory TARGET, then record there the
# crm_uuid output (if crm_uuid is installed) and, when crmd or
# pacemaker_remoted is running, the cluster status, live CIB and (crmd
# only) the membership as seen by crm_node.  Finally writes $host to
# TARGET/RUNNING or TARGET/STOPPED.
getconfig() {
    cluster=$1; shift;
    target=$1; shift;

    # "$@" rather than $*: keep paths with whitespace or glob characters intact
    for cf in "$@"; do
        if [ -e "$cf" ]; then
            cp -a "$cf" "$target/"
        fi
    done

    if which crm_uuid >/dev/null 2>&1; then
        crm_uuid -r > "$target/$HB_UUID_F" 2>&1
    fi

    if is_running crmd; then
        dump_status_and_config
        case $cluster in
            cman) crm_node -p --cman > "$target/$MEMBERSHIP_F" 2>&1;;
            corosync|openais) crm_node -p --openais > "$target/$MEMBERSHIP_F" 2>&1;;
            heartbeat) crm_node -p --heartbeat > "$target/$MEMBERSHIP_F" 2>&1;;
            *) crm_node -p > "$target/$MEMBERSHIP_F" 2>&1;;
        esac
        echo "$host" > "$target/RUNNING"

    elif is_running pacemaker_remoted; then
        dump_status_and_config
        echo "$host" > "$target/RUNNING"

    else
        echo "$host" > "$target/STOPPED"
    fi
}
| |
# get_readable_cib TARGET
# If TARGET contains a CIB file, store the crm_verify report for it and
# a human-readable rendering of it (via crm, or else pcs, whichever is
# installed) next to it.
get_readable_cib() {
    target="$1"; shift;

    [ -f "$target/$CIB_F" ] || return 0

    crm_verify -V -x "$target/$CIB_F" >"$target/$CRM_VERIFY_F" 2>&1
    if which crm >/dev/null 2>&1
    then
        CIB_file="$target/$CIB_F" crm configure show >"$target/$CIB_TXT_F" 2>&1
    elif which pcs >/dev/null 2>&1
    then
        pcs config -f "$target/$CIB_F" >"$target/$CIB_TXT_F" 2>&1
    fi
}
| |
| # |
| # remove values of sensitive attributes |
| # |
| # this is not proper xml parsing, but it will work under the |
| # circumstances |
# sanitize_xml_attrs
# Filter stdin to stdout, replacing the value="..." attribute with
# value="****" on every line that has a name="..." attribute matching
# one of the whitespace-separated regular expressions in $SANITIZE.
# With an empty $SANITIZE the input is passed through unchanged.
sanitize_xml_attrs() {
    # Patterns such as "passw.*" must not be expanded against files in
    # the current directory, so switch globbing off while splitting.
    case $- in
        *f*) sxa_restore_glob=0;;
        *)   sxa_restore_glob=1; set -f;;
    esac
    # Collect the sed expressions in the function's own positional
    # parameters so each one stays a single word.
    set --
    for patt in $SANITIZE; do
        set -- "$@" -e "/name=\"$patt\"/s/value=\"[^\"]*\"/value=\"****\"/"
    done
    [ $sxa_restore_glob -eq 0 ] || set +f

    if [ $# -eq 0 ]; then
        cat
    else
        sed "$@"
    fi
}
| |
# sanitize_hacf
# Filter stdin to stdout, replacing the fifth and all following fields
# of every line whose first field is "stonith_host" with "****".
sanitize_hacf() {
    awk '
        $1 == "stonith_host" {
            field = 5
            while (field <= NF) {
                $field = "****"
                field++
            }
        }
        { print $0 }
    '
}
| |
# Remove the temporary files recorded in $tmp and $ref (if any) and
# forget them.  Used by sanitize, also as its EXIT trap handler.
sanitize_one_clean() {
    if [ -n "$tmp" ]; then
        rm -f "$tmp"
    fi
    tmp=""
    if [ -n "$ref" ]; then
        rm -f "$ref"
    fi
    ref=""
}
| |
# sanitize FILE
# Replace sensitive values in FILE in place, keeping its modification
# time.  A file called ha.cf goes through sanitize_hacf, anything else
# through sanitize_xml_attrs; names ending in "gz" or "bz2" are
# decompressed first and recompressed afterwards.  Does nothing when
# $SANITIZE is empty.  Calls fatal if temporary files cannot be created.
sanitize() {
    file=$1
    compress=""
    if [ -z "$SANITIZE" ]; then
        return
    fi
    case $file in
        *bz2) compress=bzip2;;
        *gz)  compress=gzip;;
    esac
    if [ "$compress" ]; then
        decompress="$compress -dc"
    else
        compress=cat
        decompress=cat
    fi
    trap sanitize_one_clean 0
    tmp=`mktemp`
    ref=`mktemp`
    if [ -z "$tmp" ] || [ -z "$ref" ]; then
        sanitize_one_clean
        fatal "cannot create temporary files"
    fi
    touch -r "$file" "$ref"  # save the mtime
    if [ "$(basename "$file")" = ha.cf ]; then
        sanitize_hacf
    else
        $decompress | sanitize_xml_attrs | $compress
    fi < "$file" > "$tmp"
    # The file is replaced even if the filter failed, so that unsanitized
    # content never stays in the report.
    mv "$tmp" "$file"
    # note: cleaning $tmp up is still needed even after it's renamed
    # because its temp directory is still there.

    touch -r "$ref" "$file"
    sanitize_one_clean
    trap "" 0
}
| |
| # |
| # get some system info |
| # |
# distro
# Print a one-line description of the installed distribution: the
# lsb_release description if lsb_release is installed, otherwise the
# name and content of the first regular file among
# /etc/debian_version, /etc/slackware-version or /etc/*-release.
# Warns if none of these sources is available.
distro() {
    if which lsb_release >/dev/null 2>&1; then
        lsb_release -d | sed -e 's/^Description:\s*//'
        debug "Using lsb_release for distribution info"
        return
    fi

    if relf=$(ls /etc/debian_version 2>/dev/null) ||
        relf=$(ls /etc/slackware-version 2>/dev/null) ||
        relf=$(ls -d /etc/*-release 2>/dev/null)
    then
        for f in $relf; do
            if test -f $f; then
                echo "$(ls $f) $(cat $f)"
                debug "Found $(echo $relf | tr '\n' ' ') distribution release file(s)"
                return
            fi
        done
    fi
    warning "No lsb_release, no /etc/*-release, no /etc/debian_version: no distro information"
}
| |
# pkg_ver [PACKAGE...]
# Print the detected package manager and the list of installed packages.
# For dpkg and rpm systems, additionally verify every PACKAGE that is
# installed (debsums -s / rpm --verify).  Warns and returns if no known
# package manager is found.
pkg_ver() {
    # Probe in order of preference; each entry is "command:manager name"
    pv_found=""
    for pv_probe in dpkg:deb rpm:rpm pkg_info:pkg_info pkginfo:pkginfo; do
        if which ${pv_probe%%:*} >/dev/null 2>&1; then
            pv_found=${pv_probe#*:}
            break
        fi
    done
    if [ -z "$pv_found" ]; then
        warning "Unknown package manager"
        return
    fi
    pkg_mgr=$pv_found
    debug "The package manager is: $pkg_mgr"
    echo "The package manager is: $pkg_mgr"

    echo "Installed packages:"
    case $pkg_mgr in
        pkginfo)
            pkginfo | awk '{print $3}' # format?
            ;;
        pkg_info)
            pkg_info
            ;;
        rpm)
            rpm -qa --qf '%{name} %{version}-%{release} - %{distribution} %{arch}\n' | sort
            echo
            for pkg in $*; do
                rpm -q $pkg >/dev/null 2>&1 || continue
                debug "Verifying installation of: $pkg"
                echo "Verifying installation of: $pkg"
                rpm --verify $pkg 2>&1
            done
            ;;
        deb)
            dpkg-query -f '${Package} ${Version} ${Architecture}\n' -W | sort
            echo
            for pkg in $*; do
                dpkg-query -W $pkg 2>/dev/null || continue
                debug "Verifying installation of: $pkg"
                echo "Verifying installation of: $pkg"
                debsums -s $pkg 2>/dev/null
            done
            ;;
    esac
}
| |
# getbacktraces FROM TO
# Look below $CRM_CORE_DIRS for files modified between FROM and TO
# (seconds since the epoch) whose name starts with "core".  Each one is
# logged, a compressed copy of all of them is made through "shrink", and
# their backtraces are printed on stdout with getbt.
getbacktraces() {
    debug "Looking for backtraces: $*"
    flist=$(
        for f in `find_files "$CRM_CORE_DIRS" "$1" "$2"`; do
            # plain pattern match; "expr match" is a GNU extension
            case `basename $f` in
                core*) echo $f;;
            esac
        done)
    if [ "$flist" ]; then
        for core in $flist; do
            log "Found core file: `ls -al $core`"
        done

        # Make a copy of them in case we need more data later
        # Luckily they compress well
        mkdir cores >/dev/null 2>&1
        cp -a $flist cores/
        shrink cores
        rm -rf cores

        # Now get as much as we can from them automagically
        for f in $flist; do
            getbt $f
        done
    fi
}
| |
# getpeinputs FROM TO DESTDIR
# Copy the files below $PE_STATE_DIR that were modified between FROM and
# TO into DESTDIR, keeping their path relative to the parent directory
# of $PE_STATE_DIR.  Does nothing if $PE_STATE_DIR is empty.
getpeinputs() {
    [ -n "$PE_STATE_DIR" ] || return 0

    # Paths relative to the parent of $PE_STATE_DIR
    flist=$(
        find_files "$PE_STATE_DIR" "$1" "$2" |
            sed "s,`dirname $PE_STATE_DIR`/,,g"
    )
    [ "$flist" ] || return 0

    (cd $(dirname "$PE_STATE_DIR") && tar cf - $flist) |
        (cd "$3" && tar xf -)
    debug "found `echo $flist | wc -w` pengine input files in $PE_STATE_DIR"
}
| |
# getblackboxes FROM TO DESTDIR
# Run qb-blackbox on every file below $BLACKBOX_DIR that was modified
# between FROM and TO and store its output (stdout and stderr) as
# DESTDIR/<file name>.blackbox.
getblackboxes() {
    flist=$(find_files $BLACKBOX_DIR $1 $2)

    for bb in $flist
    do
        bb_short=$(basename $bb)
        qb-blackbox $bb > $3/${bb_short}.blackbox 2>&1
        info "Extracting contents of blackbox: $bb_short"
    done
}
| |
| # |
| # some basic system info and stats |
| # |
# sys_info CLUSTER [PACKAGE...]
# Print platform, kernel and (on Linux) distribution information, the
# versions of cibadmin, the cluster stack selected by CLUSTER, stonith
# and resource-agents, and finally the package report from pkg_ver.
sys_info() {
    cluster=$1; shift
    echo "Platform: $(uname)"
    echo "Kernel release: $(uname -r)"
    echo "Architecture: $(uname -m)"
    case $(uname) in
        Linux) echo "Distribution: $(distro)";;
    esac

    echo
    cibadmin --version 2>&1 | head -1
    cibadmin -! 2>&1
    case $cluster in
        heartbeat)
            echo heartbeat $($CRM_DAEMON_DIR/heartbeat -V 2>&1)
            ;;
        openais)
            echo openais # version: how?
            ;;
        cman|corosync)
            # cman additionally reports its own version first
            if [ "$cluster" = cman ]; then
                cman_tool -V 2>&1 | head -1
            fi
            /usr/sbin/corosync -v 2>&1 | head -1
            ;;
    esac

    # Cluster glue version hash (if available)
    stonith -V 2>/dev/null

    # Resource agents version hash
    echo "resource-agents: $(grep 'Build version:' /usr/lib/ocf/resource.d/heartbeat/.ocf-shellfuncs)"

    echo
    pkg_ver $*
}
| |
# sys_stats
# Run a fixed series of system inspection commands (host, processes,
# network, hardware, storage).  Shell tracing is switched on for the
# duration, so each command line is echoed on stderr before its output.
sys_stats() {
    set -x
    uname -n
    uptime
    ps axf
    ps auxw
    top -b -n 1
    ifconfig -a
    ip addr list
    netstat -i
    arp -an
    if test -d /proc; then
        cat /proc/cpuinfo
    fi
    lsscsi
    lspci
    lsblk
    mount
    df
    set +x
}
| |
# dlm_dump
# If dlm_tool is installed and dlm_controld is running, print the DLM
# lockspace overview, history, status, configuration and plock log,
# followed by a lock dump and lock debug output for every lockspace.
dlm_dump() {
    which dlm_tool >/dev/null 2>&1 || return 0
    is_running dlm_controld || return 0

    echo "--- Lockspace overview:"
    dlm_tool ls -n

    echo "---Lockspace history:"
    dlm_tool dump

    echo "---Lockspace status:"
    dlm_tool status
    dlm_tool status -v

    echo "---Lockspace config:"
    dlm_tool dump_config

    dlm_tool log_plock

    # Lines containing "name" carry the lockspace name in the second field
    dlm_tool ls | grep name |
        while read X N
        do
            echo "--- Lockspace $N:"
            dlm_tool lockdump "$N"
            dlm_tool lockdebug -svw "$N"
        done
}
| |
# drbd_info
# Print whatever DRBD state is available on this host: /proc/drbd, and
# the output of drbd-overview, drbdsetup (status, events2) and
# "drbdadm show-gi" for each resource listed by "drbdsetup status".
# Sources that are not present are skipped.
drbd_info() {
    if test -f /proc/drbd; then
        echo "--- /proc/drbd:"
        cat /proc/drbd 2>&1
        echo
    fi

    if which drbd-overview >/dev/null 2>&1
    then
        echo "--- drbd-overview:"
        drbd-overview 2>&1
        echo
    fi

    if which drbdsetup >/dev/null 2>&1
    then
        echo "--- drbdsetup status:"
        drbdsetup status --verbose --statistics 2>&1
        echo

        echo "--- drbdsetup events2:"
        drbdsetup events2 --timestamps --statistics --now 2>&1
        echo
    fi

    if which drbdadm >/dev/null 2>&1
    then
        echo "--- drbdadm show-gi:"
        # Resource names are the first word of the unindented status lines
        for res in $(drbdsetup status | grep -e '^\S' | awk '{ print $1 }')
        do
            echo "$res:"
            drbdadm show-gi $res 2>&1
            echo
        done
    fi
}
| |
# iscfvarset TYPE VARIABLE FILE
# Succeed if getcfvar yields a non-empty value for VARIABLE of cluster
# type TYPE in configuration FILE.
iscfvarset() {
    # FILE ($3) must be forwarded, as iscfvartrue and iscfvarfalse do;
    # without it getcfvar is not told which file to read.
    test "`getcfvar $1 $2 $3`"
}
| |
# iscfvartrue TYPE VARIABLE FILE
# Succeed if the value getcfvar reports starts (case-insensitively) with
# one of: true, y, yes, on, 1.
iscfvartrue() {
    getcfvar $1 $2 $3 |
        grep -Eqsi '^(true|y|yes|on|1)'
}
| |
# iscfvarfalse TYPE VARIABLE FILE
# Succeed if the value getcfvar reports starts (case-insensitively) with
# one of: false, n, no, off, 0.
iscfvarfalse() {
    getcfvar $1 $2 $3 |
        grep -Eqsi '^(false|n|no|off|0)'
}
| |
# find_syslog PRIORITY
# Locate the system log that receives messages of syslog PRIORITY
# (facility.level): log a unique marker with logger(1), then call
# findmsg to search for that marker.
find_syslog() {
    priority="$1"

    # Always include system logs (if we can find them)
    msg="Mark:pcmk:$(perl -e 'print time()')"
    logger -p "$priority" "$msg" >/dev/null 2>&1

    # Force buffer flush
    killall -HUP rsyslogd >/dev/null 2>&1

    # Give syslog time to catch up in case it's busy
    sleep 2
    findmsg 1 "$msg"
}
| |
# get_logfiles_cs
# Log discovery for cman/openais/corosync, driven by the globals
# $cf_type and $cf_file.  Unless to_syslog is explicitly false, search
# for the syslog of the configured facility (default "daemon", kept in
# $facility_cs).  Unless $SOS_MODE is 1, also print the configured
# logfile when to_logfile is true and the file exists.
get_logfiles_cs() {
    [ -f "$cf_file" ] || return 0

    debug "Reading $cf_type log settings from $cf_file"

    # The default value of to_syslog is yes.
    if iscfvarfalse $cf_type to_syslog "$cf_file"; then
        :
    else
        facility_cs=$(getcfvar $cf_type syslog_facility "$cf_file")
        facility_cs=${facility_cs:-daemon}

        find_syslog "$facility_cs.info"
    fi
    [ "$SOS_MODE" != "1" ] || return 0

    iscfvartrue $cf_type to_logfile "$cf_file" || return 0
    logfile=$(getcfvar $cf_type logfile "$cf_file")
    if [ -f "$logfile" ]; then
        debug "Log settings found for cluster type $cf_type: $logfile"
        echo "$logfile"
    fi
}
| |
# uselogd TYPE FILE
# Succeed if the cluster configuration FILE of type TYPE indicates that
# logging goes through logd.  For heartbeat that is the case when
# use_logd is true or when none of logfacility, logfile and debugfile
# is set; for every other type use_logd must be true.
# Note: sets the global $cf_file to FILE.
uselogd() {
    cf_file=$2
    case $1 in
        heartbeat)
            # if use_logd true
            if iscfvartrue $1 use_logd $cf_file; then
                return 0
            fi
            if iscfvarset $1 logfacility $cf_file ||
                iscfvarset $1 logfile $cf_file ||
                iscfvarset $1 debugfile $cf_file
            then
                return 1
            fi
            # or none of the log options set
            return 0
            ;;
        *)
            iscfvartrue $1 use_logd $cf_file
            ;;
    esac
}
| |
# get_logfiles TYPE CLUSTER_CF LOGD_CF
# Print, one per line, the log files that are likely to hold cluster
# messages on this host:
#  - what the cluster stack configuration points at (syslog of the
#    configured facility and/or an explicit logfile),
#  - the syslog for Pacemaker's own facility, if different,
#  - Pacemaker's own logfile,
#  - whatever findmsg locates for a set of typical Pacemaker messages.
# With SOS_MODE=1 only the syslog searches are done.
get_logfiles() {
    cf_type=$1
    cf_file="$2"
    cf_logd="$3"

    if [ -f "$cf_logd" ]; then
        # uselogd needs the cluster type and its configuration file;
        # without them it cannot inspect anything and it resets $cf_file.
        if uselogd "$cf_type" "$cf_file"; then
            cf_file="$cf_logd"
            cf_type="logd"
        fi
    fi

    case $cf_type in
        cman|openais|corosync) get_logfiles_cs;;
        heartbeat|logd)
            gl_hb_logfile=""
            if [ -f "$cf_file" ]; then
                debug "Reading $cf_type log settings from $cf_file"
                if iscfvartrue $cf_type debug "$cf_file"; then
                    gl_hb_logfile=$(getcfvar $cf_type debugfile "$cf_file")
                else
                    gl_hb_logfile=$(getcfvar $cf_type logfile "$cf_file")
                fi
            fi
            # Same variable as get_logfiles_cs sets; it is compared with
            # Pacemaker's own facility further down.
            facility_cs=$(getcfvar $cf_type logfacility "$cf_file")
            [ "" = "$facility_cs" ] && facility_cs="daemon"
            find_syslog "$facility_cs.info"
            # Report the configured file, as get_logfiles_cs does
            if [ "$SOS_MODE" != "1" ] && [ -f "$gl_hb_logfile" ]; then
                debug "Log settings found for cluster type $cf_type: $gl_hb_logfile"
                echo "$gl_hb_logfile"
            fi
            ;;
    esac

    # Sourcing a missing file makes some shells exit, so check first
    if [ -f @CONFIGDIR@/pacemaker ]; then
        . @CONFIGDIR@/pacemaker
    fi

    facility="$PCMK_logfacility"
    if [ -z "$facility" ]; then
        facility="daemon"
    fi
    if [ "$facility" != "$facility_cs" ]&&[ "$facility" != none ]; then
        find_syslog "$facility.notice"
    fi
    if [ "$SOS_MODE" = "1" ]; then
        return
    fi

    logfile="$PCMK_logfile"
    if [ "$logfile" != none ]; then
        if [ -z "$logfile" ]; then
            logfile="/var/log/pacemaker.log"
            if [ -f "$logfile" ]; then
                debug "Log settings not found for Pacemaker, assuming $logfile"
                echo "$logfile"
            fi

        elif [ -f "$logfile" ]; then
            debug "Log settings found for Pacemaker: $logfile"
            echo "$logfile"
        fi
    fi

    # Look for detail logs:

    # - initial pacemakerd logs and tracing might go to a different file
    pattern="Starting Pacemaker"

    # - make sure we get something from the Policy Engine
    pattern="$pattern\\|Calculated transition"

    # - cib and lrmd updates (helpful on non-DC nodes or when the cluster has been up for a long time)
    pattern="$pattern\\|cib_perform_op\\|process_lrm_event"

    # - pacemaker_remote might use a different file
    pattern="$pattern\\|pacemaker_remoted:"

    findmsg 3 "$pattern"
}
| |
# essential_files CLUSTER
# Print the directories whose type, permissions and ownership matter,
# one per line as "TYPE PATH PERMS USER GROUP" (the input format of
# check_perms).  The heartbeat stack has one extra entry.
essential_files() {
    printf '%s\n' \
        "d $HA_STATE_DIR 0755 root root" \
        "d $PE_STATE_DIR 0750 hacluster haclient" \
        "d $CRM_CONFIG_DIR 0750 hacluster haclient" \
        "d $CRM_STATE_DIR 0750 hacluster haclient"
    if [ "$1" = heartbeat ]; then
        printf '%s\n' "d $HA_STATE_DIR/ccm 0750 hacluster haclient"
    fi
}
| |
# trim STRING
# Print STRING without leading and trailing whitespace and without a
# trailing newline (using only POSIX expressions).
trim() {
    TRIM_S="$1"

    # Drop the longest all-whitespace prefix, then the longest
    # all-whitespace suffix.
    TRIM_S="${TRIM_S#"${TRIM_S%%[![:space:]]*}"}"
    TRIM_S="${TRIM_S%"${TRIM_S##*[![:space:]]}"}"
    # printf, not "echo -n": echo's options and backslash handling differ
    # between shells and would mangle values such as "-n" or "a\nb".
    printf '%s' "$TRIM_S"
}
| |
# collect_logs START END [LOGFILE...]
# For every existing LOGFILE, write the part between START and END
# (seconds since the epoch) to "<basename>.extract.txt" in the current
# directory and sanitize it; if journalctl is installed, also save the
# journal for that period as journal.log.  Lines of these files matching
# one of the patterns in $LOG_PATTERNS are gathered in $ANALYSIS_F.
# Returns without doing anything if there is neither a LOGFILE nor
# journalctl.  Calls fatal if no temporary file can be created.
collect_logs() {
    CL_START="$1"
    shift
    CL_END="$1"
    shift
    CL_LOGFILES="$*"

    if which journalctl > /dev/null 2>&1; then
        cl_have_journald=1
    else
        cl_have_journald=0
    fi

    cl_lognames="$CL_LOGFILES"
    if [ $cl_have_journald -eq 1 ]; then
        cl_lognames="$cl_lognames journalctl"
    fi
    cl_lognames=$(trim "$cl_lognames")
    if [ -z "$cl_lognames" ]; then
        return
    fi

    # YYYY-MM-DD HH:MM:SS
    cl_start_ymd=$(date -d @${CL_START} +"%F %T")
    cl_end_ymd=$(date -d @${CL_END} +"%F %T")

    debug "Gathering logs from $cl_start_ymd to $cl_end_ymd:"
    debug " $cl_lognames"

    # Remove our temporary file if we get interrupted here
    # (start from a clean slate so the trap never sees a stale name)
    cl_pattfile=""
    trap '[ -z "$cl_pattfile" ] || rm -f "$cl_pattfile"' 0

    # Create a temporary file with patterns to grep for
    cl_pattfile=$(mktemp) || fatal "cannot create temporary files"
    for cl_pattern in $LOG_PATTERNS; do
        printf '%s\n' "$cl_pattern"
    done > "$cl_pattfile"

    echo "Log pattern matches from $REPORT_TARGET:" > "$ANALYSIS_F"
    if [ -n "$CL_LOGFILES" ]; then
        for cl_logfile in $CL_LOGFILES; do
            cl_extract="$(basename "$cl_logfile").extract.txt"

            if [ ! -f "$cl_logfile" ]; then
                # Not a file
                continue

            elif [ -f "$cl_extract" ]; then
                # We already have it
                continue
            fi

            # Use the period we were called with, not the globals
            dumplogset "$cl_logfile" "$CL_START" "$CL_END" > "$cl_extract"
            sanitize "$cl_extract"

            grep -f "$cl_pattfile" "$cl_extract" >> "$ANALYSIS_F"
        done
    fi

    # Collect systemd logs if present
    if [ $cl_have_journald -eq 1 ]; then
        journalctl --since "$cl_start_ymd" --until "$cl_end_ymd" > journal.log
        grep -f "$cl_pattfile" journal.log >> "$ANALYSIS_F"
    fi

    rm -f "$cl_pattfile"
    trap "" 0
}
| |
require_tar

# Set up and enter the per-node working directory.  On a remote node a
# leftover directory of the same name is not reused; the report is built
# below /tmp/<pid> instead.
# NOTE(review): /tmp/$$ is a predictable name; consider mktemp -d.
debug "Initializing $REPORT_TARGET subdir"
if [ "$REPORT_MASTER" != "$REPORT_TARGET" ]; then
    if [ -e "$REPORT_HOME/$REPORT_TARGET" ]; then
        warning "Directory $REPORT_HOME/$REPORT_TARGET already exists, using /tmp/$$/$REPORT_TARGET instead"
        REPORT_HOME=/tmp/$$
    fi
fi

# Everything below writes to, and later deletes empty files from, the
# current directory, so do not carry on anywhere else.
mkdir -p "$REPORT_HOME/$REPORT_TARGET" &&
    cd "$REPORT_HOME/$REPORT_TARGET" ||
    fatal "Could not create or enter working directory $REPORT_HOME/$REPORT_TARGET"
| |
# Determine the cluster stack, its configuration files and the log
# files to collect.
case $CLUSTER in
    any) cluster=`get_cluster_type`;;
    *) cluster=$CLUSTER;;
esac

logd_cf=`findlogdcf`
cluster_cf=`find_cluster_cf $cluster`

# If cluster stack is still "any", this might be a Pacemaker Remote node,
# so don't complain in that case.
# ($cluster is quoted: an empty value must not break the test)
if [ -z "$cluster_cf" ] && [ "$cluster" != "any" ]; then
    warning "Could not determine the location of your cluster configuration"
fi

if [ "$SEARCH_LOGS" = "1" ]; then
    logfiles=$(get_logfiles "$cluster" "$cluster_cf" "$logd_cf" | sort -u)
fi
logfiles="$(trim "$logfiles $EXTRA_LOGS")"

if [ -z "$logfiles" ]; then
    if which journalctl > /dev/null 2>&1; then
        info "Systemd journal will be only log collected"
    else
        info "No logs will be collected"
    fi
    info "No log files found or specified with --logfile /some/path"
fi

debug "Config: $cluster ($cluster_cf $logd_cf) $logfiles"
| |
# Gather system information, permissions, configuration, policy engine
# inputs, backtraces and blackboxes into the working directory.
sys_info $cluster $PACKAGES > $SYSINFO_F
essential_files $cluster | check_perms > $PERMISSIONS_F 2>&1
getconfig $cluster "$REPORT_HOME/$REPORT_TARGET" "$cluster_cf" "$logd_cf" "$CRM_CONFIG_DIR/$CIB_F" "$HA_STATE_DIR/hostcache" "/etc/drbd.conf" "/etc/drbd.d" "/etc/booth"

getpeinputs $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET
getbacktraces $LOG_START $LOG_END > $REPORT_HOME/$REPORT_TARGET/$BT_F
getblackboxes $LOG_START $LOG_END $REPORT_HOME/$REPORT_TARGET

# Corosync runtime state, for the stacks built on it
if [ "$cluster" = cman ] || [ "$cluster" = corosync ]; then
    if is_running corosync; then
        corosync-blackbox >corosync-blackbox-live.txt 2>&1
        # corosync-fplay > corosync-blackbox.txt
        tool=$(pickfirst corosync-objctl corosync-cmapctl)
        case $tool in
            *objctl) $tool -a > corosync.dump 2>/dev/null;;
            *cmapctl) $tool > corosync.dump 2>/dev/null;;
        esac
        corosync-quorumtool -s -i > corosync.quorum 2>&1
    fi
fi

# Leave a marker file if this node is the current DC
dc=$(crm_mon -1 2>/dev/null | awk '/Current DC/ {print $3}')
if [ "$REPORT_TARGET" = "$dc" ]; then
    echo "$REPORT_TARGET" > DC
fi

dlm_dump > $DLM_DUMP_F 2>&1
sys_stats > $SYSSTATS_F 2>&1
drbd_info > $DRBD_INFO_F 2>&1
| |
debug "Sanitizing files: $SANITIZE"
#
# replace sensitive info with '****'
#
cf=""
[ -z "$cluster_cf" ] || cf=$(basename $cluster_cf)
for f in "$cf" "$CIB_F" "$CIB_F.live" pengine/*
do
    [ -f "$f" ] && sanitize "$f"
done

# For convenience, generate human-readable version of CIB and any XML errors
# in it (AFTER sanitizing, so we don't need to sanitize this output)
get_readable_cib "$REPORT_HOME/$REPORT_TARGET"

collect_logs "$LOG_START" "$LOG_END" $logfiles
| |
# Purge files containing no information
# (a glob instead of parsing ls output, so unusual file names survive)
for f in *; do
    # Nothing matched, or the entry vanished; dangling symlinks are
    # still handled below.
    [ -e "$f" ] || [ -L "$f" ] || continue
    if [ -d "$f" ]; then
        continue
    elif [ ! -s "$f" ]; then
        case $f in
            *core*) log "Detected empty core file: $f";;
            *)  debug "Removing empty file: $(ls -al -- "$f")"
                rm -f -- "$f"
                ;;
        esac
    fi
done
| |
# Parse for events
# NOTE(review): $EVENTS_F is rewritten for every log file, so only the
# events of the last one are kept -- confirm whether that is intended.
for l in $logfiles
do
    b="$(basename $l).extract.txt"
    node_events "$b" > $EVENTS_F

    # Link the first logfile to a standard name if it doesn't yet exist
    if [ -e "$b" ] && [ ! -e "$HALOG_F" ]
    then
        ln -s "$b" "$HALOG_F"
    fi
done
| |
# Hand the result over: nothing to do when $REPORT_HOME/.env exists;
# otherwise, on a node other than the master, stream the report
# directory as a tar archive on stdout and optionally remove the
# working directory afterwards.
if [ -e $REPORT_HOME/.env ]
then
    debug "Localhost: $REPORT_MASTER $REPORT_TARGET"

elif [ "$REPORT_MASTER" != "$REPORT_TARGET" ]
then
    debug "Streaming report back to $REPORT_MASTER"
    (cd $REPORT_HOME && tar cf - $REPORT_TARGET)
    [ "$REMOVE" != "1" ] || {
        cd
        rm -rf $REPORT_HOME
    }
fi
| |
| # vim: set expandtab tabstop=8 softtabstop=4 shiftwidth=4 textwidth=80: |