| #!/bin/bash |
| # |
| |
| export LANG=C LC_ALL=C TZ=":/etc/localtime" |
| |
| sed_rsc_location_suitable_for_string_compare() |
| { |
| # expected input: exactly one tag per line: "^[[:space:]]*<.*/?>$" |
| sed -ne ' |
| # within the rsc_location constraint with that id, |
| /<rsc_location .*\bid="'"$1"'"/, /<\/rsc_location>/ { |
| /<\/rsc_location>/q # done, if closing tag is found |
| s/^[[:space:]]*// # trim spaces |
| s/ *\bid="[^"]*"// # remove the id attribute |
| # print each attribute on its own line: |
| : attr |
| h # remember the current (tail of the) line |
| # remove all but the first attribute, and print, |
| s/^\([^[:space:]]*[[:space:]][^= ]*="[^"]*"\).*$/\1/p |
| g # then restore the remembered line, |
| # and remove the first attribute. |
| s/^\([^[:space:]]*\)[[:space:]][^= ]*="[^"]*"\(.*\)$/\1\2/ |
| # then repeat, until no more attributes are left |
| t attr |
| }' | sort |
| } |
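| |
| # Illustration only (a hedged example, not used by the handler itself; the |
| # resource and host names are made up): a constraint like |
| #   <rsc_location rsc="ms-drbd-r0" id="drbd-fence-by-handler-r0-ms-drbd-r0"> |
| #     <rule role="Master" score="-INFINITY" id="..."> |
| #       <expression attribute="#uname" operation="ne" value="alice" id="..."/> |
| #     </rule> |
| #   </rsc_location> |
| # would come out roughly as |
| #   <expression attribute="#uname" |
| #   <expression operation="ne" |
| #   <expression value="alice" |
| #   <rsc_location rsc="ms-drbd-r0" |
| #   <rule role="Master" |
| #   <rule score="-INFINITY" |
| # one attribute per line, ids stripped, sorted, so that two constraints can |
| # be compared as plain strings regardless of attribute order. |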
| |
| cibadmin_invocations=0 |
| set_constraint() |
| { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cibadmin -C -o constraints -X "$new_constraint" |
| } |
| |
| remove_constraint() |
| { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cibadmin -D -X "<rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\"/>" |
| } |
| |
| cib_xml="" |
| get_cib_xml() { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cib_xml=$( set +x; cibadmin "$@" ) |
| } |
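| # Usage within this script: |
| #   get_cib_xml -Ql        # query the local cib |
| #   get_cib_xml -Q -t <n>  # ask the DC, with timeout (see check_peer_node_reachable) |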
| |
| |
| # if not passed in, try to "guess" it from the cib |
| # we only know the DRBD_RESOURCE. |
| fence_peer_init() |
| { |
| # we know which instance we are: $OCF_RESOURCE_INSTANCE. |
| # but we do not know the xml ID of the <master/> :( |
| # cibadmin -Ql --xpath \ |
| # '//master[primitive[@type="drbd" and instance_attributes/nvpair[@name = "drbd_resource" and @value="r0"]]]/@id' |
| # but I'd have to pipe that through sed anyways, because @attribute |
| # xpath queries are not supported. |
| # and I'd be incompatible with older cibadmin not supporting --xpath. |
| # be cool, sed it out: |
| : ${master_id=$(set +x; echo "$cib_xml" | |
| sed -ne '/<master /,/<\/master>/ { |
| /<master / h; |
| /<primitive/,/<\/primitive/ { |
| /<instance_attributes/,/<\/instance_attributes/ { |
| /<nvpair .*\bname="drbd_resource"/ { |
| /.*\bvalue="'"$DRBD_RESOURCE"'"/! d |
| x |
| s/^.*\bid="\([^"]*\)".*/\1/p |
| q |
| };};};}')} |
| if [[ -z $master_id ]] ; then |
| echo WARNING "drbd-fencing could not determine the master id of drbd resource $DRBD_RESOURCE" |
| return 1; |
| fi |
| have_constraint=$(set +x; echo "$cib_xml" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id") |
| return 0 |
| } |
| |
| # drbd_fence_peer_exit_code is per the exit code |
| # convention of the DRBD "fence-peer" handler, |
| # obviously. |
| # 3: peer is already outdated or worse (e.g. inconsistent) |
| # 4: peer has been successfully fenced |
| # 5: peer not reachable, assumed to be dead |
| # 6: please outdate yourself, peer is known (or likely) |
| # to have better data, or is even currently primary. |
| # (actually, currently it is "peer is active primary now", but I'd like to |
| # change that meaning slightly towards the above meaning) |
| # 7: peer has been STONITHed, thus assumed to be properly fenced |
| # XXX IMO, this should rather be handled like 5, not 4. |
| |
| # NOTE: |
| # On loss of all cluster comm (cluster split-brain), |
| # without STONITH configured, you always still risk data divergence. |
| # |
| # There are different timeouts: |
| # |
| # --timeout is how long we poll the DC for a definite "unreachable" node state, |
| # before we give up and say "unknown". |
| # This should be longer than "dead time" or "stonith timeout", |
| # the time it takes the cluster manager to declare the other node dead and |
| # shoot it, just to be sure. |
| # |
| # --dc-timeout is how long we try to contact a DC before we give up. |
| # This is necessary, because placing the constraint will fail (with some |
| # internal timeout) if no DC was available when we request the constraint. |
| # Which is likely if the DC crashed. Then the surviving DRBD Primary needs |
| # to wait for a new DC to be elected. Usually such election takes only |
| # fractions of a second, but it can take much longer (the default election |
| # timeout in pacemaker is ~2 minutes!). |
| # |
| # --network-hickup is how long we wait for the replication link to recover, |
| # if crmadmin confirms that the peer is in fact still alive. |
| # It may have been just a network hiccup. If so, there is no need to risk |
| # triggering node level fencing. |
| # |
| # a) Small-ish (1s) timeout, medium (10..20s) dc-timeout: |
| # Intended use case: fencing resource-only, no STONITH configured. |
| # |
| # Even with STONITH properly configured, on cluster split-brain this method |
| # risks completing transactions to user space which may later be lost |
| # due to STONITH. |
| # |
| # With dual-primary setup (cluster file system), |
| # you should use method b). |
| # |
| # b) timeout >= deadtime, dc-timeout > timeout |
| # Intended use case: fencing resource-and-stonith, STONITH configured. |
| # |
| # Difference to a) |
| # |
| # If peer is still reachable according to the cib, |
| # we first poll the cib/try to confirm with crmadmin, |
| # until either crmadmin confirms reachability, timeout has elapsed, |
| # or the peer becomes definitely unreachable. |
| # |
| # This gives STONITH the chance to kill us. |
| # With "fencing resource-and-stontith;" this protects us against |
| # completing transactions to userland which might otherwise be lost. |
| # |
| # We then place the constraint (if we are UpToDate), as explained below, |
| # and return reachable/unreachable according to our last cib status poll |
| # or crmadmin -S result. |
| # |
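| # A minimal drbd.conf sketch wiring up this handler (illustrative values: |
| # the resource name, install path and timeouts are examples, adjust to |
| # your setup): |
| # |
| # resource r0 { |
| #     disk { fencing resource-and-stonith; } |
| #     handlers { |
| #         fence-peer          "/usr/lib/drbd/crm-fence-peer.sh --timeout 90 --dc-timeout 100"; |
| #         after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh"; |
| #     } |
| # } |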
| |
| # |
| # replication link loss, current Primary calls this handler: |
| # We are UpToDate, but we potentially need to wait for a DC election. |
| # Once we have contacted the DC, we poll the cib until the peer is |
| # confirmed unreachable, or crmadmin -S confirms it as reachable, |
| # or timeout expired. |
| # Then we place the constraint, and are done. |
| # |
| # If it is a complete loss of cluster communication, one node will STONITH the other. |
| # For two-node clusters with no-quorum-policy=ignore, we will have a |
| # deathmatch shoot-out, which the former DC is likely to win. |
| # |
| # In dual-primary setups, if it is only replication link loss, both nodes |
| # will call this handler, but only one will succeed in placing the |
| # constraint. The other will then typically need to "commit suicide". |
| # With stonith enabled, and --suicide-on-failure-if-primary, |
| # we will trigger a node level fencing, telling |
| # pacemaker to "terminate" that node, |
| # and scheduling a reboot -f just in case. |
| # |
| # Primary crash, promotion of former Secondary: |
| # DC-election, if any, will have taken place already. |
| # We are UpToDate, we place the constraint, done. |
| # |
| # node or cluster crash, promotion of Secondary with replication link down: |
| # We are "Only" Consistent. Usually any "init-dead-time" or similar has |
| # expired already, and the cib node states are already authoritative |
| # without doing additional waiting. If the peer is still reachable, we |
| # place the constraint - if the peer had better data, it should have a |
| # higher master score, and we should not have been asked to become |
| # primary. If the peer is not reachable, we don't do anything, and DRBD |
| # will refuse to be promoted. This is necessary to avoid data divergence: |
| # if this "crash" was in fact due to a STONITH operation, |
| # the reboot may not have fixed our cluster communications! |
| # |
| # Note that typically, if STONITH is in use, it has been done on any |
| # unreachable node _before_ we are promoted, so the cib should already |
| # know that the peer is dead - if it is. |
| # |
| |
| # slightly different logic than crm_is_true |
| crm_is_not_false() |
| { |
| case $1 in |
| no|n|false|0|off) |
| false ;; |
| *) |
| true ;; |
| esac |
| } |
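| # Examples: |
| #   crm_is_not_false ""     -> true   (unset/empty counts as "not false") |
| #   crm_is_not_false no     -> false |
| #   crm_is_not_false maybe  -> true   (this is where it differs from crm_is_true) |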
| |
| check_cluster_properties() |
| { |
| local x properties=$(set +x; echo "$cib_xml" | |
| sed -n -e '/<crm_config/,/<\/crm_config/ !d;' \ |
| -e '/<cluster_property_set/,/<\/cluster_property_set/ !d;' \ |
| -e '/<nvpair / !d' \ |
| -e 's/^.* name="\([^"]*\)".* value="\([^"]*\)".*$/\1=\2/p' \ |
| -e 's/^.* value="\([^"]*\)".* name="\([^"]*\)".*$/\2=\1/p') |
| |
| for x in $properties ; do |
| case $x in |
| startup[-_]fencing=*) startup_fencing=${x#*=} ;; |
| stonith[-_]enabled=*) stonith_enabled=${x#*=} ;; |
| esac |
| done |
| |
| crm_is_not_false $startup_fencing && startup_fencing=true || startup_fencing=false |
| crm_is_not_false $stonith_enabled && stonith_enabled=true || stonith_enabled=false |
| } |
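| # E.g. a crm_config fragment like |
| #   <nvpair id="opts-stonith-enabled" name="stonith-enabled" value="false"/> |
| # is parsed into "stonith-enabled=false" above, |
| # and thus results in stonith_enabled=false. |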
| |
| |
| # |
| # In case this is a two-node cluster (still common with |
| # DRBD clusters) it does not have real quorum. |
| # If it is configured to STONITH and reboot, |
| # and if, after the reboot, the STONITHed node's cluster |
| # communication is still broken, that node will shoot the |
| # still-online node, and try to go online with stale data. |
| # Exactly what this "fence" handler should prevent. |
| # But setting constraints in a cluster partition with |
| # "no-quorum-policy=ignore" will usually succeed. |
| # |
| # So we need to differentiate between node reachable or |
| # not, and DRBD "Consistent" or "UpToDate". |
| # |
| try_place_constraint() |
| { |
| local peer_state |
| |
| rc=1 |
| |
| while :; do |
| check_peer_node_reachable |
| [[ $peer_state != "reachable" ]] && break |
| # if it really is still reachable, maybe the replication link |
| # recovers by itself, and we can get away without taking action? |
| (( $net_hickup_time > $SECONDS )) || break |
| sleep $(( net_hickup_time - SECONDS )) |
| done |
| |
| set_states_from_proc_drbd |
| : == DEBUG == DRBD_peer=${DRBD_peer[*]} === |
| : == DEBUG == DRBD_pdsk=${DRBD_pdsk[*]} === |
| if $DRBD_pdsk_all_uptodate ; then |
| echo WARNING "All peer disks are UpToDate! Did not place the constraint." |
| rc=0 |
| return |
| fi |
| |
| : == DEBUG == CTS_mode=$CTS_mode == |
| : == DEBUG == DRBD_disk_all_consistent=$DRBD_disk_all_consistent == |
| : == DEBUG == DRBD_disk_all_uptodate=$DRBD_disk_all_uptodate == |
| : == DEBUG == $peer_state/${DRBD_disk[*]}/$unreachable_peer_is == |
| if [[ ${#DRBD_disk[*]} = 0 ]]; then |
| # Someone called this script, without the corresponding drbd |
| # resource being configured. That's not very useful. |
| echo WARNING "could not determine my disk state: did not place the constraint!" |
| rc=0 |
| # keep drbd_fence_peer_exit_code at "generic error", |
| # which will cause a "script is broken" message in case it was |
| # indeed called as handler from within drbd |
| |
| # No, NOT fenced/Consistent: |
| # just because we have been able to shoot him |
| # does not make our data any better. |
| elif [[ $peer_state = reachable ]] && $DRBD_disk_all_consistent; then |
| # = reachable ]] && $DRBD_disk_all_uptodate |
| # is implicitly handled here as well. |
| set_constraint && |
| drbd_fence_peer_exit_code=4 rc=0 && |
| echo INFO "peer is $peer_state, my disk is ${DRBD_disk[*]}: placed constraint '$id_prefix-$master_id'" |
| |
| elif [[ $peer_state = fenced ]] && $DRBD_disk_all_uptodate ; then |
| set_constraint && |
| drbd_fence_peer_exit_code=7 rc=0 && |
| echo INFO "peer is $peer_state, my disk is $DRBD_disk: placed constraint '$id_prefix-$master_id'" |
| |
| # Peer is neither "reachable" nor "fenced" (above would have matched) |
| # So we just hit some timeout. |
| # As long as we are UpToDate, place the constraint and continue. |
| # If you don't like that, use a ridiculously high timeout, |
| # or patch this script. |
| elif $DRBD_disk_all_uptodate ; then |
| # We could differentiate between unreachable, |
| # and DC-unreachable. In the latter case, placing the |
| # constraint will fail anyways, and drbd_fence_peer_exit_code |
| # will stay at "generic error". |
| set_constraint && |
| drbd_fence_peer_exit_code=5 rc=0 && |
| echo INFO "peer is not reachable, my disk is UpToDate: placed constraint '$id_prefix-$master_id'" |
| |
| # This block is reachable by operator intervention only |
| # (unless you are hacking this script and know what you are doing) |
| elif [[ $peer_state != reachable ]] && [[ $unreachable_peer_is = outdated ]] && $DRBD_disk_all_consistent; then |
| # If the peer is not reachable, but we are only Consistent, we |
| # may need some way to still allow promotion. |
| # Easy way out: --force primary with drbdsetup. |
| # But that would not place the constraint, nor outdate the |
| # peer. With this --unreachable-peer-is-outdated, we still try |
| # to set the constraint. Next promotion attempt will find the |
| # "correct" constraint, consider the peer as successfully |
| # fenced, and continue. |
| set_constraint && |
| drbd_fence_peer_exit_code=5 rc=0 && |
| echo WARNING "peer is unreachable, my disk is only Consistent: --unreachable-peer-is-outdated FORCED constraint '$id_prefix-$master_id'" && |
| echo WARNING "This MAY RISK DATA INTEGRITY" |
| |
| # So I'm not UpToDate, and peer is not reachable. |
| # Tell the module about "not reachable", and don't do anything else. |
| else |
| echo WARNING "peer is $peer_state, my disk is ${DRBD_disk[*]}: did not place the constraint!" |
| drbd_fence_peer_exit_code=5 rc=0 |
| # I'd like to return 6 here, otherwise pacemaker will retry |
| # forever to promote, even though 6 is not strictly correct. |
| fi |
| return $rc |
| } |
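| # Condensed view of the branches above (a summary, not additional logic): |
| #   all peer disks UpToDate              -> no constraint, keep generic error |
| #   local disk state unknown             -> no constraint, keep generic error |
| #   reachable + all at least Consistent  -> constraint placed, exit code 4 |
| #   fenced    + all UpToDate             -> constraint placed, exit code 7 |
| #   timed out + all UpToDate             -> constraint placed, exit code 5 |
| #   unreachable + all Consistent |
| #     + --unreachable-peer-is-outdated   -> constraint FORCED, exit code 5 |
| #   anything else                        -> no constraint, exit code 5 |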
| |
| commit_suicide() |
| { |
| local reboot_timeout=20 |
| local extra_msg |
| |
| if $stonith_enabled ; then |
| # avoid double fence, tell pacemaker to kill me |
| echo WARNING "trying to have pacemaker kill me now!" |
| crm_attribute -t status -N $HOSTNAME -n terminate -v 1 |
| echo WARNING "told pacemaker to kill me, but scheduling reboot -f in 300 seconds just in case" |
| |
| # ------------------------- |
| echo WARNING $'\n'" told pacemaker to kill me,"\ |
| $'\n'" but scheduling reboot -f in 300 seconds just in case."\ |
| $'\n'" kill $$ # to cancel" | wall |
| # ------------------------- |
| |
| reboot_timeout=300 |
| extra_msg="Pacemaker terminate pending. If that fails, I'm " |
| |
| else |
| # ------------------------- |
| echo WARNING $'\n'" going to reboot -f in $reboot_timeout seconds"\ |
| $'\n'" kill $$ # to cancel!" | wall |
| # ------------------------- |
| fi |
| |
| reboot_timeout=$(( reboot_timeout + SECONDS )) |
| # pacemaker apparently cannot kill me. |
| while (( $SECONDS < $reboot_timeout )); do |
| echo WARNING "${extra_msg}going to reboot -f in $(( reboot_timeout - SECONDS )) seconds! To cancel: kill $$" |
| sleep 2 |
| done |
| echo WARNING "going to reboot -f now!" |
| reboot -f |
| sleep 864000 |
| } |
| |
| # drbd_peer_fencing fence|unfence |
| drbd_peer_fencing() |
| { |
| local rc |
| # input to fence_peer_init: |
| # $DRBD_RESOURCE is set by command line or from environment. |
| # $id_prefix is set by command line or default. |
| # $master_id is set by command line or will be parsed from the cib. |
| # output of fence_peer_init: |
| local have_constraint new_constraint |
| |
| # if I cannot query the local cib, give up |
| get_cib_xml -Ql || return |
| fence_peer_init || return |
| |
| if [[ $1 = fence ]] || $unfence_only_if_owner_match ; then |
| if [[ $fencing_attribute = "#uname" ]]; then |
| fencing_value=$HOSTNAME |
| elif ! fencing_value=$(crm_attribute -Q -t nodes -n $fencing_attribute 2>/dev/null); then |
| fencing_attribute="#uname" |
| fencing_value=$HOSTNAME |
| fi |
| # double negation: the Master role must not run anywhere but where my data is. |
| new_constraint="\ |
| <rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\"> |
| <rule role=\"$role\" score=\"-INFINITY\" id=\"$id_prefix-rule-$master_id\"> |
| <expression attribute=\"$fencing_attribute\" operation=\"ne\" value=\"$fencing_value\" id=\"$id_prefix-expr-$master_id\"/> |
| </rule> |
| </rsc_location>" |
| fi |
| |
| case $1 in |
| fence) |
| |
| local startup_fencing stonith_enabled |
| check_cluster_properties |
| |
| if [[ -z $have_constraint ]] ; then |
| # try to place it. |
| |
| try_place_constraint && return |
| |
| # maybe callback and operator raced for the same constraint? |
| # before we potentially trigger node level fencing |
| # or keep IO frozen, double check. |
| # try_place_constraint has updated cib_xml from DC |
| |
| have_constraint=$(set +x; echo "$cib_xml" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id") |
| fi |
| |
| if [[ "$have_constraint" = "$(set +x; echo "$new_constraint" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")" ]]; then |
| echo INFO "suitable constraint already placed: '$id_prefix-$master_id'" |
| drbd_fence_peer_exit_code=4 |
| rc=0 |
| elif [[ -n "$have_constraint" ]] ; then |
| # if this id already exists, but looks different, we may have lost a shootout |
| echo WARNING "constraint $have_constraint already exists" |
| # anything != 0 will do; |
| # 21 happened to be "The object already exists" with my cibadmin |
| rc=21 |
| |
| # maybe: drbd_fence_peer_exit_code=6 |
| # as this is not the constraint we'd like to set, |
| # it is likely the inverse, so we probably can assume |
| # that the peer is active primary, or at least has |
| # better data than us, and wants us outdated. |
| fi |
| |
| if [[ $rc != 0 ]]; then |
| # at least we tried. |
| # maybe it was already in place? |
| echo WARNING "DATA INTEGRITY at RISK: could not place the fencing constraint!" |
| fi |
| |
| # XXX policy decision: |
| if $suicide_on_failure_if_primary && [[ $drbd_fence_peer_exit_code != [3457] ]]; then |
| set_states_from_proc_drbd |
| [[ "${DRBD_role[*]}" = *Primary* ]] && commit_suicide |
| fi |
| |
| return $rc |
| ;; |
| unfence) |
| if [[ -n $have_constraint ]]; then |
| set_states_from_proc_drbd |
| if $DRBD_disk_all_uptodate && $DRBD_pdsk_all_uptodate; then |
| if $unfence_only_if_owner_match && [[ "$have_constraint" != "$(set +x; echo "$new_constraint" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")" ]] |
| then |
| echo WARNING "Constraint owner does not match, leaving constraint in place." |
| else |
| # try to remove it based on that xml-id |
| remove_constraint && echo INFO "Removed constraint '$id_prefix-$master_id'" |
| fi |
| else |
| local w="My" |
| $DRBD_disk_all_uptodate && w="Peer's" |
| echo WARNING "$w disk(s) are NOT all UpToDate, leaving constraint in place." |
| return 1 |
| fi |
| else |
| $quiet || echo "No constraint in place, nothing to do." |
| return 0 |
| fi |
| esac |
| } |
| |
| double_check_after_fencing() |
| { |
| set_states_from_proc_drbd |
| : == DEBUG == DRBD_peer=${DRBD_peer[*]} === |
| : == DEBUG == DRBD_pdsk=${DRBD_pdsk[*]} === |
| if $DRBD_pdsk_all_uptodate ; then |
| echo WARNING "All peer disks are UpToDate (again), trying to remove the constraint again." |
| remove_constraint && drbd_fence_peer_exit_code=1 rc=0 |
| return |
| fi |
| } |
| |
| guess_if_pacemaker_will_fence() |
| { |
| # try to guess whether it is useful to wait and poll again, |
| # (node fencing in progress...), |
| # or if pacemaker thinks the node is "clean" dead. |
| local x |
| |
| # "return values:" |
| crmd='' in_ccm='' expected='' join='' will_fence=false |
| |
| # Older pacemaker has an "ha" attribute, too. |
| # For stonith-enabled=false, the "crmd" attribute may stay "online", |
| # but once ha="dead", we can stop waiting for changes. |
| ha_dead=false |
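| # Example node_state entry (the attribute set varies with pacemaker version; |
| # node name and id are made up): |
| #   <node_state id="2" uname="bob" in_ccm="false" crmd="offline" |
| #               join="down" expected="down" ...> |
| # in_ccm="false" + crmd="offline" + expected="down" reads as "cleanly down"; |
| # anything less definite means fencing may still be pending. |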
| |
| node_state=${node_state%>} |
| node_state=${node_state%/} |
| for x in ${node_state} ; do |
| case $x in |
| in_ccm=\"*\") x=${x#*=\"}; x=${x%\"}; in_ccm=$x ;; |
| crmd=\"*\") x=${x#*=\"}; x=${x%\"}; crmd=$x ;; |
| expected=\"*\") x=${x#*=\"}; x=${x%\"}; expected=$x ;; |
| join=\"*\") x=${x#*=\"}; x=${x%\"}; join=$x ;; |
| ha=\"dead\") ha_dead=true ;; |
| esac |
| done |
| |
| # if it is not enabled, no point in waiting for it. |
| if ! $stonith_enabled ; then |
| # "normalize" the rest of the logic |
| # where this is called. |
| # for stonith-enabled=false, and ha="dead", |
| # reset crmd="offline". |
| # Then we stop polling the cib for changes. |
| |
| $ha_dead && crmd="offline" |
| return |
| fi |
| |
| if [[ -z $node_state ]] ; then |
| # if we don't know anything about the peer, |
| # and startup_fencing is explicitly disabled, |
| # no fencing will take place. |
| $startup_fencing || return |
| fi |
| |
| # for further inspiration, see pacemaker:lib/pengine/unpack.c, determine_online_status_fencing() |
| [[ -z $in_ccm ]] && will_fence=true |
| [[ $crmd = "banned" ]] && will_fence=true |
| if [[ ${expected-down} = "down" && $in_ccm = "false" && $crmd != "online" ]]; then |
| : "pacemaker considers this as clean down" |
| elif [[ $in_ccm = false ]] || [[ $crmd != "online" ]]; then |
| will_fence=true |
| fi |
| } |
| |
| # return value in $peer_state: |
| # DC-unreachable |
| # We have not been able to contact the DC. |
| # fenced |
| # According to the node_state recorded in the cib, |
| # the peer is offline and expected down |
| # (which means successfully fenced, if stonith is enabled) |
| # reachable |
| # cib says it's online, and crmadmin -S says peer state is "ok" |
| # unreachable |
| # cib says it's offline (but does not yet say "expected" down) |
| # and we reached the timeout |
| # unknown |
| # cib does not say it was offline (or we don't know who the peer is) |
| # and we reached the timeout |
| # |
| check_peer_node_reachable() |
| { |
| # we are going to increase the cib timeout after every timed-out attempt (see below). |
| # for the actual invocation, we use int(cibtimeout/10). |
| # scaled by 5 / 4 with each iteration, |
| # this results in a timeout sequence of 1 2 2 3 4 5 6 7 9 ... seconds |
| local cibtimeout=18 |
| local full_timeout |
| local nr_other_nodes |
| local other_node_uname_attrs |
| |
| # we have a cibadmin -Ql in cib_xml already |
| # filter out <node uname, but ignore type="ping" nodes, |
| # they don't run resources |
| other_node_uname_attrs=$(set +x; echo "$cib_xml" | |
| sed -e '/<node /!d; / type="ping"/d;s/^.* \(uname="[^"]*"\).*>$/\1/' | |
| grep -v -F uname=\"$HOSTNAME\") |
| set -- $other_node_uname_attrs |
| nr_other_nodes=$# |
| |
| while :; do |
| local state_lines='' node_state='' crmd='' in_ccm='' |
| local expected='' join='' will_fence='' ha_dead='' |
| |
| while :; do |
| local t=$SECONDS |
| # |
| # Update our view of the cib, ask the DC this time. |
| # Timeout, in case no DC is available. |
| # Caution, some cibadmin versions (pacemaker 0.6 and earlier) |
| # apparently interpret -t as milliseconds, so they will time out |
| # many times until the timeout, increased below, |
| # becomes long enough. |
| # |
| # Why not use the default timeout? |
| # Because that would unnecessarily wait for 30 seconds |
| # or longer, even if the DC is re-elected right now, |
| # and available within the next second. |
| # |
| get_cib_xml -Q -t $(( cibtimeout/10 )) && break |
| |
| # bash magic $SECONDS is seconds since shell invocation. |
| if (( $SECONDS > $dc_timeout )) ; then |
| # unreachable: cannot even reach the DC |
| peer_state="DC-unreachable" |
| return |
| fi |
| |
| # avoid busy loop |
| [[ $t = $SECONDS ]] && sleep 1 |
| |
| # try again, longer timeout. |
| let "cibtimeout = cibtimeout * 5 / 4" |
| done |
| state_lines=$( set +x; echo "$cib_xml" | grep '<node_state ' | |
| grep -F -e "$other_node_uname_attrs" ) |
| |
| if $CTS_mode; then |
| # CTS requires startup-fencing=false. |
| # For PartialStart, NearQuorumPoint and similar tests, |
| # we would likely stay Consistent, and refuse to Promote. |
| # And CTS would be very unhappy. |
| # Pretend that the peer was reachable if we are missing a node_state entry for it. |
| if [[ $DRBD_PEER ]] && ! echo "$state_lines" | grep -q -F uname=\"$DRBD_PEER\" ; then |
| peer_state="reachable" |
| echo WARNING "CTS-mode: pretending that unseen node $DRBD_PEER was reachable" |
| return |
| fi |
| fi |
| |
| # very unlikely: no DRBD_PEER passed in, |
| # but in fact only one other cluster node. |
| # Use that one as DRBD_PEER. |
| if [[ -z $DRBD_PEER ]] && [[ $nr_other_nodes = 1 ]]; then |
| DRBD_PEER=${other_node_uname_attrs#uname=\"} |
| DRBD_PEER=${DRBD_PEER%\"} |
| fi |
| |
| if [[ -z $DRBD_PEER ]]; then |
| # Multi node cluster, but unknown DRBD Peer. |
| # This should not be a problem, unless you have |
| # no_quorum_policy=ignore in an N > 2 cluster. |
| # (yes, I've seen such beasts in the wild!) |
| # As we don't know the peer, |
| # we could only safely return here if *all* |
| # potential peers are confirmed down. |
| # Don't try to be smart, just wait for the full |
| # timeout, which should allow STONITH to |
| # complete. |
| full_timeout=$(( $timeout - $SECONDS )) |
| if (( $full_timeout > 0 )) ; then |
| echo WARNING "don't know who my peer is; sleep $full_timeout seconds just in case" |
| sleep $full_timeout |
| fi |
| |
| # In the unlikely case that we don't know our DRBD peer, |
| # there is no point in polling the cib again, |
| # that won't teach us who our DRBD peer is. |
| # |
| # We waited $full_timeout seconds already, |
| # to allow for node level fencing to shoot us. |
| # |
| # So if we are still alive, then obviously no-one has shot us. |
| # |
| |
| peer_state="unknown" |
| return |
| fi |
| |
| # |
| # we know the peer or/and are a two node cluster |
| # |
| |
| node_state=$(set +x; echo "$state_lines" | grep -F uname=\"$DRBD_PEER\") |
| |
| # populates in_ccm, crmd, expected, join, will_fence=[false|true] |
| guess_if_pacemaker_will_fence |
| |
| if ! $will_fence && [[ $crmd != "online" ]] ; then |
| |
| # "legacy" cman + pacemaker clusters older than 1.1.10 |
| # may "forget" about startup fencing. |
| # We can detect this because the "expected" attribute is missing. |
| # Does not make much difference for our logic, though. |
| [[ $expected/$in_ccm = "down/false" ]] && peer_state="fenced" || peer_state="unreachable" |
| |
| return |
| fi |
| |
| # So the cib does still indicate the peer was reachable. |
| # |
| # try crmadmin; if we can successfully query the state of the remote crmd, |
| # it is obviously reachable. |
| # |
| # Do this only after we have been able to reach a DC above. |
| # Note: crmadmin timeout is in milli-seconds, and defaults to 30000 (30 seconds). |
| # Our variable $cibtimeout should be in deci-seconds (see above) |
| # (unless you use a very old version of pacemaker, so don't do that). |
| # Convert deci-seconds to milli-seconds, and double it. |
| if [[ $crmd = "online" ]] ; then |
| local out |
| if out=$( crmadmin -t $(( cibtimeout * 200 )) -S $DRBD_PEER ) \ |
| && [[ $out = *"(ok)" ]]; then |
| peer_state="reachable" |
| return |
| fi |
| fi |
| |
| # We know our DRBD peer. |
| # We are still not sure about its status, though. |
| # |
| # It is not (yet) "expected down" per the cib, but it is not |
| # reliably reachable via crmadmin -S either. |
| # |
| # If we already polled for longer than timeout, give up. |
| # |
| # For a resource-and-stonith setup, or dual-primaries (which |
| # you should only use with resource-and-stonith, anyways), |
| # the recommended timeout is larger than the deadtime or |
| # stonith timeout, and according to beekhof maybe should be |
| # tuned up to the election-timeout (which, btw, defaults to 2 |
| # minutes!). |
| # |
| if (( $SECONDS >= $timeout )) ; then |
| [[ $crmd = offline ]] && peer_state="unreachable" || peer_state="unknown" |
| return |
| fi |
| |
| # wait a bit before we poll the DC again |
| sleep 2 |
| done |
| # NOT REACHED |
| } |
| |
| set_states_from_proc_drbd() |
| { |
| local IFS line lines i disk pdsk |
| # DRBD_MINOR exported by drbdadm since 8.3.3 |
| [[ $DRBD_MINOR ]] || DRBD_MINOR=$(drbdadm ${DRBD_CONF:+ -c "$DRBD_CONF"} sh-minor $DRBD_RESOURCE) || return |
| |
| # if we have more than one minor, do a word split, ... |
| set -- $DRBD_MINOR |
| # ... and convert into regex: |
| IFS="|$IFS"; DRBD_MINOR="($*)"; IFS=${IFS#?} |
| |
| # We must not recurse into netlink, |
| # this may be a callback triggered by "drbdsetup primary". |
| # grep /proc/drbd instead |
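| # A typical /proc/drbd status line (drbd 8.x) looks like: |
| #   0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----- |
| # After the "s/:/ /g" below and word splitting, $5 is the role pair |
| # (local/peer) and $7 the disk state pair (local/peer). |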
| |
| DRBD_peer=() |
| DRBD_role=() |
| DRBD_disk=() |
| DRBD_pdsk=() |
| DRBD_disk_all_uptodate=true |
| DRBD_disk_all_consistent=true |
| DRBD_pdsk_all_uptodate=true |
| |
| IFS=$'\n' |
| lines=($(sed -nre "/^ *$DRBD_MINOR: cs:/ { s/:/ /g; p; }" /proc/drbd)) |
| IFS=$' \t\n' |
| |
| i=0 |
| for line in "${lines[@]}"; do |
| set -- $line |
| DRBD_peer[i]=${5#*/} |
| DRBD_role[i]=${5%/*} |
| pdsk=${7#*/} |
| disk=${7%/*} |
| DRBD_disk[i]=${disk:-Unconfigured} |
| DRBD_pdsk[i]=${pdsk:-DUnknown} |
| case $disk in |
| UpToDate) ;; |
| Consistent) |
| DRBD_disk_all_uptodate=false ;; |
| *) |
| DRBD_disk_all_uptodate=false |
| DRBD_disk_all_consistent=false ;; |
| esac |
| [[ $pdsk != UpToDate ]] && DRBD_pdsk_all_uptodate=false |
| let i++ |
| done |
| if (( i == 0 )) ; then |
| DRBD_pdsk_all_uptodate=false |
| DRBD_disk_all_uptodate=false |
| DRBD_disk_all_consistent=false |
| fi |
| } |
| ############################################################ |
| |
| # try to get possible output on stdout/err to syslog |
| PROG=${0##*/} |
| redirect_to_logger() |
| { |
| local lf=${1:-local5} |
| case $lf in |
| # do we want to exclude some? |
| auth|authpriv|cron|daemon|ftp|kern|lpr|mail|news|syslog|user|uucp|local[0-7]) |
| : OK ;; |
| *) |
| echo >&2 "invalid logfacility: $lf" |
| return |
| ;; |
| esac |
| # Funky redirection to avoid logger feeding its own output to itself accidentally. |
| # Funky double exec to avoid an intermediate sub-shell. |
| # Sometimes, the sub-shell lingers around, keeps file descriptors open, |
| # and logger then won't notice the main script has finished, |
| # forever waiting for further input. |
| # The second exec replaces the subshell, and logger will notice directly |
| # when its stdin is closed once the main script exits. |
| # This avoids the spurious logger processes. |
| exec > >( exec 1>&- 2>&- logger -t "$PROG[$$]" -p $lf.info ) 2>&1 |
| } |
| if [[ $- != *x* ]]; then |
| # you may override with --logfacility below |
| redirect_to_logger local5 |
| fi |
| |
| # clean environment just in case. |
| unset fencing_attribute id_prefix timeout dc_timeout unreachable_peer_is |
| unset flock_timeout flock_required lock_dir lock_file |
| quiet=false |
| unfence_only_if_owner_match=false |
| CTS_mode=false |
| suicide_on_failure_if_primary=false |
| |
| # poor man's command line argument parsing, |
| # allow for command line overrides |
| set -- "$@" $OCF_RESKEY_unfence_extra_args |
| while [[ $# != 0 ]]; do |
| case $1 in |
| --logfacility=*) |
| redirect_to_logger ${1#*=} |
| ;; |
| --logfacility) |
| redirect_to_logger $2 |
| shift |
| ;; |
| --resource=*) |
| DRBD_RESOURCE=${1#*=} |
| ;; |
| -r|--resource) |
| DRBD_RESOURCE=$2 |
| shift |
| ;; |
| --master-id=*) |
| master_id=${1#*=} |
| ;; |
| -i|--master-id) |
| master_id=$2 |
| shift |
| ;; |
| --role=*) |
| role=${1#*=} |
| ;; |
| -l|--role) |
| role=${2} |
| shift |
| ;; |
| --fencing-attribute=*) |
| fencing_attribute=${1#*=} |
| ;; |
| -a|--fencing-attribute) |
| fencing_attribute=$2 |
| shift |
| ;; |
| --id-prefix=*) |
| id_prefix=${1#*=} |
| ;; |
| -p|--id-prefix) |
| id_prefix=$2 |
| shift |
| ;; |
| --timeout=*) |
| timeout=${1#*=} |
| ;; |
| -t|--timeout) |
| timeout=$2 |
| shift |
| ;; |
| --dc-timeout=*) |
| dc_timeout=${1#*=} |
| ;; |
| -d|--dc-timeout) |
| dc_timeout=$2 |
| shift |
| ;; |
| --quiet) |
| quiet=true |
| ;; |
| --unfence-only-if-owner-match) |
| unfence_only_if_owner_match=true |
| ;; |
| --flock-required) |
| flock_required=true |
| ;; |
| --flock-timeout=*) |
| flock_timeout=${1#*=} |
| ;; |
| --flock-timeout) |
| flock_timeout=$2 |
| shift |
| ;; |
| --lock-dir=*) |
| lock_dir=${1#*=} |
| ;; |
| --lock-dir) |
| lock_dir=$2 |
| shift |
| ;; |
| --lock-file=*) |
| lock_file=${1#*=} |
| ;; |
| --lock-file) |
| lock_file=$2 |
| shift |
| ;; |
| --net-hickup=*|--network-hickup=*) |
| net_hickup_time=${1#*=} |
| ;; |
| --net-hickup|--network-hickup) |
| net_hickup_time=$2 |
| shift |
| ;; |
| --CTS-mode) |
| CTS_mode=true |
| ;; |
| --unreachable-peer-is-outdated) |
| # This is NOT to be scripted. |
| # Otherwise people will put this into the handler definition in |
| # drbd.conf, and all this nice work would be useless. |
| test -t 0 && |
| unreachable_peer_is=outdated |
| ;; |
| --suicide-on-failure-if-primary) |
| suicide_on_failure_if_primary=true |
| ;; |
| -*) |
| echo >&2 "ignoring unknown option $1" |
| ;; |
| *) |
| echo >&2 "ignoring unexpected argument $1" |
| ;; |
| esac |
| shift |
| done |
| |
| # |
| # Sanitize lock_file and lock_dir |
| # |
| if [[ ${lock_dir:=/var/lock/drbd} != /* ]] ; then |
| echo WARNING "lock_dir needs to be an absolute path, not [$lock_dir]; using default." |
| lock_dir=/var/lock/drbd |
| fi |
| case $lock_file in |
| "") lock_file=$lock_dir/fence.${DRBD_RESOURCE//\//_} ;; |
| NONE) : ;; |
| /*) : ;; |
| *) lock_file=$lock_dir/$lock_file ;; |
| esac |
| if [[ $lock_file != NONE && $lock_file != $lock_dir/* ]]; then |
| lock_dir=${lock_file%/*}; : ${lock_dir:=/} |
| : == DEBUG == "override: lock_dir=$lock_dir to match lock_file=$lock_file" |
| fi |
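| # E.g. with DRBD_RESOURCE=r0 and no overrides, |
| # this yields lock_file=/var/lock/drbd/fence.r0 |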
| |
| # DRBD_RESOURCE: from environment |
| # master_id: parsed from cib |
| |
| : "== unreachable_peer_is == ${unreachable_peer_is:=unknown}" |
| # apply defaults: |
| : "== fencing_attribute == ${fencing_attribute:="#uname"}" |
| : "== id_prefix == ${id_prefix:="drbd-fence-by-handler"}" |
| : "== role == ${role:="Master"}" |
| |
| # defaults suitable for most cases |
| : "== net_hickup_time == ${net_hickup_time:=0}" |
| : "== timeout == ${timeout:=90}" |
| : "== dc_timeout == ${dc_timeout:=20}" |
| : "== flock_timeout == ${flock_timeout:=120}" |
| : "== flock_required == ${flock_required:=false}" |
| : "== lock_file == ${lock_file}" |
| : "== lock_dir == ${lock_dir}" |
| |
| |
| # check envars normally passed in by drbdadm |
| # TODO DRBD_CONF is also passed in. we may need to use it in the |
| # xpath query, in case someone is crazy enough to use different |
| # conf files with the _same_ resource name. |
| # for now: do not do that, or hardcode the cib id of the master |
| # in the handler section of your drbd conf file. |
| for var in DRBD_RESOURCE; do |
| if [ -z "${!var}" ]; then |
| echo "Environment variable \$$var not found (this is normally passed in by drbdadm)." >&2 |
| exit 1 |
| fi |
| done |
| |
| # Fixup id-prefix to include the resource name |
| # There may be multiple drbd instances that are part of the same M/S group, |
| # pointing to the same master-id. They still each need their own constraint, |
| # so they can be unfenced independently as each finishes its resync. |
| # Be nice to people who already explicitly configure an id prefix containing |
| # the resource name. |
| if [[ $id_prefix != *"-$DRBD_RESOURCE" ]] ; then |
| id_prefix="$id_prefix-$DRBD_RESOURCE" |
| : "== id_prefix == ${id_prefix}" |
| fi |
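| # E.g. the default id_prefix "drbd-fence-by-handler" with DRBD_RESOURCE=r0 |
| # becomes "drbd-fence-by-handler-r0". |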
| |
| # make sure $HOSTNAME contains what we expect |
| HOSTNAME=$(uname -n) |
| |
| $quiet || { |
| for k in ${!DRBD_*} UP_TO_DATE_NODES; do printf "%s=%q " "$k" "${!k}"; done |
| printf '%q' "$0" |
| [[ $# != 0 ]] && printf ' %q' "$@" |
| printf '\n' |
| } |
| |
| # to be set by drbd_peer_fencing() |
| drbd_fence_peer_exit_code=1 |
| |
| got_flock=false |
| if [[ $lock_file != NONE ]] ; then |
| test -d "$lock_dir" || |
| mkdir -p -m 0700 "$lock_dir" || |
| echo WARNING "mkdir -p $lock_dir failed" |
| |
| if exec 9>"$lock_file" && flock --exclusive --timeout $flock_timeout 9 |
| then |
| got_flock=true |
| else |
| echo WARNING "Could not get flock on $lock_file" |
| $flock_required && exit 1 |
| |
| # If I cannot get the lock file, I can at least still try to place the constraint |
| fi |
| : == DEBUG == $SECONDS seconds, got_flock=$got_flock == |
| fi |
| |
| case $PROG in |
| crm-fence-peer.sh) |
| if drbd_peer_fencing fence; then |
| : == DEBUG == $cibadmin_invocations cibadmin calls == |
| : == DEBUG == $SECONDS seconds == |
| [[ $drbd_fence_peer_exit_code = [347] ]] && double_check_after_fencing |
| exit $drbd_fence_peer_exit_code |
| fi |
| ;; |
| crm-unfence-peer.sh) |
| if drbd_peer_fencing unfence; then |
| : == DEBUG == $cibadmin_invocations cibadmin calls == |
| : == DEBUG == $SECONDS seconds == |
| exit 0 |
| fi |
| esac 9>&- # Don't want to "leak" the lock fd to child processes. |
| |
| # 1: unexpected error |
| exit 1 |