#!/bin/bash
#
export LANG=C LC_ALL=C TZ=":/etc/localtime"
grep_rsc_location()
{
# expected input: exactly one tag per line: "^[[:space:]]*<.*/?>$"
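# Illustrative example (hypothetical id and values): an input line such as
#   <expression id="e1" operation="ne" value="alice" attribute="#uname"/>
# is printed with its attributes reordered to
#   <expression attribute="#uname" operation="ne" value="alice" id="e1"/>
# so that later matching can rely on a fixed attribute order.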
sed -ne '
# within the rsc_location constraint with that id,
/<rsc_location .*\bid="'"$1"'"/, /<\/rsc_location>/ {
# make sure expressions have their attributes ordered
# as we expect them later
s/\(<expression\)\( .*\)\( attribute="[^"]*"\)/\1\3\2/
s/\(<expression attribute="[^"]*"\)\( .*\)\( operation="[^"]*"\)/\1\3\2/
s/\(<expression attribute="[^"]*" operation="[^"]*"\)\( .*\)\( value="[^"]*"\)/\1\3\2/
p;
/<\/rsc_location>/q # done, if closing tag is found
}'
}
sed_rsc_location_suitable_for_string_compare()
{
# expected input: exactly one tag per line: "^[[:space:]]*<.*/?>$"
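# Illustrative example (hypothetical values): a line such as
#   <rule role="Master" score="-INFINITY" id="r1">
# is trimmed, stripped of its id, and emitted one attribute per line:
#   <rule role="Master"
#   <rule score="-INFINITY"
# The sorted output allows an order-insensitive comparison of two constraints.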
sed -ne '
# within the rsc_location constraint with that id,
/<rsc_location .*\bid="'"$1"'"/, /<\/rsc_location>/ {
/<\/rsc_location>/q # done, if closing tag is found
s/^[[:space:]]*// # trim spaces
s/ *\bid="[^"]*"// # remove the id attribute
/^<!--/d # remove comments
# print each attribute on its own line, by
: attr
h # remember the current (tail of the) line
# remove all but the first attribute, and print,
s/^\([^[:space:]]*[[:space:]][^= ]*="[^"]*"\).*$/\1/p
g # then restore the remembered line,
# and remove the first attribute.
s/^\([^[:space:]]*\)[[:space:]][^= ]*="[^"]*"\(.*\)$/\1\2/
# then repeat, until no more attributes are left
t attr
}' | sort
}
cibadmin_invocations=0
remove_constraint()
{
cibadmin_invocations=$(( $cibadmin_invocations + 1 ))
cibadmin -D -X "<rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\"/>"
}
restrict_existing_constraint_further()
{
[[ ${#EXCLUDE_NODES[@]} != 0 ]] || return
new_constraint=$have_constraint
# compare with setup_new_constraint()
local i n a v
for i in "${!EXCLUDE_NODES[@]}"; do
n=${EXCLUDE_NODES[i]}
a=${ATTRIBUTES[i]:-}
if [[ -z "$a" ]] ; then
a=$fencing_attribute
if [[ $a = "#uname" ]]; then
v=$n
elif ! v=$(crm_attribute -Q -t nodes -N $n -n $a 2>/dev/null); then
# FALLBACK.
a="#uname"
v=$n
fi
ATTRIBUTES[i]=$a
VALUES[i]=$v
else
v=${VALUES[i]}
fi
# see grep_rsc_location(), which is supposed to fix the order of xml attributes
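# Illustrative example (hypothetical node): excluding a node whose
# attribute is "#uname" and value is "bob" drops its
# <expression attribute="#uname" operation="ne" value="bob" .../> line
# from the constraint, so that node is no longer part of the set of
# nodes still allowed to run the resource.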
new_constraint=$(set +x; echo "$new_constraint" |
grep -v "<expression attribute=\"$a\" operation=\"ne\" value=\"$v\"")
done
}
create_or_modify_constraint()
{
local DIR
local ex=1
DIR=$(mktemp -d)
cd "$DIR" || exit 1
cleanup()
{
trap - EXIT HUP INT QUIT TERM
local ex=$? sig=${1:-}
cd - && rm -rf "$DIR"
[[ $sig = EXIT ]] && exit $ex
[[ $sig ]] && kill -$sig $$
}
trap "cleanup EXIT" EXIT
trap "cleanup HUP" HUP
trap "cleanup INT" INT
trap "cleanup QUIT" QUIT
trap "cleanup TERM" TERM
local create_modify_replace="--modify --allow-create"
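# The loop below is an optimistic-concurrency update of the live cib:
# snapshot it into cib.xml.orig and cib.xml, apply our change to the local
# copy only (CIB_file=cib.xml), then push the resulting crm_diff as a patch.
# If the live cib changed in the meantime (exit code 205, "old data"),
# re-read and retry until $timeout.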
while :; do
# ==================================================================
cibadmin -Q | tee cib.xml.orig > cib.xml
export CIB_file=cib.xml
set -- $( crm_mon -1nL "$id_prefix-$master_id" | sed -n \
-e '/^Current DC:.*partition with quorum/ { s/.*/quorum=1/p };' \
-e '1,/^Negative Location Constraints:/ d' \
-e '/^ *\([^[:space:]]*\)[[:space:]]prevents '"$master_id"' from running.*on '"$HOSTNAME"'$/ { s/.*/already_rejected/p }' )
if [[ $# != 1 || $1 != "quorum=1" ]] ; then
: "sorry, want a quorate partition, and not be rejected by constraint already"
break
fi
if [[ $ACTION = fence ]]; then
# fence should only restrict further, not lift restrictions.
# There may have been a race between multiple instances of this script.
have_constraint=$(grep_rsc_location "$id_prefix-$master_id" < $CIB_file)
if [[ -n "$have_constraint" ]] ; then
create_modify_replace="--replace"
restrict_existing_constraint_further
fi
fi
# comments seem to not work in any sane way yet :-(
# new_constraint=${new_constraint/>/$'>\n'"<!-- $ACTION at $start_time_utc on $HOSTNAME mask $UP_TO_DATE_NODES -->"}
cibadmin $create_modify_replace -o constraints -X "$new_constraint"
crm_diff=$(crm_diff -o $CIB_file.orig -n $CIB_file)
unset CIB_file
# ==================================================================
cibadmin_invocations=$(( $cibadmin_invocations + 1 ))
echo "$crm_diff" | cibadmin --patch --xml-pipe
ex=$?
case $ex in
0)
: "0 ==> cib successfully changed"
break
;;
205) : "205 aka pcmk_err_old_data ==> going to retry in a bit"
(( $SECONDS >= $timeout )) && break
sleep 1
continue
;;
*) : "$ex ==> cib modify failed, giving up"
break
;;
esac
done
cleanup
unset -f cleanup
return $ex
}
cib_xml=""
cib_xml_first_line=""
crm_feature_set=""
admin_epoch=""
epoch=""
num_updates=""
have_quorum=""
get_cib_xml() {
cibadmin_invocations=$(( $cibadmin_invocations + 1 ))
cib_xml=$( set +x; cibadmin "$@" )
cib_xml_first_line=${cib_xml%%>*}
set -- ${cib_xml_first_line}
local x
for x ; do
case $x in
crm_feature_set=*) x=${x#*'="'}; x=${x%'"'}; crm_feature_set=$x ;;
admin_epoch=*) x=${x#*'="'}; x=${x%'"'}; admin_epoch=$x ;;
epoch=*) x=${x#*'="'}; x=${x%'"'}; epoch=$x ;;
num_updates=*) x=${x#*'="'}; x=${x%'"'}; num_updates=$x ;;
have-quorum=*) x=${x#*'="'}; x=${x%'"'}; have_quorum=$x ;;
esac
done
}
# If master_id was not passed in, try to "guess" it from the cib;
# we only know the DRBD_RESOURCE.
fence_peer_init()
{
# we know which instance we are: $OCF_RESOURCE_INSTANCE.
# but we do not know the xml ID of the <master/> :(
# cibadmin -Ql --xpath \
# '//master[primitive[@type="drbd" and instance_attributes/nvpair[@name = "drbd_resource" and @value="r0"]]]/@id'
# but I'd have to pipe that through sed anyways, because @attribute
# xpath queries are not supported.
# and I'd be incompatible with older cibadmin not supporting --xpath.
# be cool, sed it out:
: ${master_id=$(set +x; echo "$cib_xml" |
sed -ne '/<master /,/<\/master>/ {
/<master / h;
/<primitive/,/<\/primitive/ {
/<instance_attributes/,/<\/instance_attributes/ {
/<nvpair .*\bname="drbd_resource"/ {
/.*\bvalue="'"$DRBD_RESOURCE"'"/! d
x
s/^.*\bid="\([^"]*\)".*/\1/p
q
};};};}')}
if [[ -z $master_id ]] ; then
echo WARNING "drbd-fencing could not determine the master id of drbd resource $DRBD_RESOURCE"
return 1;
fi
return 0
}
# drbd_fence_peer_exit_code is per the exit code
# convention of the DRBD "fence-peer" handler,
# obviously.
# 3: peer is already outdated or worse (e.g. inconsistent)
# 4: peer has been successfully fenced
# 5: peer not reachable, assumed to be dead
# 6: please outdate yourself, peer is known (or likely)
# to have better data, or is even currently primary.
# (actually, currently it is "peer is active primary now", but I'd like to
# change that meaning slightly towards the above meaning)
# 7: peer has been STONITHed, thus assumed to be properly fenced
# XXX IMO, this should rather be handled like 5, not 4.
# NOTE:
# On loss of all cluster comm (cluster split-brain),
# without STONITH configured, you always still risk data divergence.
#
# There are different timeouts:
#
# --timeout is how long we poll the DC for a definite "unreachable" node state,
# before we give up and say "unknown".
# This should be longer than "dead time" or "stonith timeout",
# the time it takes the cluster manager to declare the other node dead and
# shoot it, just to be sure.
#
# --dc-timeout is how long we try to contact a DC before we give up.
# This is necessary, because placing the constraint will fail (with some
# internal timeout) if no DC was available when we request the constraint.
# Which is likely if the DC crashed. Then the surviving DRBD Primary needs
# to wait for a new DC to be elected. Usually such election takes only
# fractions of a second, but it can take much longer (the default election
# timeout in pacemaker is ~2 minutes!).
#
# --network-hickup is how long we wait for the replication link to recover,
# if crmadmin confirms that the peer is in fact still alive.
# It may have been just a network hickup. If so, no need to potentially trigger
# node level fencing.
#
# a) Small-ish (1s) timeout, medium (10..20s) dc-timeout:
# Intended use case: fencing resource-only, no STONITH configured.
#
# Even with STONITH properly configured, on cluster split-brain this method
# risks completing transactions to user space which may then be lost
# due to STONITH later.
#
# With dual-primary setup (cluster file system),
# you should use method b).
#
# b) timeout >= deadtime, dc-timeout > timeout
# Intended use case: fencing resource-and-stonith, STONITH configured.
#
# Difference to a)
#
# If peer is still reachable according to the cib,
# we first poll the cib/try to confirm with crmadmin,
# until either crmadmin confirms reachability, timeout has elapsed,
# or the peer becomes definitely unreachable.
#
# This gives STONITH the chance to kill us.
# With "fencing resource-and-stontith;" this protects us against
# completing transactions to userland which might otherwise be lost.
#
# We then place the constraint (if we are UpToDate), as explained below,
# and return reachable/unreachable according to our last cib status poll
# or crmadmin -S result.
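#
# Illustrative handler configuration for this use case b)
# (a sketch only; install path, script name and timeout values are
# assumptions, adjust them to your setup):
#
#   handlers {
#     fence-peer "/usr/lib/drbd/crm-fence-peer.9.sh --timeout 60 --dc-timeout 90";
#   }
#
# i.e. dc-timeout > timeout, and timeout not smaller than your stonith/dead time.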
#
#
# replication link loss, current Primary calls this handler:
# We are UpToDate, but we potentially need to wait for a DC election.
# Once we have contacted the DC, we poll the cib until the peer is
# confirmed unreachable, or crmadmin -S confirms it as reachable,
# or timeout expired.
# Then we place the constraint, and are done.
#
# If it is complete communications loss, one will stonith the other.
# For two-node clusters with no-quorum-policy=ignore, we will have a
# deathmatch shoot-out, which the former DC is likely to win.
#
# In dual-primary setups, if it is only replication link loss, both nodes
# will call this handler, but only one will succeed to place the
# constraint. The other will then typically need to "commit suicide".
# With stonith enabled, and --suicide-on-failure-if-primary,
# we will trigger a node level fencing, telling
# pacemaker to "terminate" that node,
# and scheduling a reboot -f just in case.
#
# Primary crash, promotion of former Secondary:
# DC-election, if any, will have taken place already.
# We are UpToDate, we place the constraint, done.
#
# node or cluster crash, promotion of Secondary with replication link down:
# We are "Only" Consistent. Usually any "init-dead-time" or similar has
# expired already, and the cib node states are already authoritative
# without doing additional waiting. If the peer is still reachable, we
# place the constraint - if the peer had better data, it should have a
# higher master score, and we should not have been asked to become
# primary. If the peer is not reachable, we don't do anything, and DRBD
# will refuse to be promoted. This is necessary to avoid problems
# with data divergence, in case this "crash" was due to a STONITH operation:
# maybe the reboot did not fix our cluster communications!
#
# Note that typically, if STONITH is in use, it has been done on any
# unreachable node _before_ we are promoted, so the cib should already
# know that the peer is dead - if it is.
#
# slightly different logic than crm_is_true
crm_is_not_false()
{
case ${1:-} in
no|n|false|0|off)
false ;;
*)
true ;;
esac
}
check_cluster_properties()
{
local x properties=$(set +x; echo "$cib_xml" |
sed -n -e '/<crm_config/,/<\/crm_config/ !d;' \
-e '/<cluster_property_set/,/<\/cluster_property_set/ !d;' \
-e '/<nvpair / !d' \
-e 's/^.* name="\([^"]*\)".* value="\([^"]*\)".*$/\1=\2/p' \
-e 's/^.* value="\([^"]*\)".* name="\([^"]*\)".*$/\2=\1/p')
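# Illustrative example (hypothetical id): an nvpair line such as
#   <nvpair id="opts-stonith-enabled" name="stonith-enabled" value="true"/>
# is reduced by the sed above to "stonith-enabled=true".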
for x in $properties ; do
case $x in
startup[-_]fencing=*) startup_fencing=${x#*=} ;;
stonith[-_]enabled=*) stonith_enabled=${x#*=} ;;
esac
done
crm_is_not_false ${startup_fencing:-} && startup_fencing=true || startup_fencing=false
crm_is_not_false ${stonith_enabled:-} && stonith_enabled=true || stonith_enabled=false
}
#
# In case this is a two-node cluster (still common with
# DRBD clusters) it does not have real quorum.
# If it is configured to do STONITH, and reboot,
# and after reboot that STONITHed node cluster comm is
# still broken, it will shoot the still online node,
# and try to go online with stale data.
# Exactly what this "fence" handler should prevent.
# But setting constraints in a cluster partition with
# "no-quorum-policy=ignore" will usually succeed.
#
# So we need to differentiate between node reachable or
# not, and DRBD "Consistent" or "UpToDate".
#
try_place_constraint()
{
local peer_state
rc=1
while :; do
check_peer_node_reachable
! $all_excluded_peers_reachable && break
# if it really is still reachable, maybe the replication link
# recovers by itself, and we can get away without taking action?
(( $net_hickup_time > $SECONDS )) || break
sleep $(( net_hickup_time - SECONDS ))
done
if $fail_if_no_quorum ; then
if [[ $have_quorum = 1 ]] ; then
# double check
have_quorum=$(crm_node --quorum -VVVVV)
[[ $have_quorum = 0 ]] && echo WARNING "Cib still had quorum, but no quorum according to crm_node --quorum"
fi
if [[ $have_quorum != 1 ]] ; then
echo WARNING "Found $cib_xml_first_line"
echo WARNING "I don't have quorum; did not place the constraint!"
rc=0
return
fi
fi
set_states_from_proc_drbd_or_events2
if : "all peer disks UpToDate?"; $status_pdsk_all_up_to_date ; then
echo WARNING "All peer disks are UpToDate! Did not place the constraint."
rc=0
return
fi
: == DEBUG == CTS_mode=$CTS_mode ==
: == DEBUG == status_disk_all_consistent=$status_disk_all_consistent ==
: == DEBUG == status_disk_all_up_to_date=$status_disk_all_up_to_date ==
: == DEBUG == all_excluded_peers_reachable=$all_excluded_peers_reachable ==
: == DEBUG == all_excluded_peers_fenced=$all_excluded_peers_fenced ==
if : "Unconfigured?" ; $status_unconfigured; then
# Someone called this script, without the corresponding drbd
# resource being configured. That's not very useful.
echo WARNING "could not determine my disk state: did not place the constraint!"
rc=0
# keep drbd_fence_peer_exit_code at "generic error",
# which will cause a "script is broken" message in case it was
# indeed called as handler from within drbd
# No, NOT fenced/Consistent:
# just because we have been able to shoot him
# does not make our data any better.
elif : "all peers reachable and all local disks consistent?";
$all_excluded_peers_reachable && $status_disk_all_consistent; then
# reachable && $status_disk_all_up_to_date
# is implicitly handled here as well.
create_or_modify_constraint &&
drbd_fence_peer_exit_code=4 rc=0 &&
echo INFO "peers are reachable, my disk is ${DRBD_disk[*]}: placed constraint '$id_prefix-$master_id'"
elif : "all peers fenced (clean offline) and all local disks UpToDate?";
$all_excluded_peers_fenced && $status_disk_all_up_to_date ; then
create_or_modify_constraint &&
drbd_fence_peer_exit_code=7 rc=0 &&
echo INFO "peers are (node-level) fenced, my disk is ${DRBD_disk[*]}: placed constraint '$id_prefix-$master_id'"
# Peer is neither "reachable" nor "fenced" (above would have matched)
# So we just hit some timeout.
# As long as we are UpToDate, place the constraint and continue.
# If you don't like that, use a ridiculously high timeout,
# or patch this script.
elif : "some peer UNCLEAN, but all local disks UpToDate?"; $status_disk_all_up_to_date ; then
# We could differentiate between unreachable,
# and DC-unreachable. In the latter case, placing the
# constraint will fail anyways, and drbd_fence_peer_exit_code
# will stay at "generic error".
create_or_modify_constraint &&
drbd_fence_peer_exit_code=5 rc=0 &&
echo INFO "some peer is still UNCLEAN, my disk is UpToDate: placed constraint '$id_prefix-$master_id' anyways"
# This block is reachable by operator intervention only
# (unless you are hacking this script and know what you are doing)
elif : "not all peers reachable, --unreachable-peer-is-outdated, all local disks consistent?";
! $all_excluded_peers_reachable && [[ $unreachable_peer_is = outdated ]] && $status_disk_all_consistent; then
# If the peer is not reachable, but we are only Consistent, we
# may need some way to still allow promotion.
# Easy way out: --force primary with drbdsetup.
# But that would not place the constraint, nor outdate the
# peer. With this --unreachable-peer-is-outdated, we still try
# to set the constraint. Next promotion attempt will find the
# "correct" constraint, consider the peer as successfully
# fenced, and continue.
create_or_modify_constraint &&
drbd_fence_peer_exit_code=5 rc=0 &&
echo WARNING "peer is unreachable, my disk is only Consistent: --unreachable-peer-is-outdated FORCED constraint '$id_prefix-$master_id'" &&
echo WARNING "This MAY RISK DATA INTEGRITY"
# So I'm not UpToDate, and (some) peer is not reachable.
# Tell the module about "not reachable", and don't do anything else.
else
echo WARNING "some peer is UNCLEAN, my disk is not UpToDate, did not place the constraint!"
drbd_fence_peer_exit_code=5 rc=0
# I'd like to return 6 here, otherwise pacemaker will retry
# forever to promote, even though 6 is not strictly correct.
fi
return $rc
}
commit_suicide()
{
local reboot_timeout=20
local extra_msg
if $stonith_enabled ; then
# avoid double fence, tell pacemaker to kill me
echo WARNING "trying to have pacemaker kill me now!"
crm_attribute -t status -N $HOSTNAME -n terminate -v 1
echo WARNING "told pacemaker to kill me, but scheduling reboot -f in 300 seconds just in case"
# -------------------------
echo WARNING $'\n'" told pacemaker to kill me,"\
$'\n'" but scheduling reboot -f in 300 seconds just in case."\
$'\n'" kill $$ # to cancel" | wall
# -------------------------
reboot_timeout=300
extra_msg="Pacemaker terminate pending. If that fails, I'm "
else
# -------------------------
echo WARNING $'\n'" going to reboot -f in $reboot_timeout seconds"\
$'\n'" kill $$ # to cancel!" | wall
# -------------------------
fi
reboot_timeout=$(( reboot_timeout + SECONDS ))
# pacemaker apparently cannot kill me.
while (( $SECONDS < $reboot_timeout )); do
echo WARNING "${extra_msg}going to reboot -f in $(( reboot_timeout - SECONDS )) seconds! To cancel: kill $$"
sleep 2
done
echo WARNING "going to reboot -f now!"
reboot -f
sleep 864000
}
setup_node_lists()
{
EXCLUDE_NODES=()
INCLUDE_NODES=()
SKIP_NODES=()
ATTRIBUTES=()
VALUES=()
if [[ -z $UP_TO_DATE_NODES ]] ; then
setup_node_lists_8 || return
else
setup_node_lists_9 || return
fi
}
setup_node_lists_8()
{
INCLUDE_NODES=( $HOSTNAME )
EXCLUDE_NODES=( $DRBD_PEER ) # not quoted, so may be empty array.
}
is_up_to_date_node() { (( (UP_TO_DATE_NODES & (1<<$1)) != 0 )); }
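# Illustrative example: with UP_TO_DATE_NODES=0x5 (binary 101, a hypothetical
# mask), node ids 0 and 2 test as up to date, node id 1 does not.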
setup_node_lists_9()
{
local i k v
: === UP_TO_DATE_NODES = $UP_TO_DATE_NODES ===
if [[ $UP_TO_DATE_NODES != 0x[0-9a-fA-F]* ]] || [[ ${UP_TO_DATE_NODES#0x} == *[!0-9a-fA-F]* ]] ; then
echo WARNING "Unexpected input UP_TO_DATE_NODES=$UP_TO_DATE_NODES, expected 0x... hex mask"
return 1
fi
: === DRBD_MY_NODE_ID = $DRBD_MY_NODE_ID ===
case $DRBD_MY_NODE_ID in
[0-9]|[1-9][0-9]) : "looks OK" ;;
*)
echo WARNING "Unexpected input, DRBD_MY_NODE_ID=$DRBD_MY_NODE_ID should be a decimal number"
return 1
esac
if ! is_up_to_date_node $DRBD_MY_NODE_ID ; then
echo WARNING "I ($DRBD_MY_NODE_ID) am not a member of the UP_TO_DATE_NODES=$UP_TO_DATE_NODES set myself."
return 1
fi
k=DRBD_NODE_ID_$DRBD_MY_NODE_ID ; v=${!k}
[[ $HOSTNAME = $v ]] || echo WARNING "My node id ($DRBD_MY_NODE_ID) does not resolve to my hostname ($HOSTNAME) but to $v"
for i in {0..31}; do
k=DRBD_NODE_ID_$i ; v=${!k:-}
[[ $v ]] || continue
if is_up_to_date_node $i; then
INCLUDE_NODES[i]=$v
else
EXCLUDE_NODES[i]=$v
fi
done
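# Illustrative result (hypothetical names): with DRBD_NODE_ID_0=alice,
# DRBD_NODE_ID_1=bob, DRBD_NODE_ID_2=charlie and UP_TO_DATE_NODES=0x5,
# the loop above yields INCLUDE_NODES=([0]=alice [2]=charlie) and
# EXCLUDE_NODES=([1]=bob).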
return 0
}
have_expected_constraint()
{
# do we have the exactly matching constraint already?
[[ "$have_constraint" = "$new_constraint" ]] && return 0
new_constraint_for_compare=$(set +x; echo "$new_constraint" |
sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")
have_constraint_for_compare=$(set +x; echo "$have_constraint" |
sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")
# do we have a semantically equivalent constraint?
[[ "$have_constraint_for_compare" = "$new_constraint_for_compare" ]] && return 0
return 1
}
_node_already_rejected()
{
local node_name=$1
# Not so easy. May be a second link failure and fence action,
# and resulting update to that constraint, but may also be
# the result of a fencing shoot-out race.
# Are we still part of the allowed crowd?
# if we differ by more than number and content of expressions,
# that's not the "expected" constraint.
#[[ "$(set +x; echo "$have_constraint_for_compare" | grep -v '<expression')" != \
# "$(set +x; echo "$new_constraint_for_compare" | grep -v '<expression')" ]] &&
# return (what exactly?)
#
# allow for permutation in attribute order,
# but require "$my_attribute ne $my_value" to be present.
# ! echo "$have_constraint" |
# grep -Ee '<expression .*\<operation="ne"' |
# grep -Fe " attribute=\"$my_attribute\"" |
# grep -qFe " value=\"$my_value\"" \
#
# Maybe we better just ask crm_mon instead:
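# crm_mon -1nL is expected to list negative location constraints as lines of
# the form "<constraint id> prevents <master id> from running ... on <node>";
# the grep below matches exactly that pattern for the given node name.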
( set +x; echo "$cib_xml" | CIB_file=/proc/self/fd/0 crm_mon -1nL "$id_prefix-$master_id" |
grep -q "prevents $master_id from running .*\<on $node_name$" )
}
# you should call have_expected_constraint() first.
existing_constraint_rejects_me()
{
_node_already_rejected $HOSTNAME
}
setup_new_constraint()
{
new_constraint="<rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\">"$'\n'
# double negation: do not run but with my data.
new_constraint+=" <rule role=\"$role\" score=\"-INFINITY\" id=\"$id_prefix-rule-$master_id\">"$'\n'
local i n a v
for i in "${!INCLUDE_NODES[@]}"; do
n=${INCLUDE_NODES[i]}
a=${ATTRIBUTES[i]:-}
if [[ -z "$a" ]] ; then
a=$fencing_attribute
if [[ $a = "#uname" ]]; then
v=$n
elif ! v=$(crm_attribute -Q -t nodes -N $n -n $a 2>/dev/null); then
# FALLBACK.
a="#uname"
v=$n
fi
ATTRIBUTES[i]=$a
VALUES[i]=$v
else
v=${VALUES[i]}
fi
[[ $i = $DRBD_MY_NODE_ID ]] && my_attribute=$a my_value=$v
# double negation: do not run but with my data.
new_constraint+=" <expression attribute=\"$a\" operation=\"ne\" value=\"$v\" id=\"$id_prefix-expr-$i-$master_id\"/>"$'\n'
done
new_constraint+=$' </rule>\n</rsc_location>\n'
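# Illustratively (all ids and names below are hypothetical), the resulting
# constraint looks like:
#   <rsc_location rsc="ms-drbd_r0" id="drbd-fence-by-handler-r0-ms-drbd_r0">
#     <rule role="Master" score="-INFINITY" id="drbd-fence-by-handler-r0-rule-ms-drbd_r0">
#       <expression attribute="#uname" operation="ne" value="alice" id="drbd-fence-by-handler-r0-expr-0-ms-drbd_r0"/>
#     </rule>
#   </rsc_location>
# i.e. the Master role is banned on every node not listed as having good data.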
}
# drbd_peer_fencing fence|unfence
drbd_peer_fencing()
{
# We are going to increase the cib timeout with every timeout,
# see get_cib_xml_from_dc().
# For the actual invocation, we use int(cibtimeout/10).
# scaled by 5 / 4 with each iteration,
# this results in a timeout sequence of 1 2 2 3 4 5 6 7 9 ... seconds
local cibtimeout=18
local rc
local have_constraint
local have_constraint_for_compare
local had_constraint_on_entry
local new_constraint
local new_constraint_for_compare
local my_attribute my_value
# if I cannot query the local cib, give up
get_cib_xml -Ql || return
# input to fence_peer_init:
# $DRBD_RESOURCE is set by command line or from environment.
# $id_prefix is set by command line or default.
# $master_id is set by command line or will be parsed from the cib.
fence_peer_init || return
if [[ $1 = fence ]] || [[ -n $UP_TO_DATE_NODES ]] || $unfence_only_if_owner_match ; then
setup_node_lists || return 1
setup_new_constraint
fi
have_constraint=$(set +x; echo "$cib_xml" | grep_rsc_location "$id_prefix-$master_id")
[[ -z $have_constraint ]] && had_constraint_on_entry=false || had_constraint_on_entry=true
case $1 in
fence)
local startup_fencing stonith_enabled
check_cluster_properties
if ! $had_constraint_on_entry ; then
# try to place it.
try_place_constraint && return
# maybe callback and operator raced for the same constraint?
# before we potentially trigger node level fencing
# or keep IO frozen, double check.
get_cib_xml_from_dc
have_constraint=$(set +x; echo "$cib_xml" | grep_rsc_location "$id_prefix-$master_id")
fi
if [[ -n "$have_constraint" ]] ; then
if have_expected_constraint ; then
echo INFO "suitable constraint already placed: '$id_prefix-$master_id'"
drbd_fence_peer_exit_code=4
rc=0
return
fi
if existing_constraint_rejects_me; then
echo WARNING "constraint already exists, and rejects me: $have_constraint"
else
try_place_constraint && return
echo WARNING "constraint already exists, could not modify it: $have_constraint"
# TODO
# what about the exit status?
# am I allowed to continue, or not?
fi
# anything != 0 will do;
# 21 happened to be "The object already exists" with my cibadmin
rc=21
# maybe: drbd_fence_peer_exit_code=6
# as this is not the constraint we'd like to set,
# it is likely the inverse, so we probably can assume
# that the peer is active primary, or at least has
# better data than us, and wants us outdated.
fi
if [[ $rc != 0 ]]; then
# at least we tried.
# maybe it was already in place?
echo WARNING "DATA INTEGRITY at RISK: could not place the fencing constraint!"
# actually, at risk only if we did not freeze IO locally, or allow to resume.
# which depends on the policies you set.
fi
# XXX policy decision:
if $suicide_on_failure_if_primary && [[ $drbd_fence_peer_exit_code != [3457] ]]; then
set_states_from_proc_drbd_or_events2
$status_primary && commit_suicide
fi
return $rc
;;
unfence)
if [[ -n $have_constraint ]]; then
set_states_from_proc_drbd_or_events2
# if $unfence_only_if_owner_match && ! have_expected_constraint ; then
if $unfence_only_if_owner_match && existing_constraint_rejects_me ; then
echo WARNING "Constraint owner does not match, leaving constraint in place."
else
if $status_disk_all_up_to_date && $status_pdsk_all_up_to_date; then
# try to remove it based on that xml-id
remove_constraint && echo INFO "Removed constraint '$id_prefix-$master_id'"
else
if have_expected_constraint; then
$quiet || echo "expected constraint still in place, nothing to do"
else
# only one of several possible peers was sync'ed up.
# allow that one, but not all, yet.
create_or_modify_constraint || echo WARNING "could not modify, leaving constraint in place."
fi
fi
fi
else
if [[ ${#EXCLUDE_NODES[@]} != 0 ]] ; then
echo WARNING "No constraint in place, called for unfence, but (${EXCLUDE_NODES[*]}) still supposed to be excluded. Weird."
else
$quiet || echo "No constraint in place, nothing to do."
fi
return 0
fi
esac
}
double_check_after_fencing()
{
set_states_from_proc_drbd_or_events2
if $status_pdsk_all_up_to_date ; then
echo WARNING "All peer disks are UpToDate (again), trying to remove the constraint again."
remove_constraint && drbd_fence_peer_exit_code=1 rc=0
return
fi
}
guess_if_pacemaker_will_fence()
{
# try to guess whether it is useful to wait and poll again,
# (node fencing in progress...),
# or if pacemaker thinks the node is "clean" dead.
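# Illustrative example (hypothetical node): a node_state entry such as
#   <node_state id="2" uname="bob" in_ccm="false" crmd="offline" join="down" expected="down" ...
# parses to in_ccm=false crmd=offline join=down expected=down, which the
# logic below (reached when stonith is enabled) treats as a clean shutdown,
# so will_fence stays false.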
local x
# "return values:"
crmd='' in_ccm='' expected='' join='' will_fence=false
# Older pacemaker has an "ha" attribute, too.
# For stonith-enabled=false, the "crmd" attribute may stay "online",
# but once ha="dead", we can stop waiting for changes.
ha_dead=false
node_state=${node_state%>}
node_state=${node_state%/}
for x in ${node_state} ; do
case $x in
in_ccm=\"*\") x=${x#*=\"}; x=${x%\"}; in_ccm=$x ;;
crmd=\"*\") x=${x#*=\"}; x=${x%\"}; crmd=$x ;;
expected=\"*\") x=${x#*=\"}; x=${x%\"}; expected=$x ;;
join=\"*\") x=${x#*=\"}; x=${x%\"}; join=$x ;;
ha=\"dead\") ha_dead=true ;;
esac
done
# if it is not enabled, no point in waiting for it.
if ! $stonith_enabled ; then
# "normalize" the rest of the logic
# where this is called.
# for stonith-enabled=false, and ha="dead",
# reset crmd="offline".
# Then we stop polling the cib for changes.
$ha_dead && crmd="offline"
return
fi
if [[ -z $node_state ]] ; then
# if we don't know anything about the peer,
# and startup_fencing is explicitly disabled,
# no fencing will take place.
$startup_fencing || return
fi
# for further inspiration, see pacemaker:lib/pengine/unpack.c, determine_online_status_fencing()
[[ -z $in_ccm ]] && will_fence=true
[[ $crmd = "banned" ]] && will_fence=true
if [[ ${expected-down} = "down" && $in_ccm = "false" && $crmd != "online" ]]; then
: "pacemaker considers this as clean down"
elif [[ $in_ccm = false ]] || [[ $crmd != "online" ]]; then
will_fence=true
fi
}
# return values in
# $all_excluded_peers_reachable
# $all_excluded_peers_fenced
check_peer_node_reachable()
{
local full_timeout
local nr_other_nodes
local other_node_uname_attrs
# we have a cibadmin -Ql in cib_xml already
# extract the uname= attribute from <node .../> entries,
# but ignore type="ping" nodes, they don't run resources
other_node_uname_attrs=$(set +x; echo "$cib_xml" |
sed -e '/<node /!d; / type="ping"/d;s/^.* \(uname="[^"]*"\).*>$/\1/' |
grep -v -F uname=\"$HOSTNAME\")
set -- $other_node_uname_attrs
nr_other_nodes=$#
if [[ -z $UP_TO_DATE_NODES ]]; then
if [[ -z $DRBD_PEER ]] && [[ $nr_other_nodes = 1 ]]; then
# very unlikely: old DRBD, no DRBD_PEER passed in,
# but in fact only one other cluster node.
# Use that one as DRBD_PEER.
DRBD_PEER=${other_node_uname_attrs#uname=\"}
DRBD_PEER=${DRBD_PEER%\"}
fi
# This time, quoted.
# Yes, it may be empty, resulting in [0]="".
EXCLUDE_NODES=( "$DRBD_PEER" )
fi
get_cib_xml_from_dc || {
all_excluded_peers_reachable=false
all_excluded_peers_fenced=false
return
}
all_excluded_peers_reachable=true
all_excluded_peers_fenced=true
if [[ ${#EXCLUDE_NODES[@]} != 0 ]] ; then
for DRBD_PEER in "${EXCLUDE_NODES[@]}"; do
# If it is already rejected,
# we do not really care if it is currently reachable,
# or currently UNCLEAN or what not.
# What's done, is done.
_node_already_rejected $DRBD_PEER && continue
_check_peer_node_reachable $DRBD_PEER
[[ $peer_state != reachable ]] && all_excluded_peers_reachable=false
[[ $peer_state != fenced ]] && all_excluded_peers_fenced=false
done
fi
}
get_cib_xml_from_dc()
{
while :; do
local t=$SECONDS
#
# Update our view of the cib, ask the DC this time.
# Timeout, in case no DC is available.
# Caution, some cibadmin (pacemaker 0.6 and earlier)
# apparently interpret -t as milliseconds, so they will time out
# many times until the timeout, which is increased below,
# becomes long enough.
#
# Why not use the default timeout?
# Because that would unnecessarily wait for 30 seconds
# or longer, even if the DC is re-elected right now,
# and available within the next second.
#
get_cib_xml -Q -t $(( cibtimeout/10 )) && return 0
# bash magic $SECONDS is seconds since shell invocation.
(( $SECONDS > $dc_timeout )) && return 1
# avoid busy loop
[[ $t = $SECONDS ]] && sleep 1
# try again, longer timeout.
let "cibtimeout = cibtimeout * 5 / 4"
done
}
# return value in $peer_state
# DC-unreachable
# We have not been able to contact the DC.
# fenced
# According to the node_state recorded in the cib,
# the peer is offline and expected down
# (which means successfully fenced, if stonith is enabled)
# reachable
# cib says it's online, and crmadmin -S says peer state is "ok"
# unreachable
# cib says it's offline (but does not yet say "expected" down)
# and we reached the timeout
# unknown
# cib does not say it was offline (or we don't know who the peer is)
# and we reached the timeout
#
_check_peer_node_reachable()
{
DRBD_PEER=$1
while :; do
local state_lines='' node_state='' crmd='' in_ccm=''
local expected='' join='' will_fence='' ha_dead=''
state_lines=$( set +x; echo "$cib_xml" | grep '<node_state ' |
grep -F -e "$other_node_uname_attrs" )
if $CTS_mode; then
# CTS requires startup-fencing=false.
# For PartialStart, NearQuorumPoint and similar tests,
# we would likely stay Consistent, and refuse to Promote.
# And CTS would be very unhappy.
# Pretend that the peer was reachable if we are missing a node_state entry for it.
if [[ $DRBD_PEER ]] && ! echo "$state_lines" | grep -q -F uname=\"$DRBD_PEER\" ; then
peer_state="reachable"
echo WARNING "CTS-mode: pretending that unseen node $DRBD_PEER was reachable"
return
fi
fi
if [[ -z $DRBD_PEER ]]; then
# Multi node cluster, but unknown DRBD Peer.
# This should not be a problem, unless you have
# no_quorum_policy=ignore in an N > 2 cluster.
# (yes, I've seen such beasts in the wild!)
# As we don't know the peer,
# we could only safely return here if *all*
# potential peers are confirmed down.
# Don't try to be smart, just wait for the full
# timeout, which should allow STONITH to
# complete.
full_timeout=$(( $timeout - $SECONDS ))
if (( $full_timeout > 0 )) ; then
echo WARNING "don't know who my peer is; sleep $full_timeout seconds just in case"
sleep $full_timeout
fi
# In the unlikely case that we don't know our DRBD peer,
# there is no point in polling the cib again,
# that won't teach us who our DRBD peer is.
#
# We waited $full_timeout seconds already,
# to allow for node level fencing to shoot us.
#
# So if we are still alive, then obviously no-one has shot us.
#
peer_state="unknown"
return
fi
#
# we know the peer, and/or we are a two node cluster
#
node_state=$(set +x; echo "$state_lines" | grep -F uname=\"$DRBD_PEER\")
# populates in_ccm, crmd, expected, join, will_fence=[false|true]
guess_if_pacemaker_will_fence
if ! $will_fence && [[ $crmd != "online" ]] ; then
# "legacy" cman + pacemaker clusters older than 1.1.10
# may "forget" about startup fencing.
# We can detect this because the "expected" attribute is missing.
# Does not make much difference for our logic, though.
[[ $expected/$in_ccm = "down/false" ]] && peer_state="fenced" || peer_state="unreachable"
return
fi
# So the cib does still indicate the peer was reachable.
#
# try crmadmin; if we can successfully query the state of the remote crmd,
# it is obviously reachable.
#
# Do this only after we have been able to reach a DC above.
# Note: crmadmin timeout is in milli-seconds, and defaults to 30000 (30 seconds).
# Our variable $cibtimeout should be in deci-seconds (see above)
# (unless you use a very old version of pacemaker, so don't do that).
# Convert deci-seconds to milli-seconds, and double it.
if [[ $crmd = "online" ]] ; then
local out
if out=$( crmadmin -t $(( cibtimeout * 200 )) -S $DRBD_PEER ) \
&& [[ $out = *"(ok)" ]]; then
peer_state="reachable"
return
fi
fi
# We know our DRBD peer.
# We are still not sure about its status, though.
#
# It is not (yet) "expected down" per the cib, but it is not
# reliably reachable via crmadmin -S either.
#
# If we already polled for longer than timeout, give up.
#
# For a resource-and-stonith setup, or dual-primaries (which
# you should only use with resource-and-stonith, anyways),
# the recommended timeout is larger than the deadtime or
# stonith timeout, and according to beekhof maybe should be
# tuned up to the election-timeout (which, btw, defaults to 2
# minutes!).
#
if (( $SECONDS >= $timeout )) ; then
[[ $crmd = offline ]] && peer_state="unreachable" || peer_state="unknown"
return
fi
# wait a bit before we poll the DC again
sleep 2
get_cib_xml_from_dc || {
# unreachable: cannot even reach the DC
peer_state="DC-unreachable"
return
}
done
# NOT REACHED
}
source_drbd_shellfuncs()
{
local dir=.
[[ $0 = */* ]] && dir=${0%/*}
for dir in $dir /usr/lib/ocf/resource.d/linbit ; do
test -r "$dir/drbd.shellfuncs.sh" || continue
source "$dir/drbd.shellfuncs.sh" && return
done
echo WARNING "unable to source drbd.shellfuncs.sh"
}
source_drbd_shellfuncs
set_states_from_proc_drbd_or_events2()
{
if [[ -n $UP_TO_DATE_NODES ]] ; then
_drbd_set_status_variables_from_events2
else
# fallback, in case someone tries to use this
# with older DRBD
set_states_from_proc_drbd
fi
}
set_states_from_proc_drbd()
{
local IFS line lines i disk pdsk
# DRBD_MINOR exported by drbdadm since 8.3.3
[[ $DRBD_MINOR ]] || DRBD_MINOR=$(drbdadm ${DRBD_CONF:+ -c "$DRBD_CONF"} sh-minor $DRBD_RESOURCE) || return
# if we have more than one minor, do a word split, ...
set -- $DRBD_MINOR
# ... and convert into regex:
IFS="|$IFS"; DRBD_MINOR="($*)"; IFS=${IFS#?}
# in a fence-peer handler, at least on certain older DRBD versions,
# we must not recurse into netlink, this may be a synchronous callback
# triggered by "drbdsetup primary", while holding the genl_lock.
# grep /proc/drbd instead
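# Illustrative /proc/drbd status line (hypothetical, resource on minor 0):
#   0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
# After s/:/ /g and word splitting below, field 5 is "Primary/Secondary"
# (local/peer role) and field 7 is "UpToDate/UpToDate" (local/peer disk state).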
local DRBD_peer=()
local DRBD_role=()
DRBD_disk=() # used in informational log lines later
local DRBD_pdsk=()
status_disk_all_up_to_date=true
status_disk_all_consistent=true
status_pdsk_all_up_to_date=true
IFS=$'\n'
lines=($(sed -nre "/^ *$DRBD_MINOR: cs:/ { s/:/ /g; p; }" /proc/drbd))
IFS=$' \t\n'
i=0
for line in "${lines[@]}"; do
set -- $line
DRBD_peer[i]=${5#*/}
DRBD_role[i]=${5%/*}
pdsk=${7#*/}
disk=${7%/*}
DRBD_disk[i]=${disk:-Unconfigured}
DRBD_pdsk[i]=${pdsk:-DUnknown}
case $disk in
UpToDate) ;;
Consistent)
status_disk_all_up_to_date=false ;;
*)
status_disk_all_up_to_date=false
status_disk_all_consistent=false ;;
esac
[[ $pdsk != UpToDate ]] && status_pdsk_all_up_to_date=false
let i++
done
if (( i == 0 )) ; then
status_pdsk_all_up_to_date=false
status_disk_all_up_to_date=false
status_disk_all_consistent=false
fi
: == DEBUG == DRBD_role=${DRBD_role[*]} ===
: == DEBUG == DRBD_peer=${DRBD_peer[*]} ===
: == DEBUG == DRBD_pdsk=${DRBD_pdsk[*]} ===
status_primary=false
status_unconfigured=false
case ${DRBD_role[*]} in
*Primary*) status_primary=true ;;
*Secondary*) : "at least it is configured" ;;
*) status_unconfigured=true ;;
esac
}
############################################################
# try to get possible output on stdout/err to syslog
PROG=${0##*/}
redirect_to_logger()
{
local lf=${1:-local5}
case $lf in
# do we want to exclude some?
auth|authpriv|cron|daemon|ftp|kern|lpr|mail|news|syslog|user|uucp|local[0-7])
: OK ;;
*)
echo >&2 "invalid logfacility: $lf"
return
;;
esac
# Funky redirection to avoid logger feeding its own output to itself accidentally.
# Funky double exec to avoid an intermediate sub-shell.
# Sometimes, the sub-shell lingers around, keeps file descriptors open,
# and logger then won't notice the main script has finished,
# forever waiting for further input.
# The second exec replaces the subshell, and logger will notice directly
# when its stdin is closed once the main script exits.
# This avoids the spurious logger processes.
exec > >( exec 1>&- 2>&- logger -t "$PROG[$$]" -p $lf.info ) 2>&1
}
if [[ $- != *x* ]]; then
# you may override with --logfacility below
redirect_to_logger local5
fi
# clean environment just in case.
unset fencing_attribute id_prefix timeout dc_timeout unreachable_peer_is
unset flock_timeout flock_required lock_dir lock_file
quiet=false
unfence_only_if_owner_match=false
CTS_mode=false
suicide_on_failure_if_primary=false
fail_if_no_quorum=true
# poor man's command line argument parsing,
# allow for command line overrides
set -- "$@" ${OCF_RESKEY_unfence_extra_args:-}
while [[ $# != 0 ]]; do
case $1 in
--logfacility=*)
redirect_to_logger ${1#*=}
;;
--logfacility)
redirect_to_logger $2
shift
;;
--resource=*)
DRBD_RESOURCE=${1#*=}
;;
-r|--resource)
DRBD_RESOURCE=$2
shift
;;
--master-id=*)
master_id=${1#*=}
;;
-i|--master-id)
master_id=$2
shift
;;
--role=*)
role=${1#*=}
;;
-l|--role)
role=${2}
shift
;;
--fencing-attribute=*)
fencing_attribute=${1#*=}
;;
-a|--fencing-attribute)
fencing_attribute=$2
shift
;;
--id-prefix=*)
id_prefix=${1#*=}
;;
-p|--id-prefix)
id_prefix=$2
shift
;;
--timeout=*)
timeout=${1#*=}
;;
-t|--timeout)
timeout=$2
shift
;;
--dc-timeout=*)
dc_timeout=${1#*=}
;;
-d|--dc-timeout)
dc_timeout=$2
shift
;;
--quiet)
quiet=true
;;
--unfence-only-if-owner-match)
unfence_only_if_owner_match=true
;;
--flock-required)
flock_required=true
;;
--flock-timeout=*)
flock_timeout=${1#*=}
;;
--flock-timeout)
flock_timeout=$2
shift
;;
--lock-dir=*)
lock_dir=${1#*=}
;;
--lock-dir)
lock_dir=$2
shift
;;
--lock-file=*)
lock_file=${1#*=}
;;
--lock-file)
lock_file=$2
shift
;;
--net-hickup=*|--network-hickup=*)
net_hickup_time=${1#*=}
;;
--net-hickup|--network-hickup)
net_hickup_time=$2
shift
;;
--CTS-mode)
CTS_mode=true
;;
--unreachable-peer-is-outdated)
# This is NOT to be scripted.
# Otherwise people will put this into the handler definition in
# drbd.conf, and all this nice work would be useless.
test -t 0 &&
unreachable_peer_is=outdated
;;
--suicide-on-failure-if-primary)
suicide_on_failure_if_primary=true
;;
-*)
echo >&2 "ignoring unknown option $1"
;;
*)
echo >&2 "ignoring unexpected argument $1"
;;
esac
shift
done
#
# Sanitize lock_file and lock_dir
#
if [[ ${lock_dir:=/var/lock/drbd} != /* ]] ; then
echo WARNING "lock_dir needs to be an absolute path, not [$lock_dir]; using default."
lock_dir=/var/lock/drbd
fi
case ${lock_file:-""} in
"") lock_file=$lock_dir/fence.${DRBD_RESOURCE//\//_} ;;
NONE) : ;;
/*) : ;;
*) lock_file=$lock_dir/$lock_file ;;
esac
if [[ $lock_file != NONE && $lock_file != $lock_dir/* ]]; then
lock_dir=${lock_file%/*}; : ${lock_dir:=/}
: == DEBUG == "override: lock_dir=$lock_dir to match lock_file=$lock_file"
fi
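# Illustrative example (hypothetical resource name): with DRBD_RESOURCE=r0 and
# no --lock-file/--lock-dir options, this resolves to
# lock_file=/var/lock/drbd/fence.r0 and lock_dir=/var/lock/drbd.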
# DRBD_RESOURCE: from environment
# master_id: parsed from cib
: "== unreachable_peer_is == ${unreachable_peer_is:=unknown}"
# apply defaults:
: "== fencing_attribute == ${fencing_attribute:="#uname"}"
: "== id_prefix == ${id_prefix:="drbd-fence-by-handler"}"
: "== role == ${role:="Master"}"
# defaults suitable for most cases
: "== net_hickup_time == ${net_hickup_time:=0}"
: "== timeout == ${timeout:=90}"
: "== dc_timeout == ${dc_timeout:=20}"
: "== flock_timeout == ${flock_timeout:=120}"
: "== flock_required == ${flock_required:=false}"
: "== lock_file == ${lock_file}"
: "== lock_dir == ${lock_dir}"
# check envars normally passed in by drbdadm
# TODO DRBD_CONF is also passed in. we may need to use it in the
# xpath query, in case someone is crazy enough to use different
# conf files with the _same_ resource name.
# for now: do not do that, or hardcode the cib id of the master
# in the handler section of your drbd conf file.
for var in DRBD_RESOURCE; do
if [ -z "${!var}" ]; then
echo "Environment variable \$$var not found (this is normally passed in by drbdadm)." >&2
exit 1
fi
done
# Fixup id-prefix to include the resource name
# There may be multiple drbd instances that are part of the same M/S group, pointing to
# the same master-id. Still, they each need their own constraint, to be
# able to unfence independently when they finish their resync independently.
# Be nice to people who already explicitly configure an id prefix containing
# the resource name.
if [[ $id_prefix != *"-$DRBD_RESOURCE" ]] ; then
id_prefix="$id_prefix-$DRBD_RESOURCE"
: "== id_prefix == ${id_prefix}"
fi
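# Illustrative example (hypothetical names): with the default prefix and
# DRBD_RESOURCE=r0, id_prefix becomes "drbd-fence-by-handler-r0", so the
# constraint id used throughout is e.g. "drbd-fence-by-handler-r0-ms-drbd_r0"
# for a master resource with cib id "ms-drbd_r0".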
# make sure HOSTNAME contains what we expect
HOSTNAME=$(uname -n)
start_time_utc=$(date --utc +%s_%F_%T)
$quiet || {
for k in ${!DRBD_*} UP_TO_DATE_NODES; do printf "%s=%q " "$k" "${!k}"; done
printf '%q' "$0"
[[ $# != 0 ]] && printf ' %q' "$@"
printf '\n'
}
# to be set by drbd_peer_fencing()
drbd_fence_peer_exit_code=1
got_flock=false
if [[ $lock_file != NONE ]] ; then
test -d "$lock_dir" ||
mkdir -p -m 0700 "$lock_dir" ||
echo WARNING "mkdir -p $lock_dir failed"
if exec 9>"$lock_file" && flock --exclusive --timeout $flock_timeout 9
then
got_flock=true
else
echo WARNING "Could not get flock on $lock_file"
$flock_required && exit 1
# If I cannot get the lock file, I can at least still try to place the constraint
fi
: == DEBUG == $SECONDS seconds, got_flock=$got_flock ==
fi
case $PROG in
crm-fence-peer.*)
ACTION=fence
if drbd_peer_fencing fence; then
: == DEBUG == $cibadmin_invocations cibadmin calls ==
: == DEBUG == $SECONDS seconds ==
[[ $drbd_fence_peer_exit_code = [347] ]] && double_check_after_fencing
exit $drbd_fence_peer_exit_code
fi
;;
crm-unfence-peer.*)
ACTION=unfence
if drbd_peer_fencing unfence; then
: == DEBUG == $cibadmin_invocations cibadmin calls ==
: == DEBUG == $SECONDS seconds ==
exit 0
fi
esac 9>&- # Don't want to "leak" the lock fd to child processes.
# 1: unexpected error
exit 1