| #!/bin/bash |
| # |
| |
| export LANG=C LC_ALL=C TZ=":/etc/localtime" |
| |
| sed_rsc_location_suitable_for_string_compare() |
| { |
| # expected input: exactly one tag per line: "^[[:space:]]*<.*/?>$" |
| sed -ne ' |
| # within the rsc_location constraint with that id, |
| /<rsc_location .*\bid="'"$1"'"/, /<\/rsc_location>/ { |
| /<\/rsc_location>/q # done, if closing tag is found |
| s/^[[:space:]]*// # trim spaces |
| s/ *\bid="[^"]*"// # remove the id attribute |
| # print each attribute on its own line: |
| : attr |
| h # remember the current (tail of the) line |
| # remove all but the first attribute, and print, |
| s/^\([^[:space:]]*[[:space:]][^= ]*="[^"]*"\).*$/\1/p |
| g # then restore the remembered line, |
| # and remove the first attribute. |
| s/^\([^[:space:]]*\)[[:space:]][^= ]*="[^"]*"\(.*\)$/\1\2/ |
| # then repeat, until no more attributes are left |
| t attr |
| }' | sort |
| } |
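| |
| # Illustration only (a hedged example, not used by the handler itself; the |
| # resource and host names are made up): a constraint like |
| #   <rsc_location rsc="ms-drbd-r0" id="drbd-fence-by-handler-r0-ms-drbd-r0"> |
| #     <rule role="Master" score="-INFINITY" id="..."> |
| #       <expression attribute="#uname" operation="ne" value="alice" id="..."/> |
| #     </rule> |
| #   </rsc_location> |
| # would come out roughly as |
| #   <expression attribute="#uname" |
| #   <expression operation="ne" |
| #   <expression value="alice" |
| #   <rsc_location rsc="ms-drbd-r0" |
| #   <rule role="Master" |
| #   <rule score="-INFINITY" |
| # one attribute per line, ids stripped, sorted, so that two constraints can |
| # be compared as plain strings regardless of attribute order. |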
| |
| cibadmin_invocations=0 |
| set_constraint() |
| { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cibadmin -C -o constraints -X "$new_constraint" |
| } |
| |
| remove_constraint() |
| { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cibadmin -D -X "<rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\"/>" |
| } |
| |
| cib_xml="" |
| get_cib_xml() { |
| cibadmin_invocations=$(( $cibadmin_invocations + 1 )) |
| cib_xml=$( set +x; cibadmin "$@" ) |
| } |
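| # Usage within this script: |
| #   get_cib_xml -Ql        # query the local cib |
| #   get_cib_xml -Q -t <n>  # ask the DC, with timeout (see check_peer_node_reachable) |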
| |
| |
| # if not passed in, try to "guess" it from the cib |
| # we only know the DRBD_RESOURCE. |
| fence_peer_init() |
| { |
| # we know which instance we are: $OCF_RESOURCE_INSTANCE. |
| # but we do not know the xml ID of the <master/> :( |
| # cibadmin -Ql --xpath \ |
| # '//master[primitive[@type="drbd" and instance_attributes/nvpair[@name = "drbd_resource" and @value="r0"]]]/@id' |
| # but I'd have to pipe that through sed anyways, because @attribute |
| # xpath queries are not supported. |
| # and I'd be incompatible with older cibadmin not supporting --xpath. |
| # be cool, sed it out: |
| : ${master_id=$(set +x; echo "$cib_xml" | |
| sed -ne '/<master /,/<\/master>/ { |
| /<master / h; |
| /<primitive/,/<\/primitive/ { |
| /<instance_attributes/,/<\/instance_attributes/ { |
| /<nvpair .*\bname="drbd_resource"/ { |
| /.*\bvalue="'"$DRBD_RESOURCE"'"/! d |
| x |
| s/^.*\bid="\([^"]*\)".*/\1/p |
| q |
| };};};}')} |
| if [[ -z $master_id ]] ; then |
| echo WARNING "drbd-fencing could not determine the master id of drbd resource $DRBD_RESOURCE" |
| return 1; |
| fi |
| have_constraint=$(set +x; echo "$cib_xml" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id") |
| return 0 |
| } |
| |
| # drbd_fence_peer_exit_code is per the exit code |
| # convention of the DRBD "fence-peer" handler, |
| # obviously. |
| # 3: peer is already outdated or worse (e.g. inconsistent) |
| # 4: peer has been successfully fenced |
| # 5: peer not reachable, assumed to be dead |
| # 6: please outdate yourself, peer is known (or likely) |
| # to have better data, or is even currently primary. |
| # (actually, currently it is "peer is active primary now", but I'd like to |
| # change that meaning slightly towards the above meaning) |
| # 7: peer has been STONITHed, thus assumed to be properly fenced |
| # XXX IMO, this should rather be handled like 5, not 4. |
| |
| # NOTE: |
| # On loss of all cluster comm (cluster split-brain), |
| # without STONITH configured, you always still risk data divergence. |
| # |
| # There are different timeouts: |
| # |
| # --timeout is how long we poll the DC for a definite "unreachable" node state, |
| # before we give up and say "unknown". |
| # This should be longer than "dead time" or "stonith timeout", |
| # the time it takes the cluster manager to declare the other node dead and |
| # shoot it, just to be sure. |
| # |
| # --dc-timeout is how long we try to contact a DC before we give up. |
| # This is necessary, because placing the constraint will fail (with some |
| # internal timeout) if no DC was available when we request the constraint. |
| # Which is likely if the DC crashed. Then the surviving DRBD Primary needs |
| # to wait for a new DC to be elected. Usually such election takes only |
| # fractions of a second, but it can take much longer (the default election |
| # timeout in pacemaker is ~2 minutes!). |
| # |
| # --network-hickup is how long we wait for the replication link to recover, |
| # if crmadmin confirms that the peer is in fact still alive. |
| # It may have been just a network hiccup. If so, there is no need to risk |
| # triggering node level fencing. |
| # |
| # a) Small-ish (1s) timeout, medium (10..20s) dc-timeout: |
| # Intended use case: fencing resource-only, no STONITH configured. |
| # |
| # Even with STONITH properly configured, on cluster split-brain this method |
| # risks completing transactions to user space which may later be lost |
| # due to STONITH. |
| # |
| # With dual-primary setup (cluster file system), |
| # you should use method b). |
| # |
| # b) timeout >= deadtime, dc-timeout > timeout |
| # Intended use case: fencing resource-and-stonith, STONITH configured. |
| # |
| # Difference to a) |
| # |
| # If peer is still reachable according to the cib, |
| # we first poll the cib/try to confirm with crmadmin, |
| # until either crmadmin confirms reachability, timeout has elapsed, |
| # or the peer becomes definitely unreachable. |
| # |
| # This gives STONITH the chance to kill us. |
| # With "fencing resource-and-stontith;" this protects us against |
| # completing transactions to userland which might otherwise be lost. |
| # |
| # We then place the constraint (if we are UpToDate), as explained below, |
| # and return reachable/unreachable according to our last cib status poll |
| # or crmadmin -S result. |
| # |
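| # A minimal drbd.conf sketch wiring up this handler (illustrative values: |
| # the resource name, install path and timeouts are examples, adjust to |
| # your setup): |
| # |
| # resource r0 { |
| #     disk { fencing resource-and-stonith; } |
| #     handlers { |
| #         fence-peer          "/usr/lib/drbd/crm-fence-peer.sh --timeout 90 --dc-timeout 100"; |
| #         after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh"; |
| #     } |
| # } |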
| |
| # |
| # replication link loss, current Primary calls this handler: |
| # We are UpToDate, but we potentially need to wait for a DC election. |
| # Once we have contacted the DC, we poll the cib until the peer is |
| # confirmed unreachable, or crmadmin -S confirms it as reachable, |
| # or timeout expired. |
| # Then we place the constraint, and are done. |
| # |
| # If it is a complete loss of cluster communication, one node will STONITH the other. |
| # For two-node clusters with no-quorum-policy=ignore, we will have a |
| # deathmatch shoot-out, which the former DC is likely to win. |
| # |
| # In dual-primary setups, if it is only replication link loss, both nodes |
| # will call this handler, but only one will succeed in placing the |
| # constraint. The other will then typically need to "commit suicide". |
| # With stonith enabled, and --suicide-on-failure-if-primary, |
| # we will trigger a node level fencing, telling |
| # pacemaker to "terminate" that node, |
| # and scheduling a reboot -f just in case. |
| # |
| # Primary crash, promotion of former Secondary: |
| # DC-election, if any, will have taken place already. |
| # We are UpToDate, we place the constraint, done. |
| # |
| # node or cluster crash, promotion of Secondary with replication link down: |
| # We are "Only" Consistent. Usually any "init-dead-time" or similar has |
| # expired already, and the cib node states are already authoritative |
| # without doing additional waiting. If the peer is still reachable, we |
| # place the constraint - if the peer had better data, it should have a |
| # higher master score, and we should not have been asked to become |
| # primary. If the peer is not reachable, we don't do anything, and DRBD |
| # will refuse to be promoted. This is necessary to avoid data divergence: |
| # if this "crash" was in fact due to a STONITH operation, |
| # the reboot may not have fixed our cluster communications! |
| # |
| # Note that typically, if STONITH is in use, it has been done on any |
| # unreachable node _before_ we are promoted, so the cib should already |
| # know that the peer is dead - if it is. |
| # |
| |
| # slightly different logic than crm_is_true |
| crm_is_not_false() |
| { |
| case $1 in |
| no|n|false|0|off) |
| false ;; |
| *) |
| true ;; |
| esac |
| } |
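| # Examples: |
| #   crm_is_not_false ""     -> true   (unset/empty counts as "not false") |
| #   crm_is_not_false no     -> false |
| #   crm_is_not_false maybe  -> true   (this is where it differs from crm_is_true) |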
| |
| check_cluster_properties() |
| { |
| local x properties=$(set +x; echo "$cib_xml" | |
| sed -n -e '/<crm_config/,/<\/crm_config/ !d;' \ |
| -e '/<cluster_property_set/,/<\/cluster_property_set/ !d;' \ |
| -e '/<nvpair / !d' \ |
| -e 's/^.* name="\([^"]*\)".* value="\([^"]*\)".*$/\1=\2/p' \ |
| -e 's/^.* value="\([^"]*\)".* name="\([^"]*\)".*$/\2=\1/p') |
| |
| for x in $properties ; do |
| case $x in |
| startup[-_]fencing=*) startup_fencing=${x#*=} ;; |
| stonith[-_]enabled=*) stonith_enabled=${x#*=} ;; |
| esac |
| done |
| |
| crm_is_not_false $startup_fencing && startup_fencing=true || startup_fencing=false |
| crm_is_not_false $stonith_enabled && stonith_enabled=true || stonith_enabled=false |
| } |
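| # E.g. a crm_config fragment like |
| #   <nvpair id="opts-stonith-enabled" name="stonith-enabled" value="false"/> |
| # is parsed into "stonith-enabled=false" above, |
| # and thus results in stonith_enabled=false. |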
| |
| |
| # |
| # In case this is a two-node cluster (still common with |
| # DRBD clusters) it does not have real quorum. |
| # If it is configured to STONITH and reboot, |
| # and if, after the reboot, the STONITHed node's cluster |
| # communication is still broken, that node will shoot the |
| # still-online node, and try to go online with stale data. |
| # Exactly what this "fence" handler should prevent. |
| # But setting constraints in a cluster partition with |
| # "no-quorum-policy=ignore" will usually succeed. |
| # |
| # So we need to differentiate between node reachable or |
| # not, and DRBD "Consistent" or "UpToDate". |
| # |
| try_place_constraint() |
| { |
| local peer_state |
| |
| rc=1 |
| |
| while :; do |
| check_peer_node_reachable |
| [[ $peer_state != "reachable" ]] && break |
| # if it really is still reachable, maybe the replication link |
| # recovers by itself, and we can get away without taking action? |
| (( $net_hickup_time > $SECONDS )) || break |
| sleep $(( net_hickup_time - SECONDS )) |
| done |
| |
| set_states_from_proc_drbd |
| : == DEBUG == DRBD_peer=${DRBD_peer[*]} === |
| : == DEBUG == DRBD_pdsk=${DRBD_pdsk[*]} === |
| if $DRBD_pdsk_all_uptodate ; then |
| echo WARNING "All peer disks are UpToDate! Did not place the constraint." |
| rc=0 |
| return |
| fi |
| |
| : == DEBUG == CTS_mode=$CTS_mode == |
| : == DEBUG == DRBD_disk_all_consistent=$DRBD_disk_all_consistent == |
| : == DEBUG == DRBD_disk_all_uptodate=$DRBD_disk_all_uptodate == |
| : == DEBUG == $peer_state/${DRBD_disk[*]}/$unreachable_peer_is == |
| if [[ ${#DRBD_disk[*]} = 0 ]]; then |
| # Someone called this script, without the corresponding drbd |
| # resource being configured. That's not very useful. |
| echo WARNING "could not determine my disk state: did not place the constraint!" |
| rc=0 |
| # keep drbd_fence_peer_exit_code at "generic error", |
| # which will cause a "script is broken" message in case it was |
| # indeed called as handler from within drbd |
| |
| # No, NOT fenced/Consistent: |
| # just because we have been able to shoot him |
| # does not make our data any better. |
| elif [[ $peer_state = reachable ]] && $DRBD_disk_all_consistent; then |
| # = reachable ]] && $DRBD_disk_all_uptodate |
| # is implicitly handled here as well. |
| set_constraint && |
| drbd_fence_peer_exit_code=4 rc=0 && |
| echo INFO "peer is $peer_state, my disk is ${DRBD_disk[*]}: placed constraint '$id_prefix-$master_id'" |
| |
| elif [[ $peer_state = fenced ]] && $DRBD_disk_all_uptodate ; then |
| set_constraint && |
| drbd_fence_peer_exit_code=7 rc=0 && |
| echo INFO "peer is $peer_state, my disk is $DRBD_disk: placed constraint '$id_prefix-$master_id'" |
| |
| # Peer is neither "reachable" nor "fenced" (above would have matched) |
| # So we just hit some timeout. |
| # As long as we are UpToDate, place the constraint and continue. |
| # If you don't like that, use a ridiculously high timeout, |
| # or patch this script. |
| elif $DRBD_disk_all_uptodate ; then |
| # We could differentiate between unreachable, |
| # and DC-unreachable. In the latter case, placing the |
| # constraint will fail anyways, and drbd_fence_peer_exit_code |
| # will stay at "generic error". |
| set_constraint && |
| drbd_fence_peer_exit_code=5 rc=0 && |
| echo INFO "peer is not reachable, my disk is UpToDate: placed constraint '$id_prefix-$master_id'" |
| |
| # This block is reachable by operator intervention only |
| # (unless you are hacking this script and know what you are doing) |
| elif [[ $peer_state != reachable ]] && [[ $unreachable_peer_is = outdated ]] && $DRBD_disk_all_consistent; then |
| # If the peer is not reachable, but we are only Consistent, we |
| # may need some way to still allow promotion. |
| # Easy way out: --force primary with drbdsetup. |
| # But that would not place the constraint, nor outdate the |
| # peer. With this --unreachable-peer-is-outdated, we still try |
| # to set the constraint. Next promotion attempt will find the |
| # "correct" constraint, consider the peer as successfully |
| # fenced, and continue. |
| set_constraint && |
| drbd_fence_peer_exit_code=5 rc=0 && |
| echo WARNING "peer is unreachable, my disk is only Consistent: --unreachable-peer-is-outdated FORCED constraint '$id_prefix-$master_id'" && |
| echo WARNING "This MAY RISK DATA INTEGRITY" |
| |
| # So I'm not UpToDate, and peer is not reachable. |
| # Tell the module about "not reachable", and don't do anything else. |
| else |
| echo WARNING "peer is $peer_state, my disk is ${DRBD_disk[*]}: did not place the constraint!" |
| drbd_fence_peer_exit_code=5 rc=0 |
| # I'd like to return 6 here, otherwise pacemaker will retry |
| # forever to promote, even though 6 is not strictly correct. |
| fi |
| return $rc |
| } |
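| # Condensed view of the branches above (a summary, not additional logic): |
| #   all peer disks UpToDate              -> no constraint, keep generic error |
| #   local disk state unknown             -> no constraint, keep generic error |
| #   reachable + all at least Consistent  -> constraint placed, exit code 4 |
| #   fenced    + all UpToDate             -> constraint placed, exit code 7 |
| #   timed out + all UpToDate             -> constraint placed, exit code 5 |
| #   unreachable + all Consistent |
| #     + --unreachable-peer-is-outdated   -> constraint FORCED, exit code 5 |
| #   anything else                        -> no constraint, exit code 5 |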
| |
| commit_suicide() |
| { |
| local reboot_timeout=20 |
| local extra_msg |
| |
| if $stonith_enabled ; then |
| # avoid double fence, tell pacemaker to kill me |
| echo WARNING "trying to have pacemaker kill me now!" |
| crm_attribute -t status -N $HOSTNAME -n terminate -v 1 |
| echo WARNING "told pacemaker to kill me, but scheduling reboot -f in 300 seconds just in case" |
| |
| # ------------------------- |
| echo WARNING $'\n'" told pacemaker to kill me,"\ |
| $'\n'" but scheduling reboot -f in 300 seconds just in case."\ |
| $'\n'" kill $$ # to cancel" | wall |
| # ------------------------- |
| |
| reboot_timeout=300 |
| extra_msg="Pacemaker terminate pending. If that fails, I'm " |
| |
| else |
| # ------------------------- |
| echo WARNING $'\n'" going to reboot -f in $reboot_timeout seconds"\ |
| $'\n'" kill $$ # to cancel!" | wall |
| # ------------------------- |
| fi |
| |
| reboot_timeout=$(( reboot_timeout + SECONDS )) |
| # pacemaker apparently cannot kill me. |
| while (( $SECONDS < $reboot_timeout )); do |
| echo WARNING "${extra_msg}going to reboot -f in $(( reboot_timeout - SECONDS )) seconds! To cancel: kill $$" |
| sleep 2 |
| done |
| echo WARNING "going to reboot -f now!" |
| reboot -f |
| sleep 864000 |
| } |
| |
| # drbd_peer_fencing fence|unfence |
| drbd_peer_fencing() |
| { |
| local rc |
| # input to fence_peer_init: |
| # $DRBD_RESOURCE is set by command line or from environment. |
| # $id_prefix is set by command line or default. |
| # $master_id is set by command line or will be parsed from the cib. |
| # output of fence_peer_init: |
| local have_constraint new_constraint |
| |
| # if I cannot query the local cib, give up |
| get_cib_xml -Ql || return |
| fence_peer_init || return |
| |
| if [[ $1 = fence ]] || $unfence_only_if_owner_match ; then |
| if [[ $fencing_attribute = "#uname" ]]; then |
| fencing_value=$HOSTNAME |
| elif ! fencing_value=$(crm_attribute -Q -t nodes -n $fencing_attribute 2>/dev/null); then |
| fencing_attribute="#uname" |
| fencing_value=$HOSTNAME |
| fi |
| # double negation: the Master role must not run anywhere but where my data is. |
| new_constraint="\ |
| <rsc_location rsc=\"$master_id\" id=\"$id_prefix-$master_id\"> |
| <rule role=\"$role\" score=\"-INFINITY\" id=\"$id_prefix-rule-$master_id\"> |
| <expression attribute=\"$fencing_attribute\" operation=\"ne\" value=\"$fencing_value\" id=\"$id_prefix-expr-$master_id\"/> |
| </rule> |
| </rsc_location>" |
| fi |
| |
| case $1 in |
| fence) |
| |
| local startup_fencing stonith_enabled |
| check_cluster_properties |
| |
| if [[ -z $have_constraint ]] ; then |
| # try to place it. |
| |
| try_place_constraint && return |
| |
| # maybe callback and operator raced for the same constraint? |
| # before we potentially trigger node level fencing |
| # or keep IO frozen, double check. |
| # try_place_constraint has updated cib_xml from DC |
| |
| have_constraint=$(set +x; echo "$cib_xml" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id") |
| fi |
| |
| if [[ "$have_constraint" = "$(set +x; echo "$new_constraint" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")" ]]; then |
| echo INFO "suitable constraint already placed: '$id_prefix-$master_id'" |
| drbd_fence_peer_exit_code=4 |
| rc=0 |
| elif [[ -n "$have_constraint" ]] ; then |
| # if this id already exists, but looks different, we may have lost a shootout |
| echo WARNING "constraint $have_constraint already exists" |
| # anything != 0 will do; |
| # 21 happened to be "The object already exists" with my cibadmin |
| rc=21 |
| |
| # maybe: drbd_fence_peer_exit_code=6 |
| # as this is not the constraint we'd like to set, |
| # it is likely the inverse, so we probably can assume |
| # that the peer is active primary, or at least has |
| # better data than us, and wants us outdated. |
| fi |
| |
| if [[ $rc != 0 ]]; then |
| # at least we tried. |
| # maybe it was already in place? |
| echo WARNING "DATA INTEGRITY at RISK: could not place the fencing constraint!" |
| fi |
| |
| # XXX policy decision: |
| if $suicide_on_failure_if_primary && [[ $drbd_fence_peer_exit_code != [3457] ]]; then |
| set_states_from_proc_drbd |
| [[ "${DRBD_role[*]}" = *Primary* ]] && commit_suicide |
| fi |
| |
| return $rc |
| ;; |
| unfence) |
| if [[ -n $have_constraint ]]; then |
| set_states_from_proc_drbd |
| if $DRBD_disk_all_uptodate && $DRBD_pdsk_all_uptodate; then |
| if $unfence_only_if_owner_match && [[ "$have_constraint" != "$(set +x; echo "$new_constraint" | |
| sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")" ]] |
| then |
| echo WARNING "Constraint owner does not match, leaving constraint in place." |
| else |
| # try to remove it based on that xml-id |
| remove_constraint && echo INFO "Removed constraint '$id_prefix-$master_id'" |
| fi |
| else |
| local w="My" |
| $DRBD_disk_all_uptodate && w="Peer's" |
| echo WARNING "$w disk(s) are NOT all UpToDate, leaving constraint in place." |
| return 1 |
| fi |
| else |
| $quiet || echo "No constraint in place, nothing to do." |
| return 0 |
| fi |
| esac |
| } |
| |
| double_check_after_fencing() |
| { |
| set_states_from_proc_drbd |
| : == DEBUG == DRBD_peer=${DRBD_peer[*]} === |
| : == DEBUG == DRBD_pdsk=${DRBD_pdsk[*]} === |
| if $DRBD_pdsk_all_uptodate ; then |
| echo WARNING "All peer disks are UpToDate (again), trying to remove the constraint again." |
| remove_constraint && drbd_fence_peer_exit_code=1 rc=0 |
| return |
| fi |
| } |
| |
| guess_if_pacemaker_will_fence() |
| { |
| # try to guess whether it is useful to wait and poll again, |
| # (node fencing in progress...), |
| # or if pacemaker thinks the node is "clean" dead. |
| local x |
| |
| # "return values:" |
| crmd='' in_ccm='' expected='' join='' will_fence=false |
| |
| # Older pacemaker has an "ha" attribute, too. |
| # For stonith-enabled=false, the "crmd" attribute may stay "online", |
| # but once ha="dead", we can stop waiting for changes. |
| ha_dead=false |
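| # Example node_state entry (the attribute set varies with pacemaker version; |
| # node name and id are made up): |
| #   <node_state id="2" uname="bob" in_ccm="false" crmd="offline" |
| #               join="down" expected="down" ...> |
| # in_ccm="false" + crmd="offline" + expected="down" reads as "cleanly down"; |
| # anything less definite means fencing may still be pending. |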
| |
| node_state=${node_state%>} |
| node_state=${node_state%/} |
| for x in ${node_state} ; do |
| case $x in |
| in_ccm=\"*\") x=${x#*=\"}; x=${x%\"}; in_ccm=$x ;; |
| crmd=\"*\") x=${x#*=\"}; x=${x%\"}; crmd=$x ;; |
| expected=\"*\") x=${x#*=\"}; x=${x%\"}; expected=$x ;; |
| join=\"*\") x=${x#*=\"}; x=${x%\"}; join=$x ;; |
| ha=\"dead\") ha_dead=true ;; |
| esac |
| done |
| |
| # if it is not enabled, no point in waiting for it. |
| if ! $stonith_enabled ; then |
| # "normalize" the rest of the logic |
| # where this is called. |
| # for stonith-enabled=false, and ha="dead", |
| # reset crmd="offline". |
| # Then we stop polling the cib for changes. |
| |
| $ha_dead && crmd="offline" |
| return |
| fi |
| |
| if [[ -z $node_state ]] ; then |
| # if we don't know anything about the peer, |
| # and startup_fencing is explicitly disabled, |
| # no fencing will take place. |
| $startup_fencing || return |
| fi |
| |
| # for further inspiration, see pacemaker:lib/pengine/unpack.c, determine_online_status_fencing() |
| [[ -z $in_ccm ]] && will_fence=true |
| [[ $crmd = "banned" ]] && will_fence=true |
| if [[ ${expected-down} = "down" && $in_ccm = "false" && $crmd != "online" ]]; then |
| : "pacemaker considers this as clean down" |
| elif [[ $in_ccm = false ]] || [[ $crmd != "online" ]]; then |
| will_fence=true |
| fi |
| } |
| |
| # return value in $peer_state: |
| # DC-unreachable |
| # We have not been able to contact the DC. |
| # fenced |
| # According to the node_state recorded in the cib, |
| # the peer is offline and expected down |
| # (which means successfully fenced, if stonith is enabled) |
| # reachable |
| # cib says it's online, and crmadmin -S says peer state is "ok" |
| # unreachable |
| # cib says it's offline (but does not yet say "expected" down) |
| # and we reached the timeout |
| # unknown |
| # cib does not say it was offline (or we don't know who the peer is) |
| # and we reached the timeout |
| # |
| check_peer_node_reachable() |
| { |
| # we are going to increase the cib timeout after every timed-out attempt (see below). |
| # for the actual invocation, we use int(cibtimeout/10). |
| # scaled by 5 / 4 with each iteration, |
| # this results in a timeout sequence of 1 2 2 3 4 5 6 7 9 ... seconds |
| local cibtimeout=18 |
| local full_timeout |
| local nr_other_nodes |
| local other_node_uname_attrs |
| |
| # we have a cibadmin -Ql in cib_xml already |
| # filter out <node uname, but ignore type="ping" nodes, |
| # they don't run resources |
| other_node_uname_attrs=$(set +x; echo "$cib_xml" | |
| sed -e '/<node /!d; / type="ping"/d;s/^.* \(uname="[^"]*"\).*>$/\1/' | |
| grep -v -F uname=\"$HOSTNAME\") |
| set -- $other_node_uname_attrs |
| nr_other_nodes=$# |
| |
| while :; do |
| local state_lines='' node_state='' crmd='' in_ccm='' |
| local expected='' join='' will_fence='' ha_dead='' |
| |
| while :; do |
| local t=$SECONDS |
| # |
| # Update our view of the cib, ask the DC this time. |
| # Timeout, in case no DC is available. |
| # Caution, some cibadmin versions (pacemaker 0.6 and earlier) |
| # apparently interpret -t as milliseconds, so they will time out |
| # many times until the timeout, increased below, |
| # becomes long enough. |
| # |
| # Why not use the default timeout? |
| # Because that would unnecessarily wait for 30 seconds |
| # or longer, even if the DC is re-elected right now, |
| # and available within the next second. |
| # |
| get_cib_xml -Q -t $(( cibtimeout/10 )) && break |
| |
| # bash magic $SECONDS is seconds since shell invocation. |
| if (( $SECONDS > $dc_timeout )) ; then |
| # unreachable: cannot even reach the DC |
| peer_state="DC-unreachable" |
| return |
| fi |
| |
| # avoid busy loop |
| [[ $t = $SECONDS ]] && sleep 1 |
| |
| # try again, longer timeout. |
| let "cibtimeout = cibtimeout * 5 / 4" |
| done |
| state_lines=$( set +x; echo "$cib_xml" | grep '<node_state ' | |
| grep -F -e "$other_node_uname_attrs" ) |
| |
| if $CTS_mode; then |
| # CTS requires startup-fencing=false. |
| # For PartialStart, NearQuorumPoint and similar tests, |
| # we would likely stay Consistent, and refuse to Promote. |
| # And CTS would be very unhappy. |
| # Pretend that the peer was reachable if we are missing a node_state entry for it. |
| if [[ $DRBD_PEER ]] && ! echo "$state_lines" | grep -q -F uname=\"$DRBD_PEER\" ; then |
| peer_state="reachable" |
| echo WARNING "CTS-mode: pretending that unseen node $DRBD_PEER was reachable" |
| return |
| fi |
| fi |
| |
| # very unlikely: no DRBD_PEER passed in, |
| # but in fact only one other cluster node. |
| # Use that one as DRBD_PEER. |
| if [[ -z $DRBD_PEER ]] && [[ $nr_other_nodes = 1 ]]; then |
| DRBD_PEER=${other_node_uname_attrs#uname=\"} |
| DRBD_PEER=${DRBD_PEER%\"} |
| fi |
| |
| if [[ -z $DRBD_PEER ]]; then |
| # Multi node cluster, but unknown DRBD Peer. |
| # This should not be a problem, unless you have |
| # no_quorum_policy=ignore in an N > 2 cluster. |
| # (yes, I've seen such beasts in the wild!) |
| # As we don't know the peer, |
| # we could only safely return here if *all* |
| # potential peers are confirmed down. |
| # Don't try to be smart, just wait for the full |
| # timeout, which should allow STONITH to |
| # complete. |
| full_timeout=$(( $timeout - $SECONDS )) |
| if (( $full_timeout > 0 )) ; then |
| echo WARNING "don't know who my peer is; sleep $full_timeout seconds just in case" |
| sleep $full_timeout |
| fi |
| |
| # In the unlikely case that we don't know our DRBD peer, |
| # there is no point in polling the cib again, |
| # that won't teach us who our DRBD peer is. |
| # |
| # We waited $full_timeout seconds already, |
| # to allow for node level fencing to shoot us. |
| # |
| # So if we are still alive, then obviously no-one has shot us. |
| # |
| |
| peer_state="unknown" |
| return |
| fi |
| |
| # |
| # we know the peer or/and are a two node cluster |
| # |
| |
| node_state=$(set +x; echo "$state_lines" | grep -F uname=\"$DRBD_PEER\") |
| |
| # populates in_ccm, crmd, expected, join, will_fence=[false|true] |
| guess_if_pacemaker_will_fence |
| |
| if ! $will_fence && [[ $crmd != "online" ]] ; then |
| |
| # "legacy" cman + pacemaker clusters older than 1.1.10 |
| # may "forget" about startup fencing. |
| # We can detect this because the "expected" attribute is missing. |
| # Does not make much difference for our logic, though. |
| [[ $expected/$in_ccm = "down/false" ]] && peer_state="fenced" || peer_state="unreachable" |
| |
| return |
| fi |
| |
| # So the cib does still indicate the peer was reachable. |
| # |
| # try crmadmin; if we can successfully query the state of the remote crmd, |
| # it is obviously reachable. |
| # |
| # Do this only after we have been able to reach a DC above. |
| # Note: crmadmin timeout is in milli-seconds, and defaults to 30000 (30 seconds). |
| # Our variable $cibtimeout should be in deci-seconds (see above) |
| # (unless you use a very old version of pacemaker, so don't do that). |
| # Convert deci-seconds to milli-seconds, and double it. |
| if [[ $crmd = "online" ]] ; then |
| local out |
| if out=$( crmadmin -t $(( cibtimeout * 200 )) -S $DRBD_PEER ) \ |
| && [[ $out = *"(ok)" ]]; then |
| peer_state="reachable" |
| return |
| fi |
| fi |
| |
| # We know our DRBD peer. |
| # We are still not sure about its status, though. |
| # |
| # It is not (yet) "expected down" per the cib, but it is not |
| # reliably reachable via crmadmin -S either. |
| # |
| # If we already polled for longer than timeout, give up. |
| # |
| # For a resource-and-stonith setup, or dual-primaries (which |
| # you should only use with resource-and-stonith, anyways), |
| # the recommended timeout is larger than the deadtime or |
| # stonith timeout, and according to beekhof maybe should be |
| # tuned up to the election-timeout (which, btw, defaults to 2 |
| # minutes!). |
| # |
| if (( $SECONDS >= $timeout )) ; then |
| [[ $crmd = offline ]] && peer_state="unreachable" || peer_state="unknown" |
| return |
| fi |
| |
| # wait a bit before we poll the DC again |
| sleep 2 |
| done |
| # NOT REACHED |
| } |
| |
| set_states_from_proc_drbd() |
| { |
| local IFS line lines i disk pdsk |
| # DRBD_MINOR exported by drbdadm since 8.3.3 |
| [[ $DRBD_MINOR ]] || DRBD_MINOR=$(drbdadm ${DRBD_CONF:+ -c "$DRBD_CONF"} sh-minor $DRBD_RESOURCE) || return |
| |
| # if we have more than one minor, do a word split, ... |
| set -- $DRBD_MINOR |
| # ... and convert into regex: |
| IFS="|$IFS"; DRBD_MINOR="($*)"; IFS=${IFS#?} |
| |
| # We must not recurse into netlink, |
| # this may be a callback triggered by "drbdsetup primary". |
| # grep /proc/drbd instead |
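| # A typical /proc/drbd status line (drbd 8.x) looks like: |
| #   0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----- |
| # After the "s/:/ /g" below and word splitting, $5 is the role pair |
| # (local/peer) and $7 the disk state pair (local/peer). |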
| |
| DRBD_peer=() |
| DRBD_role=() |
| DRBD_disk=() |
| DRBD_pdsk=() |
| DRBD_disk_all_uptodate=true |
| DRBD_disk_all_consistent=true |
| DRBD_pdsk_all_uptodate=true |
| |
| IFS=$'\n' |
| lines=($(sed -nre "/^ *$DRBD_MINOR: cs:/ { s/:/ /g; p; }" /proc/drbd)) |
| IFS=$' \t\n' |
| |
| i=0 |
| for line in "${lines[@]}"; do |
| set -- $line |
| DRBD_peer[i]=${5#*/} |
| DRBD_role[i]=${5%/*} |
| pdsk=${7#*/} |
| disk=${7%/*} |
| DRBD_disk[i]=${disk:-Unconfigured} |
| DRBD_pdsk[i]=${pdsk:-DUnknown} |
| case $disk in |
| UpToDate) ;; |
| Consistent) |
| DRBD_disk_all_uptodate=false ;; |
| *) |
| DRBD_disk_all_uptodate=false |
| DRBD_disk_all_consistent=false ;; |
| esac |
| [[ $pdsk != UpToDate ]] && DRBD_pdsk_all_uptodate=false |
| let i++ |
| done |
| if (( i == 0 )) ; then |
| DRBD_pdsk_all_uptodate=false |
| DRBD_disk_all_uptodate=false |
| DRBD_disk_all_consistent=false |
| fi |
| } |
| ############################################################ |
| |
| # try to get possible output on stdout/err to syslog |
| PROG=${0##*/} |
| redirect_to_logger() |
| { |
| local lf=${1:-local5} |
| case $lf in |
| # do we want to exclude some? |
| auth|authpriv|cron|daemon|ftp|kern|lpr|mail|news|syslog|user|uucp|local[0-7]) |
| : OK ;; |
| *) |
| echo >&2 "invalid logfacility: $lf" |
| return |
| ;; |
| esac |
| # Funky redirection to avoid logger feeding its own output to itself accidentally. |
| # Funky double exec to avoid an intermediate sub-shell. |
| # Sometimes, the sub-shell lingers around, keeps file descriptors open, |
| # and logger then won't notice the main script has finished, |
| # forever waiting for further input. |
| # The second exec replaces the subshell, and logger will notice directly |
| # when its stdin is closed once the main script exits. |
| # This avoids the spurious logger processes. |
| exec > >( exec 1>&- 2>&- logger -t "$PROG[$$]" -p $lf.info ) 2>&1 |
| } |
| if [[ $- != *x* ]]; then |
| # you may override with --logfacility below |
| redirect_to_logger local5 |
| fi |
| |
| # clean environment just in case. |
| unset fencing_attribute id_prefix timeout dc_timeout unreachable_peer_is |
| unset flock_timeout flock_required lock_dir lock_file |
| quiet=false |
| unfence_only_if_owner_match=false |
| CTS_mode=false |
| suicide_on_failure_if_primary=false |
| |
| # poor man's command line argument parsing, |
| # allow for command line overrides |
| set -- "$@" $OCF_RESKEY_unfence_extra_args |
| while [[ $# != 0 ]]; do |
| case $1 in |
| --logfacility=*) |
| redirect_to_logger ${1#*=} |
| ;; |
| --logfacility) |
| redirect_to_logger $2 |
| shift |
| ;; |
| --resource=*) |
| DRBD_RESOURCE=${1#*=} |
| ;; |
| -r|--resource) |
| DRBD_RESOURCE=$2 |
| shift |
| ;; |
| --master-id=*) |
| master_id=${1#*=} |
| ;; |
| -i|--master-id) |
| master_id=$2 |
| shift |
| ;; |
| --role=*) |
| role=${1#*=} |
| ;; |
| -l|--role) |
| role=${2} |
| shift |
| ;; |
| --fencing-attribute=*) |
| fencing_attribute=${1#*=} |
| ;; |
| -a|--fencing-attribute) |
| fencing_attribute=$2 |
| shift |
| ;; |
| --id-prefix=*) |
| id_prefix=${1#*=} |
| ;; |
| -p|--id-prefix) |
| id_prefix=$2 |
| shift |
| ;; |
| --timeout=*) |
| timeout=${1#*=} |
| ;; |
| -t|--timeout) |
| timeout=$2 |
| shift |
| ;; |
| --dc-timeout=*) |
| dc_timeout=${1#*=} |
| ;; |
| -d|--dc-timeout) |
| dc_timeout=$2 |
| shift |
| ;; |
| --quiet) |
| quiet=true |
| ;; |
| --unfence-only-if-owner-match) |
| unfence_only_if_owner_match=true |
| ;; |
| --flock-required) |
| flock_required=true |
| ;; |
| --flock-timeout=*) |
| flock_timeout=${1#*=} |
| ;; |
| --flock-timeout) |
| flock_timeout=$2 |
| shift |
| ;; |
| --lock-dir=*) |
| lock_dir=${1#*=} |
| ;; |
| --lock-dir) |
| lock_dir=$2 |
| shift |
| ;; |
| --lock-file=*) |
| lock_file=${1#*=} |
| ;; |
| --lock-file) |
| lock_file=$2 |
| shift |
| ;; |
| --net-hickup=*|--network-hickup=*) |
| net_hickup_time=${1#*=} |
| ;; |
| --net-hickup|--network-hickup) |
| net_hickup_time=$2 |
| shift |
| ;; |
| --CTS-mode) |
| CTS_mode=true |
| ;; |
| --unreachable-peer-is-outdated) |
| # This is NOT to be scripted. |
| # Otherwise people will put this into the handler definition in |
| # drbd.conf, and all this nice work would be useless. |
| test -t 0 && |
| unreachable_peer_is=outdated |
| ;; |
| --suicide-on-failure-if-primary) |
| suicide_on_failure_if_primary=true |
| ;; |
| -*) |
| echo >&2 "ignoring unknown option $1" |
| ;; |
| *) |
| echo >&2 "ignoring unexpected argument $1" |
| ;; |
| esac |
| shift |
| done |
| |
| # |
| # Sanitize lock_file and lock_dir |
| # |
| if [[ ${lock_dir:=/var/lock/drbd} != /* ]] ; then |
| echo WARNING "lock_dir needs to be an absolute path, not [$lock_dir]; using default." |
| lock_dir=/var/lock/drbd |
| fi |
| case $lock_file in |
| "") lock_file=$lock_dir/fence.${DRBD_RESOURCE//\//_} ;; |
| NONE) : ;; |
| /*) : ;; |
| *) lock_file=$lock_dir/$lock_file ;; |
| esac |
| if [[ $lock_file != NONE && $lock_file != $lock_dir/* ]]; then |
| lock_dir=${lock_file%/*}; : ${lock_dir:=/} |
| : == DEBUG == "override: lock_dir=$lock_dir to match lock_file=$lock_file" |
| fi |
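| # E.g. with DRBD_RESOURCE=r0 and no overrides, |
| # this yields lock_file=/var/lock/drbd/fence.r0 |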
| |
| # DRBD_RESOURCE: from environment |
| # master_id: parsed from cib |
| |
| : "== unreachable_peer_is == ${unreachable_peer_is:=unknown}" |
| # apply defaults: |
| : "== fencing_attribute == ${fencing_attribute:="#uname"}" |
| : "== id_prefix == ${id_prefix:="drbd-fence-by-handler"}" |
| : "== role == ${role:="Master"}" |
| |
| # defaults suitable for most cases |
| : "== net_hickup_time == ${net_hickup_time:=0}" |
| : "== timeout == ${timeout:=90}" |
| : "== dc_timeout == ${dc_timeout:=20}" |
| : "== flock_timeout == ${flock_timeout:=120}" |
| : "== flock_required == ${flock_required:=false}" |
| : "== lock_file == ${lock_file}" |
| : "== lock_dir == ${lock_dir}" |
| |
| |
| # check envars normally passed in by drbdadm |
| # TODO DRBD_CONF is also passed in. we may need to use it in the |
| # xpath query, in case someone is crazy enough to use different |
| # conf files with the _same_ resource name. |
| # for now: do not do that, or hardcode the cib id of the master |
| # in the handler section of your drbd conf file. |
| for var in DRBD_RESOURCE; do |
| if [ -z "${!var}" ]; then |
| echo "Environment variable \$$var not found (this is normally passed in by drbdadm)." >&2 |
| exit 1 |
| fi |
| done |
| |
| # Fixup id-prefix to include the resource name |
| # There may be multiple drbd instances that are part of the same M/S group, |
| # pointing to the same master-id. They still each need their own constraint, |
| # so they can be unfenced independently as each finishes its resync. |
| # Be nice to people who already explicitly configure an id prefix containing |
| # the resource name. |
| if [[ $id_prefix != *"-$DRBD_RESOURCE" ]] ; then |
| id_prefix="$id_prefix-$DRBD_RESOURCE" |
| : "== id_prefix == ${id_prefix}" |
| fi |
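| # E.g. the default id_prefix "drbd-fence-by-handler" with DRBD_RESOURCE=r0 |
| # becomes "drbd-fence-by-handler-r0". |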
| |
| # make sure $HOSTNAME contains what we expect |
| HOSTNAME=$(uname -n) |
| |
| $quiet || { |
| for k in ${!DRBD_*} UP_TO_DATE_NODES; do printf "%s=%q " "$k" "${!k}"; done |
| printf '%q' "$0" |
| [[ $# != 0 ]] && printf ' %q' "$@" |
| printf '\n' |
| } |
| |
| # to be set by drbd_peer_fencing() |
| drbd_fence_peer_exit_code=1 |
| |
| got_flock=false |
| if [[ $lock_file != NONE ]] ; then |
| test -d "$lock_dir" || |
| mkdir -p -m 0700 "$lock_dir" || |
| echo WARNING "mkdir -p $lock_dir failed" |
| |
| if exec 9>"$lock_file" && flock --exclusive --timeout $flock_timeout 9 |
| then |
| got_flock=true |
| else |
| echo WARNING "Could not get flock on $lock_file" |
| $flock_required && exit 1 |
| |
| # If I cannot get the lock file, I can at least still try to place the constraint |
| fi |
| : == DEBUG == $SECONDS seconds, got_flock=$got_flock == |
| fi |
| |
| case $PROG in |
| crm-fence-peer.sh) |
| if drbd_peer_fencing fence; then |
| : == DEBUG == $cibadmin_invocations cibadmin calls == |
| : == DEBUG == $SECONDS seconds == |
| [[ $drbd_fence_peer_exit_code = [347] ]] && double_check_after_fencing |
| exit $drbd_fence_peer_exit_code |
| fi |
| ;; |
| crm-unfence-peer.sh) |
| if drbd_peer_fencing unfence; then |
| : == DEBUG == $cibadmin_invocations cibadmin calls == |
| : == DEBUG == $SECONDS seconds == |
| exit 0 |
| fi |
| esac 9>&- # Don't want to "leak" the lock fd to child processes. |
| |
| # 1: unexpected error |
| exit 1 |