/*
   drbdadm_adjust.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define _GNU_SOURCE
#define _XOPEN_SOURCE 600
#define _FILE_OFFSET_BITS 64

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>

#include "drbdadm.h"
#include "drbdtool_common.h"
#include "drbdadm_parser.h"
#include "config_flags.h"

/* Fork and exec argv[0], handing the child's stdout back as a stream.
 *
 * drbdsetup show might complain that the device minor does
 * not exist at all. The child's stderr is redirected to /dev/null
 * therefore.
 *
 * pid:  receives the process id of the forked child.
 * argv: NULL terminated argument vector; argv[0] is resolved by execvp().
 *
 * Returns a read stream connected to the child's stdout.  Every setup
 * failure (pipe, /dev/null, fork, fdopen) terminates the program with
 * E_EXEC_ERROR, so callers never receive NULL.
 */
static FILE *m_popen(int *pid,char** argv)
{
	int mpid;
	int pipes[2];
	int dev_null;
	FILE *stream;

	if(pipe(pipes)) {
		err("Creation of pipes failed: %m\n");
		exit(E_EXEC_ERROR);
	}

	dev_null = open("/dev/null", O_WRONLY);
	if (dev_null == -1) {
		err("Opening /dev/null failed: %m\n");
		exit(E_EXEC_ERROR);
	}

	mpid = fork();
	if(mpid == -1) {
		err("Can not fork");
		exit(E_EXEC_ERROR);
	}
	if(mpid == 0) {
		close(pipes[0]); // close reading end
		/* Without working redirections the parent would parse garbage,
		 * so give up instead of exec'ing anyway.  Use _exit(): the
		 * child must not flush stdio buffers inherited from the parent. */
		if (dup2(pipes[1], fileno(stdout)) == -1)
			_exit(E_EXEC_ERROR);
		close(pipes[1]);
		if (dup2(dev_null, fileno(stderr)) == -1)
			_exit(E_EXEC_ERROR);
		close(dev_null);
		execvp(argv[0],argv);
		err("Can not exec");
		_exit(E_EXEC_ERROR);
	}

	close(pipes[1]); // close writing end
	close(dev_null);
	*pid=mpid;

	stream = fdopen(pipes[0],"r");
	if (stream == NULL) {
		err("fdopen() of pipe to '%s' failed: %m\n", argv[0]);
		exit(E_EXEC_ERROR);
	}
	return stream;
}

/* Compare the values of options a and b with the is_equal() callback of
 * the field in ctx whose name matches a->name, and return its result.
 * An option name that ctx does not know is an internal error: the
 * program aborts. */
static int is_equal(struct context_def *ctx, struct d_option *a, struct d_option *b)
{
	struct field_def *def = ctx->fields;

	/* the field table is terminated by an entry without a name */
	while (def->name) {
		if (strcmp(def->name, a->name) == 0)
			return def->is_equal(def, a->value, b->value);
		def++;
	}

	err("Internal error: option '%s' not known in this context\n", a->name);
	abort();
}

/* Ask the field in ctx that is named like opt whether opt->value is its
 * default value.  Options unknown to ctx are reported as "not default". */
static bool is_default(struct context_def *ctx, struct d_option *opt)
{
	struct field_def *def = ctx->fields;

	/* the field table is terminated by an entry without a name */
	while (def->name) {
		if (strcmp(def->name, opt->name) == 0)
			return def->is_default(def, opt->value);
		def++;
	}
	return false;
}

/* Compare the configured option list against the running one.
 *
 * Returns 1 if both describe the same settings, 0 otherwise.  An option
 * present on only one side does not count as a difference as long as
 * is_default() accepts its value.
 *
 * Side effect: every configured option that was found (and equal) in the
 * running list gets ->mentioned set.
 */
static int opts_equal(struct context_def *ctx, struct d_option* conf, struct d_option* running)
{
	struct d_option *r, *c;

	/* pass 1: everything the kernel reports must match the config */
	for (r = running; r; r = r->next) {
		c = find_opt(conf, r->name);

		if (!c) {
			/* not configured: fine only if the kernel runs the default */
			if (is_default(ctx, r)) {
				if (verbose > 3)
					err("Is default: '%s' equal: r=%s\n",
						r->name, r->value);
				continue;
			}
			if (verbose > 2)
				err("Only in running config %s: %s\n",
					r->name, r->value);
			return 0;
		}

		if (!is_equal(ctx, r, c)) {
			if (verbose > 2)
				err("Value of '%s' differs: r=%s c=%s\n",
					c->name, r->value, c->value);
			return 0;
		}

		if (verbose > 3)
			err("Value of '%s' equal: r=%s c=%s\n",
				c->name, r->value, c->value);
		c->mentioned = 1;
	}

	/* pass 2: configured options the kernel did not report at all */
	for (c = conf; c; c = c->next) {
		if (c->mentioned != 0 || is_default(ctx, c))
			continue;
		if (verbose > 2)
			err("Only in config file %s: %s\n", c->name, c->value);
		return 0;
	}

	return 1;
}

/* Compare the network end points of the configured resource with those of
 * the running one.
 *
 * The local side is compared by address, port and address family.  The
 * peer side the kernel sees is compared against the proxy's inside
 * address if conf->me has a proxy, otherwise against conf->peer.
 *
 * Returns 1 if the end points are the same (or neither side has a peer),
 * 0 otherwise.
 */
static int addr_equal(struct d_resource* conf, struct d_resource* running)
{
	int equal;
	char *peer_addr, *peer_af, *peer_port;

	if (conf->peer == NULL && running->peer == NULL) return 1;
	if (running->peer == NULL) return 0;

	equal = !strcmp(conf->me->address,        running->me->address) &&
		!strcmp(conf->me->port,           running->me->port) &&
		!strcmp(conf->me->address_family, running->me->address_family);

	if(conf->me->proxy) {
		peer_addr = conf->me->proxy->inside_addr;
		peer_port = conf->me->proxy->inside_port;
		peer_af = conf->me->proxy->inside_af;
	} else if (conf->peer) {
		peer_addr = conf->peer->address;
		peer_port = conf->peer->port;
		peer_af = conf->peer->address_family;
	} else {
		/* Kernel has a peer, config has none (and no proxy): there is
		 * nothing to dereference.  The placeholders are only used for
		 * the verbose message; "equal" becomes 0 below because
		 * conf->peer is NULL. */
		peer_addr = "(none)";
		peer_port = "(none)";
		peer_af = "(none)";
	}

	equal = equal && conf->peer &&
		!strcmp(peer_addr, running->peer->address) &&
		!strcmp(peer_port, running->peer->port) &&
		!strcmp(peer_af, running->peer->address_family);

	if (!equal && verbose > 2)
		err("Network addresses differ:\n"
			"\trunning: %s:%s:%s -- %s:%s:%s\n"
			"\t config: %s:%s:%s -- %s:%s:%s\n",
			running->me->address_family, running->me->address, running->me->port,
			running->peer->address_family, running->peer->address, running->peer->port,
			conf->me->address_family, conf->me->address, conf->me->port,
			peer_af, peer_addr, peer_port);

	return equal;
}

/* Returns 1 if either both meta-disk strings are "internal" or neither of
 * them is; returns 0 if exactly one of them is "internal". */
static int int_eq(char* m_conf, char* m_running)
{
	int conf_is_internal = (strcmp(m_conf, "internal") == 0);
	int running_is_internal = (strcmp(m_running, "internal") == 0);

	return conf_is_internal == running_is_internal;
}

/* Do the configured and the running volume use the same backing storage?
 *
 * Two volumes without a disk are equal; a disk on only one side is a
 * difference.  Otherwise the disk names must match and both meta-disks
 * must agree on being "internal" or not; for a non-internal meta-disk the
 * meta-disk names must match as well.
 *
 * Returns 1 if equal, 0 if not.
 */
static int disk_equal(struct d_volume *conf, struct d_volume *running)
{
	int same_disk, same_kind, result;

	if (!conf->disk || !running->disk)
		return !conf->disk && !running->disk;

	same_disk = (strcmp(conf->disk, running->disk) == 0);
	same_kind = int_eq(conf->meta_disk, running->meta_disk);
	result = same_disk & same_kind;

	/* external meta data: the device itself has to be the same, too */
	if (strcmp(conf->meta_disk, "internal") != 0)
		result &= (strcmp(conf->meta_disk, running->meta_disk) == 0);

	return result;
}


/* Look up the option called name in several option lists at once.
 *
 * Arguments after name come in (list, result) pairs: for each pair,
 * *result is set to the first element of list whose name equals name, or
 * to NULL if there is none.  The argument list is terminated by a
 * (NULL, NULL) pair.
 */
static void find_option_in_resources(char *name,
		struct d_option *list, struct d_option **opt, ...)
{
	va_list ap;
	struct d_option *cur = list;
	struct d_option **out = opt;

	va_start(ap, opt);
	/* An empty list still has to store NULL through its result pointer,
	 * so only a pair with both members NULL ends the loop. */
	for (; cur || out; ) {
		for (; cur; cur = cur->next) {
			if (!strcmp(cur->name, name))
				break;
		}

		*out = cur;

		/* fetch the next pair: list first, then result pointer */
		cur = va_arg(ap, struct d_option*);
		out = va_arg(ap, struct d_option**);
	}
	va_end(ap);
}

/* Deferred command: run "<drbd_proxy_ctl> -c <ctx->arg>" through
 * m_system_ex() and return its result. */
static int do_proxy_reconf(struct cfg_ctx *ctx)
{
	char *cmdline[4];

	cmdline[0] = drbd_proxy_ctl;
	cmdline[1] = "-c";
	cmdline[2] = (char*)ctx->arg;
	cmdline[3] = NULL;

	return m_system_ex(cmdline, SLEEPS_SHORT, ctx->res->name);
}

#define MAX_PLUGINS (10)
#define MAX_PLUGIN_NAME (16)

/* Check whether the first whitespace delimited word of string is among
 * the first list_len entries of slist.
 *
 * The word is always copied, NUL terminated, into alist[list_len],
 * whether or not it was found.
 *
 * string:   plugin specification; only its first word (the plugin name)
 *           is looked at.
 * slist:    list to search.
 * alist:    list the name is appended to.
 * list_len: number of valid entries in slist, and index of the entry of
 *           alist to fill.
 *
 * Returns 1 if the name is in slist, 0 if not.  Exits with
 * E_CONFIG_INVALID if the name does not fit into MAX_PLUGIN_NAME, or if
 * list_len is not a valid index into alist.
 */
int _is_plugin_in_list(char *string,
		char slist[MAX_PLUGINS][MAX_PLUGIN_NAME],
		char alist[MAX_PLUGINS][MAX_PLUGIN_NAME],
		int list_len)
{
	int word_len, i;
	char *copy;

	/* This has to be verified before alist[list_len] is written to. */
	if (list_len < 0 || list_len >= MAX_PLUGINS) {
		err("Too many proxy plugins.");
		exit(E_CONFIG_INVALID);
	}

	/* <ctype.h> functions need an unsigned char value */
	for(word_len=0; string[word_len]; word_len++)
		if (isspace((unsigned char)string[word_len]))
			break;

	if (word_len+1 >= MAX_PLUGIN_NAME) {
		err("Wrong proxy plugin name %*.*s", word_len, word_len, string);
		exit(E_CONFIG_INVALID);
	}

	/* word_len < MAX_PLUGIN_NAME - 1, so name and terminator fit. */
	copy = alist[list_len];
	memcpy(copy, string, word_len);
	copy[word_len] = 0;

	for(i=0; i<list_len; i++) {
		if (strcmp(slist[i], copy) == 0)
			return 1;
	}

	return 0;
}


/* Work out how to bring the running proxy connection in line with the
 * configuration, and schedule the necessary deferred commands.
 *
 * ctx:     configuration context; ctx->res->me->proxy is dereferenced
 *          unconditionally.
 * running: what is currently active, or NULL if nothing could be parsed.
 *
 * If there is no running state, if "memlimit" deviates by more than 2%,
 * or if a plugin turns up at a different position than before, the whole
 * proxy connection is scheduled to be torn down and rebuilt (conn down,
 * conn up, plugins).  Otherwise one "set plugin ..." command is scheduled
 * per plugin slot that changed.
 *
 * Returns whether DRBD itself needs to reconnect; currently this is
 * always 0.
 */
static int proxy_reconf(struct cfg_ctx *ctx, struct d_resource *running)
{
	int reconn = 0;
	struct d_resource *res = ctx->res;
	struct d_option* res_o, *run_o;
	unsigned long long v1, v2, minimum, diff;
	char *plugin_changes[MAX_PLUGINS], *cp, *conn_name;
	/* It's less memory usage when we're storing char[]. malloc overhead for
	 * the few bytes + pointers is much more. */
	char p_res[MAX_PLUGINS][MAX_PLUGIN_NAME],
		 p_run[MAX_PLUGINS][MAX_PLUGIN_NAME];
	const int max_changes = sizeof(plugin_changes) / sizeof(plugin_changes[0]);
	int used, i, re_do;

	if (!running)
		goto redo_whole_conn;

	find_option_in_resources("memlimit",
			res->me->proxy->options, &res_o,
			running->proxy_options, &run_o,
			NULL, NULL);
	v1 = res_o ? m_strtoll(res_o->value, 1) : 0;
	v2 = run_o ? m_strtoll(run_o->value, 1) : 0;
	minimum = v1 < v2 ? v1 : v2;
	/* The values are unsigned: subtract the smaller from the larger
	 * instead of feeding a wrapped difference to abs(). */
	diff = v1 < v2 ? v2 - v1 : v1 - v2;
	/* We allow an epsilon of 2%, so that small (rounding) deviations do
	 * not cause the connection to be re-established.
	 * Written as a product so a minimum of 0 needs no division. */
	if (res_o &&
			(!run_o || (double)diff > 0.02 * (double)minimum))
		goto redo_whole_conn;

	res_o = res->me->proxy->plugins;
	run_o = running->proxy_plugins;
	used = 0;
	conn_name = proxy_connection_name(res);
	for(i=0; i<MAX_PLUGINS; i++)
	{
		/* bound by the number of elements, not the size in bytes */
		if (used >= max_changes) {
			err("Too many proxy plugin changes");
			exit(E_CONFIG_INVALID);
		}
		/* Now we can be sure that we can store another pointer. */

		if (!res_o) {
			if (run_o) {
				/* More plugins running than configured - just stop here. */
				m_asprintf(&cp, "set plugin %s %d end", conn_name, i);
				plugin_changes[used++] = cp;
			}
			else {
				/* Both at the end? ok, quit loop */
			}
			break;
		}

		/* res_o != NULL. */

		if (!run_o) {
			p_run[i][0] = 0;
			if (_is_plugin_in_list(res_o->name, p_run, p_res, i)) {
				/* Current plugin was already active, just at another position.
				 * Redo the whole connection. */
				goto redo_whole_conn;
			}

			/* More configured than running - just add it, if it's not already
			 * somewhere else. */
			m_asprintf(&cp, "set plugin %s %d %s", conn_name, i, res_o->name);
			plugin_changes[used++] = cp;
		} else {
			/* If we get here, both lists have been filled in parallel, so we
			 * can simply use the common counter. */
			re_do = _is_plugin_in_list(res_o->name, p_run, p_res, i) ||
				_is_plugin_in_list(run_o->name, p_res, p_run, i);
			if (re_do) {
				/* Plugin(s) were moved, not simple reconfigured.
				 * Re-do the whole connection. */
				goto redo_whole_conn;
			}

			/* TODO: We don't (yet) account for possible different ordering of
			 * the parameters to the plugin.
			 *    plugin A 1 B 2
			 * should be treated as equal to
			 *    plugin B 2 A 1. */
			if (strcmp(run_o->name, res_o->name) != 0) {
				/* Either a different plugin, or just different settings
				 * - plugin can be overwritten.  */
				m_asprintf(&cp, "set plugin %s %d %s", conn_name, i, res_o->name);
				plugin_changes[used++] = cp;
			}
		}


		if (res_o)
			res_o = res_o->next;
		if (run_o)
			run_o = run_o->next;
	}

	/* change only a few plugin settings. */
	for(i=0; i<used; i++)
		schedule_deferred_cmd(do_proxy_reconf, ctx, plugin_changes[i], CFG_NET);

	return reconn;

redo_whole_conn:
	/* As the memory is in use while the connection is allocated we have to
	 * completely destroy and rebuild the connection. */

	schedule_deferred_cmd( do_proxy_conn_down, ctx, NULL, CFG_NET_PREREQ);
	schedule_deferred_cmd( do_proxy_conn_up, ctx, NULL, CFG_NET_PREREQ);
	schedule_deferred_cmd( do_proxy_conn_plugins, ctx, NULL, CFG_NET_PREREQ);

	/* With connection cleanup and reopen everything is rebuild anyway, and
	 * DRBD will get a reconnect too.  */
	return 0;
}

/* Decide whether the /dev/drbd/by-res/<resource> link is missing or
 * stale.
 *
 * Returns 0 if /dev/drbd/by-res cannot be stat()ed at all, or if the
 * entry for this resource is a block device with major DRBD_MAJOR and
 * the device minor of the resource's first volume.
 * Returns 1 if the entry cannot be stat()ed or describes something else.
 */
int need_trigger_kobj_change(struct d_resource *res)
{
	struct stat sbuf;
	char *link_name;
	int need_trigger;

	if (stat("/dev/drbd/by-res", &sbuf))	/* probably no udev rules in use */
		return 0;

	/* only build the name once we know we are going to use it */
	m_asprintf(&link_name, "/dev/drbd/by-res/%s", res->name);

	if (stat(link_name, &sbuf)) {
		/* resource link cannot be stat()ed. */
		need_trigger = 1;
	} else {
		/* double check device information; if the link exists and is
		 * the expected block major:minor, there is nothing to do. */
		need_trigger = !S_ISBLK(sbuf.st_mode) ||
			major(sbuf.st_rdev) != DRBD_MAJOR ||
			minor(sbuf.st_rdev) != res->me->volumes->device_minor;
	}

	/* NOTE(review): assumes m_asprintf() hands out malloc()ed memory
	 * (asprintf semantics) that the caller owns - confirm. */
	free(link_name);
	return need_trigger;
}

/* Moves option o to the head of the singly linked option list *head,
 * and marks it (adj_skip) as to be skipped for "adjust only" commands
 * like disk-options; see e.g. adm_attach_and_or_disk_options().
 *
 * A NULL o is ignored.  Otherwise o has to be an element of *head.
 */
static void move_opt_to_head(struct d_option **head, struct d_option *o)
{
	struct d_option *prev;

	if (o == NULL)
		return;

	o->adj_skip = 1;
	if (*head == o)
		return;		/* already in front */

	/* locate the predecessor of o */
	prev = *head;
	while (prev->next != o)
		prev = prev->next;

	/* unlink o, then put it in front */
	prev->next = o->next;
	o->next = *head;
	*head = o;
}

/* Flag conf for a full attach (adj_attach) if the "max-bio-bvecs"
 * restriction differs between the configuration and the kernel.
 *
 * Side effect: in both disk option lists the option, if present, is moved
 * to the head of the list and marked adj_skip by move_opt_to_head().
 */
void compare_max_bio_bvecs(struct d_volume *conf, struct d_volume *kern)
{
	struct d_option *conf_opt = find_opt(conf->disk_options, "max-bio-bvecs");
	struct d_option *kern_opt = find_opt(kern->disk_options, "max-bio-bvecs");
	int conf_set, kern_set;

	/* move to front of list, so it can be skipped
	 * for a later opts_equal */
	move_opt_to_head(&conf->disk_options, conf_opt);
	move_opt_to_head(&kern->disk_options, kern_opt);

	/* a kernel value at its default counts as "not set" */
	if (kern_opt && is_default(&attach_cmd_ctx, kern_opt))
		kern_opt = NULL;

	conf_set = (conf_opt != NULL);
	kern_set = (kern_opt != NULL);

	if (conf_set != kern_set) {
		/* there was a bvec restriction set,
		 * but it is no longer in config, or vice versa */
		conf->adj_attach = 1;
	} else if (conf_set && !is_equal(&attach_cmd_ctx, kern_opt, conf_opt)) {
		/* set on both sides, but the restrictions differ */
		conf->adj_attach = 1;
	}
}

/* Flag conf for a resize (adj_resize) if the "size" disk option differs
 * between the configuration and the kernel.
 *
 * Changing "size" needs a resize command, it cannot be handled as a
 * plain disk option change.  So, like compare_max_bio_bvecs() does for
 * "max-bio-bvecs", the option is moved to the head of both disk option
 * lists (and marked adj_skip), where it is easy to skip later on.
 */
void compare_size(struct d_volume *conf, struct d_volume *kern)
{
	struct d_option *conf_opt = find_opt(conf->disk_options, "size");
	struct d_option *kern_opt = find_opt(kern->disk_options, "size");
	int conf_set, kern_set;

	move_opt_to_head(&conf->disk_options, conf_opt);
	move_opt_to_head(&kern->disk_options, kern_opt);

	/* a kernel value at its default counts as "not set" */
	if (kern_opt && is_default(&attach_cmd_ctx, kern_opt))
		kern_opt = NULL;

	conf_set = (conf_opt != NULL);
	kern_set = (kern_opt != NULL);

	if (conf_set != kern_set) {
		/* set on one side only */
		conf->adj_resize = 1;
	} else if (conf_set && !is_equal(&attach_cmd_ctx, conf_opt, kern_opt)) {
		/* set on both sides, to different values */
		conf->adj_resize = 1;
	}
}

/* Compare one configured volume with its running counterpart and record
 * in the conf->adj_* flags what has to be done to adjust it:
 * adj_add_minor/adj_del_minor, adj_attach/adj_detach, adj_resize (via
 * compare_size()) and adj_disk_opts.
 *
 * The order of the steps matters: compare_size() and the final
 * opts_equal() are only run while adj_attach is still unset.
 * Both disk option lists may get reordered by the compare_*() helpers.
 */
void compare_volume(struct d_volume *conf, struct d_volume *kern)
{
	struct d_option *c, *k;

	/* a changed device minor means: delete the old minor, add the new one */
	conf->adj_add_minor = conf->device_minor != kern->device_minor;
	conf->adj_del_minor = conf->adj_add_minor;

	/* do we need to do a full attach,
	 * potentially with a detach first? */
	if (!disk_equal(conf, kern) || conf->adj_add_minor) {
		conf->adj_attach = conf->disk != NULL;
		conf->adj_detach = kern->disk != NULL;
	}

	/* do we need to do a full (detach/)attach,
	 * because max_bio_bvec setting differs? */
	compare_max_bio_bvecs(conf, kern);

	/* do we need to resize? */
	if (!conf->adj_attach)
		compare_size(conf, kern);

	/* skip these two options (if present) for the opts_equal below.
	 * These have been move_opt_to_head()ed before already. */
	k = kern->disk_options;
	while (k && (!strcmp(k->name, "size") || !strcmp(k->name, "max-bio-bvecs")))
		k = k->next;
	c = conf->disk_options;
	while (c && (!strcmp(c->name, "size") || !strcmp(c->name, "max-bio-bvecs")))
		c = c->next;

	/* is it sufficient to only adjust the disk options? */
	if (!conf->adj_attach)
		conf->adj_disk_opts = !opts_equal(&disk_options_ctx, c, k);

	/* compare_max_bio_bvecs() may have requested the attach; a disk that
	 * is attached in the kernel then has to be detached first. */
	if (conf->adj_attach && kern->disk)
		conf->adj_detach = 1;
}

struct d_volume *new_to_be_deleted_minor_from_template(struct d_volume *kern)
{
	/* need to delete it from kernel.
	 * Create a minimal volume,
	 * and flag it as "del_minor". */
	struct d_volume *conf = calloc(1, sizeof(*conf));
	conf->vnr = kern->vnr;
	/* conf->device: no need */
	conf->device_minor = kern->device_minor;
	if (kern->disk) {
		conf->disk = strdup(kern->disk);
		conf->meta_disk = strdup(kern->meta_disk);
		conf->meta_index = strdup(kern->meta_index);
		conf->adj_detach = 1;
	}

	conf->adj_del_minor = 1;
	return conf;
}

/* Internal consistency check: if x evaluates to false, report the file,
 * line, function and the text of the failed expression via err(), then
 * abort().  Wrapped in do { } while (0) so it can be used like a single
 * statement. */
#define ASSERT(x) do { if (!(x)) {				\
	err("%s:%u:%s: ASSERT(%s) failed.\n", __FILE__,		\
	     __LINE__, __func__, #x);				\
	abort(); }						\
	} while (0)

/* Both conf and kern are single linked lists
 * supposed to be ordered by ->vnr;
 * both are walked in parallel:
 *  - a volume only in kern gets a dummy volume conjured up, to issue
 *    "del-minor" (and possibly "detach") on,
 *  - a volume only in conf is flagged adj_add_minor and adj_attach,
 *  - a volume in both lists is handed to compare_volume().
 * The dummy volumes are finally inserted into the conf list.
 * The resulting new conf list head is returned.
 */
struct d_volume *compare_volumes(struct d_volume *conf, struct d_volume *kern)
{
	struct d_volume *to_be_deleted = NULL;
	struct d_volume *conf_head = conf;
	struct d_volume *next;

	while (conf || kern) {
		if (kern && (conf == NULL || kern->vnr < conf->vnr)) {
			to_be_deleted = INSERT_SORTED(to_be_deleted,
					new_to_be_deleted_minor_from_template(kern),
					vnr);
			kern = kern->next;
		} else if (conf && (kern == NULL || kern->vnr > conf->vnr)) {
			conf->adj_add_minor = 1;
			conf->adj_attach = 1;
			conf = conf->next;
		} else {
			ASSERT(conf);
			ASSERT(kern);
			ASSERT(conf->vnr == kern->vnr);

			compare_volume(conf, kern);
			conf = conf->next;
			kern = kern->next;
		}
	}

	/* Inserting an element into conf_head's list rewrites its ->next
	 * (NOTE(review): INSERT_SORTED is not visible here, but a sorted
	 * insert has to do that).  Remember the successor within
	 * to_be_deleted first, otherwise the walk would continue in the
	 * wrong list and the remaining to-be-deleted volumes would be lost. */
	for (conf = to_be_deleted; conf; conf = next) {
		next = conf->next;
		conf_head = INSERT_SORTED(conf_head, conf, vnr);
	}
	return conf_head;
}

/* Return the volume of the list kern_head that has the same volume number
 * (->vnr) as conf_vol, or NULL if there is none. */
static struct d_volume *matching_volume(struct d_volume *conf_vol, struct d_volume *kern_head)
{
	struct d_volume *candidate;

	for_each_volume(candidate, kern_head) {
		if (candidate->vnr != conf_vol->vnr)
			continue;
		return candidate;
	}

	return NULL;
}

/*
 * CAUTION this modifies global static char * config_file!
 */
int adm_adjust(struct cfg_ctx *ctx)
{
	char* argv[20];
	int pid, argc;
	struct d_resource* running;
	struct d_volume *vol;

	/* necessary per resource actions */
	int do_res_options = 0;

	/* necessary per connection actions
	 * (currently we still only have one connection per resource */
	int do_net_options = 0;
	int do_disconnect = 0;
	int do_connect = 0;

	/* necessary per volume actions are flagged
	 * in the vol->adj_* members. */

	int can_do_proxy = 1;
	char config_file_dummy[250];
	char *show_conn;
	char *resource_name;

	/* disable check_uniq, so it won't interfere
	 * with parsing of drbdsetup show output */
	config_valid = 2;


	/* setup error reporting context for the parsing routines */
	line = 1;
	sprintf(config_file_dummy,"drbdsetup show %s", ctx->res->name);
	config_file = config_file_dummy;

	argc=0;
	argv[argc++]=drbdsetup;
	argv[argc++]="show";
	ssprintf(argv[argc++], "%s", ctx->res->name);
	argv[argc++]=0;

	/* actually parse drbdsetup show output */
	yyin = m_popen(&pid,argv);
	running = parse_resource_for_adjust(ctx);
	fclose(yyin);
	waitpid(pid, 0, 0);

	if (running) {
		/* Sets "me" and "peer" pointer */
		post_parse(running, 0);
		set_peer_in_resource(running, 0);
	}


	/* Parse proxy settings, if this host has a proxy definition.
	 * FIXME what about "zombie" proxy settings, if we remove proxy
	 * settings from the config file without prior proxy-down, this won't
	 * clean them from the proxy. */
	if (ctx->res->me->proxy) {
		line = 1;
		resource_name = proxy_connection_name(ctx->res);
		m_asprintf(&show_conn, "show proxy-settings %s", resource_name);
		sprintf(config_file_dummy,"drbd-proxy-ctl -c '%s'", show_conn);
		config_file = config_file_dummy;

		argc=0;
		argv[argc++]=drbd_proxy_ctl;
		argv[argc++]="-c";
		argv[argc++]=show_conn;
		argv[argc++]=0;

		/* actually parse "drbd-proxy-ctl show" output */
		yyin = m_popen(&pid,argv);
		can_do_proxy = !parse_proxy_options_section(running);
		fclose(yyin);

		waitpid(pid,0,0);
	}

	ctx->res->me->volumes = compare_volumes(ctx->res->me->volumes,
			running ? running->me->volumes : NULL);

	if (running) {
		do_connect = !addr_equal(ctx->res,running);
		do_net_options = !opts_equal(&net_options_ctx, ctx->res->net_options, running->net_options);
		do_res_options = !opts_equal(&resource_options_cmd_ctx, ctx->res->res_options, running->res_options);
	} else {
		do_res_options = 0;
		do_connect = 1;
		schedule_deferred_cmd(adm_new_resource, ctx, "new-resource", CFG_PREREQ);
	}

	if (ctx->res->me->proxy && can_do_proxy)
		do_connect |= proxy_reconf(ctx, running);

	do_disconnect = do_connect && running && (running->peer || running->net_options);

	if (do_res_options)
		schedule_deferred_cmd(adm_set_default_res_options, ctx, "resource-options", CFG_RESOURCE);

	/* do we need to attach,
	 * do we need to detach first,
	 * or is this just some attribute change? */
	for_each_volume(vol, ctx->res->me->volumes) {
		struct cfg_ctx tmp_ctx = { .res = ctx->res, .vol = vol };
		if (vol->adj_detach || vol->adj_del_minor) {
			struct d_volume *kern_vol = matching_volume(vol, running->me->volumes);
			struct cfg_ctx k_ctx = tmp_ctx;
			if (kern_vol != NULL)
				k_ctx.vol = kern_vol;
			if (vol->adj_detach)
				schedule_deferred_cmd(adm_generic_s, &k_ctx, "detach", CFG_PREREQ);
			if (vol->adj_del_minor)
				schedule_deferred_cmd(adm_generic_s, &k_ctx, "del-minor", CFG_PREREQ);
	        }
		if (vol->adj_add_minor)
			schedule_deferred_cmd(adm_new_minor, &tmp_ctx, "new-minor", CFG_DISK_PREREQ);
		if (vol->adj_attach)
			schedule_deferred_cmd(adm_attach, &tmp_ctx, "attach", CFG_DISK);
		if (vol->adj_disk_opts)
			schedule_deferred_cmd(adm_set_default_disk_options, &tmp_ctx, "disk-options", CFG_DISK);
		if (vol->adj_resize)
			schedule_deferred_cmd(adm_resize, &tmp_ctx, "resize", CFG_DISK);
	}

	if (do_connect) {
		/* "disconnect" specifying the end-point addresses currently in-use,
		 * before "connect"ing with the addresses currently in-config-file. */
		if (do_disconnect) {
			struct cfg_ctx tmp_ctx = { .res = running, .vol = vol, };
			schedule_deferred_cmd(adm_disconnect, &tmp_ctx, "disconnect", CFG_NET_PREREQ);
		}
		schedule_deferred_cmd(adm_connect, ctx, "connect", CFG_NET);
		do_net_options = 0;
	}

	if (do_net_options)
		schedule_deferred_cmd(adm_set_default_net_options, ctx, "net-options", CFG_NET);

	return 0;
}
