blob: df4627e0c4b4eb55fa5913050cc2d8763fc146a0 [file] [log] [blame]
/*
* device-discovery.c: main function, discovering device and processing
* pipe request from kernel.
*
* Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/select.h>
#include <sys/inotify.h>
#include <linux/kdev_t.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/sg.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <syslog.h>
#include <dirent.h>
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
#include <libgen.h>
#include <errno.h>
#include <libdevmapper.h>
#include "device-discovery.h"
/* Size of the fixed part of one inotify event; the variable-length name follows it. */
#define EVENT_SIZE (sizeof(struct inotify_event))
/* Size in bytes of the buffer used when reading from the inotify descriptor. */
#define EVENT_BUFSIZE (1024 * EVENT_SIZE)
/* The blocklayout pipe file and the two directories above it that are watched. */
#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/nfs/blocklayout"
#define NFSPIPE_DIR "/var/lib/nfs/rpc_pipefs/nfs"
#define RPCPIPE_DIR "/var/lib/nfs/rpc_pipefs"
/* Pid file created (and locked) by main() when running as a daemon. */
#define PID_FILE "/var/run/blkmapd.pid"
/* Head of the singly linked list of disks found by the last discovery pass. */
struct bl_disk *visible_disk_list;
/*
 * bl_watch_fd:      inotify descriptor.
 * bl_pipe_fd:       descriptor of BL_PIPE_FILE, set to -1 when the pipe is gone.
 * nfs_pipedir_wfd:  inotify watch descriptor for NFSPIPE_DIR.
 * rpc_pipedir_wfd:  inotify watch descriptor for RPCPIPE_DIR.
 */
int bl_watch_fd, bl_pipe_fd, nfs_pipedir_wfd, rpc_pipedir_wfd;
/* Descriptor of PID_FILE; stays -1 when no pid file was created. */
int pidfd = -1;
/*
 * Find the entry for a given device node in a list of paths.
 *
 * Walks the singly linked list starting at @paths and returns the first
 * entry whose full_path is exactly equal to @filepath. Returns NULL when
 * the list is empty or no entry matches.
 */
struct bl_disk_path *bl_get_path(const char *filepath,
				 struct bl_disk_path *paths)
{
	struct bl_disk_path *cur;

	for (cur = paths; cur != NULL; cur = cur->next) {
		if (strcmp(cur->full_path, filepath) == 0)
			return cur;
	}
	return NULL;
}
/*
 * Report whether @path looks like a partition of @valid_path.
 *
 * Returns 1 when valid_path->full_path is a leading prefix of
 * path->full_path (for example "/dev/sda" against "/dev/sda1"; two
 * identical names also match), and 0 otherwise.
 */
int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
{
	const char *base = valid_path->full_path;
	size_t base_len = strlen(base);

	return strncmp(base, path->full_path, base_len) == 0 ? 1 : 0;
}
/*
 * Decide whether @path should replace the disk's current valid path.
 *
 * A multipath device can be seen in the PASSIVE, ACTIVE or PSEUDO state,
 * ordered PSEUDO > ACTIVE > PASSIVE, and the path with the highest state
 * is the one used to create the pseudo device. When device-mapper
 * multipath support is required a pseudo device should exist for every
 * multipath device; otherwise the active path is the one chosen.
 *
 * Returns 0 (keep the current valid path) only when the disk already has
 * a valid path whose state is at least @state and @path is merely a
 * partition of that valid path. Returns 1 (update) in every other case,
 * including when the disk has no valid path yet.
 */
int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
		   struct bl_disk *disk)
{
	struct bl_disk_path *current = disk->valid_path;

	if (current == NULL)
		return 1;
	if (current->state < state)
		return 1;
	/* a partition of the current valid path is never a better choice */
	return bl_is_partition(current, path) ? 0 : 1;
}
/*
 * Free the whole list of discovered disks.
 *
 * For every disk on visible_disk_list this releases each path entry
 * (including its full_path string), the serial and the disk itself.
 * visible_disk_list is empty (NULL) on return.
 */
void bl_release_disk(void)
{
	struct bl_disk *disk, *next_disk;
	struct bl_disk_path *path, *next_path;

	for (disk = visible_disk_list; disk != NULL; disk = next_disk) {
		next_disk = disk->next;

		for (path = disk->paths; path != NULL; path = next_path) {
			next_path = path->next;
			free(path->full_path);
			free(path);
		}

		/* free(NULL) is a no-op, so a missing serial needs no test */
		free(disk->serial);
		free(disk);
	}
	visible_disk_list = NULL;
}
/*
 * Probe one block device node and record it in visible_disk_list.
 *
 * @filepath: device node to examine, e.g. "/dev/sdb".
 *
 * The device is opened read-only and its size, device number, serial and
 * path state are read. Devices that cannot be opened, have zero size or
 * yield no serial are ignored. Device-mapper devices are classified as
 * BL_PATH_STATE_PSEUDO; only devices whose state is BL_PATH_STATE_ACTIVE
 * are recorded.
 *
 * Disks are matched by serial. A path that is already listed for its disk
 * is left alone. A new path on a known disk is prepended to that disk's
 * path list and may become its valid path (see bl_update_path()). A path
 * with an unknown serial creates a new disk entry, which takes ownership
 * of the serial. In every other case the serial is freed here.
 */
void bl_add_disk(char *filepath)
{
	struct bl_disk *disk = NULL;
	struct bl_disk_path *diskpath = NULL, *path = NULL;
	struct bl_serial *serial = NULL;
	enum bl_path_state_e ap_state;
	struct stat sb;
	off_t size = 0;
	dev_t dev;
	int fd;

	fd = open(filepath, O_RDONLY | O_LARGEFILE);
	if (fd < 0)
		return;

	if (fstat(fd, &sb)) {
		close(fd);
		return;
	}

	if (!sb.st_size) {
		/*
		 * BLKGETSIZE stores an unsigned long, which need not have the
		 * same width as off_t, so read it into a matching temporary.
		 * The value is kept in the units the ioctl reports.
		 */
		unsigned long blk_size = 0;

		if (ioctl(fd, BLKGETSIZE, &blk_size) == 0)
			size = (off_t)blk_size;
	} else {
		size = sb.st_size;
	}

	if (!size) {
		close(fd);
		return;
	}

	dev = sb.st_rdev;
	serial = bldev_read_serial(fd, filepath);
	if (dm_is_dm_major(major(dev)))
		ap_state = BL_PATH_STATE_PSEUDO;
	else
		ap_state = bldev_read_ap_state(fd);
	close(fd);

	/* without a serial the device cannot be matched against known disks */
	if (!serial)
		return;

	if (ap_state != BL_PATH_STATE_ACTIVE)
		goto out;

	for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
		/* Already scanned or a partition?
		 * XXX: if released each time, maybe not need to compare
		 */
		if ((serial->len == disk->serial->len) &&
		    !memcmp(serial->data, disk->serial->data, serial->len)) {
			diskpath = bl_get_path(filepath, disk->paths);
			break;
		}
	}

	/* this exact path is already recorded for the disk */
	if (disk && diskpath)
		goto out;

	/* add path */
	path = malloc(sizeof(*path));
	if (!path) {
		BL_LOG_ERR("%s: Out of memory!\n", __func__);
		goto out;
	}
	path->next = NULL;
	path->state = ap_state;
	path->full_path = strdup(filepath);
	if (!path->full_path) {
		BL_LOG_ERR("%s: Out of memory!\n", __func__);
		goto out;
	}

	if (!disk) {		/* add disk */
		disk = malloc(sizeof(*disk));
		if (!disk) {
			BL_LOG_ERR("%s: Out of memory!\n", __func__);
			goto out;
		}
		disk->next = visible_disk_list;
		disk->dev = dev;
		disk->size = size;
		disk->serial = serial;
		serial = NULL;	/* ownership moved to the disk */
		disk->valid_path = path;
		disk->paths = path;
		visible_disk_list = disk;
	} else {
		path->next = disk->paths;
		disk->paths = path;
		/* check whether we need to update disk info */
		if (bl_update_path(path, path->state, disk)) {
			disk->dev = dev;
			disk->size = size;
			disk->valid_path = path;
		}
	}
	path = NULL;		/* now linked into the disk's path list */

out:
	/* release whatever was not handed over to the disk list */
	if (path) {
		free(path->full_path);
		free(path);
	}
	free(serial);
}
/*
 * Rebuild visible_disk_list from scratch.
 *
 * The previous list is released first. Each line of /proc/partitions is
 * then parsed for a device name; names that also have an entry under
 * /sys/block are passed to bl_add_disk() as "/dev/<name>". Lines that do
 * not parse (such as the header) are skipped.
 *
 * Always returns 0, including when /proc/partitions cannot be opened.
 */
int bl_discover_devices(void)
{
	char line[PATH_MAX], name[PATH_MAX], devpath[PATH_MAX];
	FILE *parts;

	/* release previous list */
	bl_release_disk();

	/* scan all block devices */
	parts = fopen("/proc/partitions", "r");
	if (!parts)
		return 0;

	while (fgets(line, sizeof line, parts) != NULL) {
		if (sscanf(line, "%*d %*d %*d %31s", name) != 1)
			continue;

		/* only names that have an entry under /sys/block are probed */
		snprintf(devpath, sizeof devpath, "/sys/block/%s", name);
		if (access(devpath, F_OK) < 0)
			continue;

		snprintf(devpath, sizeof devpath, "/dev/%s", name);
		bl_add_disk(devpath);
	}

	fclose(parts);
	return 0;
}
/*
 * Process one kernel request read from the blocklayout pipe.
 *
 * @fd: descriptor of the pipe file; the request is read from it and the
 *      reply is written back to it.
 *
 * A request is a struct bl_pipemsg_hdr followed by head.totallen bytes of
 * payload. BL_DEVICE_MOUNT payloads are handed to process_deviceinfo()
 * and the resulting major/minor are returned in the reply.
 * BL_DEVICE_UMOUNT payloads are handed to dm_device_remove_all(). Any
 * other type is answered with BL_DEVICE_REQUEST_ERR.
 *
 * Returns 0 when the request was processed and the reply written,
 * -EIO when reading the request or writing the reply failed, and
 * -ENOMEM when the payload buffer could not be allocated. This
 * implementation never returns a positive value.
 */
static int bl_disk_inquiry_process(int fd)
{
	int ret = 0;
	struct bl_pipemsg_hdr head;
	char *buf = NULL;
	uint32_t major = 0, minor = 0;
	uint16_t buflen;
	struct bl_dev_msg reply;

	/* read request */
	if (atomicio(read, fd, &head, sizeof(head)) != sizeof(head)) {
		/* Note that an error in this or the next read is pretty
		 * catastrophic, as there is no good way to resync into
		 * the pipe's stream.
		 */
		BL_LOG_ERR("Read pipefs head error!\n");
		ret = -EIO;
		goto out;
	}

	buflen = head.totallen;
	/*
	 * malloc(0) is allowed to return NULL; allocate at least one byte so
	 * an empty payload is not mistaken for an allocation failure.
	 */
	buf = malloc(buflen ? buflen : 1);
	if (!buf) {
		BL_LOG_ERR("%s: Out of memory!\n", __func__);
		ret = -ENOMEM;
		goto out;
	}

	if (atomicio(read, fd, buf, buflen) != buflen) {
		BL_LOG_ERR("Read pipefs content error!\n");
		ret = -EIO;
		goto out;
	}

	/*
	 * Clear the whole reply first: major/minor are only filled in on a
	 * successful mount, and the structure is written out in full.
	 */
	memset(&reply, 0, sizeof(reply));
	reply.status = BL_DEVICE_REQUEST_PROC;

	switch (head.type) {
	case BL_DEVICE_MOUNT:
		/*
		 * It shouldn't be necessary to discover devices here, since
		 * process_deviceinfo() will re-discover if it can't find
		 * the devices it needs. But in the case of multipath
		 * devices (ones that appear more than once, for example an
		 * active and a standby LUN), this will re-order them in the
		 * correct priority.
		 */
		bl_discover_devices();
		if (!process_deviceinfo(buf, buflen, &major, &minor)) {
			reply.status = BL_DEVICE_REQUEST_ERR;
			break;
		}
		reply.major = major;
		reply.minor = minor;
		break;
	case BL_DEVICE_UMOUNT:
		/* the payload is read as a uint64_t; reject anything shorter */
		if (buflen < sizeof(uint64_t) ||
		    !dm_device_remove_all((uint64_t *) buf))
			reply.status = BL_DEVICE_REQUEST_ERR;
		break;
	default:
		reply.status = BL_DEVICE_REQUEST_ERR;
		break;
	}

	/* write to pipefs */
	if (atomicio((void *)write, fd, &reply, sizeof(reply))
	    != sizeof(reply)) {
		BL_LOG_ERR("Write pipefs error!\n");
		ret = -EIO;
	}

out:
	free(buf);
	return ret;
}
/*
 * Start watching a directory for entries being created or deleted.
 *
 * @dir: directory to watch.
 * @wd:  receives the result of inotify_add_watch(): the watch descriptor
 *       on success, or a negative value on failure (which is also logged).
 */
static void bl_watch_dir(const char *dir, int *wd)
{
	int desc = inotify_add_watch(bl_watch_fd, dir, IN_CREATE|IN_DELETE);

	*wd = desc;
	if (desc >= 0)
		return;

	BL_LOG_ERR("failed to watch %s: %s\n", dir, strerror(errno));
}
/*
 * Handle pending inotify events for the rpc_pipefs directories.
 *
 * Reads one batch of events from bl_watch_fd and reacts to two names:
 *  - "nfs" in RPCPIPE_DIR: on creation, watch NFSPIPE_DIR and try to open
 *    the blocklayout pipe; on deletion, drop that watch and close the pipe.
 *  - "blocklayout" in NFSPIPE_DIR: on creation, open the pipe; on
 *    deletion, close it.
 * bl_pipe_fd is -1 whenever the pipe is not open. All other events are
 * ignored.
 */
static void bl_rpcpipe_cb(void)
{
	/*
	 * The union gives the byte buffer the alignment that
	 * struct inotify_event requires; a plain char array does not.
	 */
	union {
		char bytes[EVENT_BUFSIZE];
		struct inotify_event align;
	} evbuf;
	struct inotify_event *event;
	int rc, curr_byte = 0;

	rc = read(bl_watch_fd, evbuf.bytes, sizeof(evbuf.bytes));
	if (rc < 0) {
		BL_LOG_ERR("read event fail: %s", strerror(errno));
		return;
	}

	while (rc > curr_byte) {
		event = (struct inotify_event *)&evbuf.bytes[curr_byte];
		curr_byte += EVENT_SIZE + event->len;

		/*
		 * name[] only exists when len is non-zero. Events without a
		 * name (e.g. queue overflow, whose wd is -1 and could equal a
		 * cleared watch descriptor) are of no interest here.
		 */
		if (event->len == 0)
			continue;

		if (event->wd == rpc_pipedir_wfd) {
			/* exact match: "nfsd" and friends must not count as "nfs" */
			if (strcmp(event->name, "nfs"))
				continue;
			if (event->mask & IN_CREATE) {
				BL_LOG_WARNING("nfs pipe dir created\n");
				bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);
				/* do not leak a descriptor that is still open */
				if (bl_pipe_fd >= 0)
					close(bl_pipe_fd);
				bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
			} else if (event->mask & IN_DELETE) {
				BL_LOG_WARNING("nfs pipe dir deleted\n");
				inotify_rm_watch(bl_watch_fd, nfs_pipedir_wfd);
				if (bl_pipe_fd >= 0)
					close(bl_pipe_fd);
				nfs_pipedir_wfd = -1;
				bl_pipe_fd = -1;
			}
		} else if (event->wd == nfs_pipedir_wfd) {
			if (strcmp(event->name, "blocklayout"))
				continue;
			if (event->mask & IN_CREATE) {
				BL_LOG_WARNING("blocklayout pipe file created\n");
				/* do not leak a descriptor that is still open */
				if (bl_pipe_fd >= 0)
					close(bl_pipe_fd);
				bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
				if (bl_pipe_fd < 0)
					BL_LOG_ERR("open %s failed: %s\n",
						   event->name, strerror(errno));
			} else if (event->mask & IN_DELETE) {
				BL_LOG_WARNING("blocklayout pipe file deleted\n");
				if (bl_pipe_fd >= 0)
					close(bl_pipe_fd);
				bl_pipe_fd = -1;
			}
		}
	}
}
/*
 * Wait for and dispatch events until something goes wrong.
 *
 * Blocks in select() on the inotify descriptor and, when it is open, the
 * blocklayout pipe. Inotify activity is handled by bl_rpcpipe_cb(); pipe
 * activity is only looked at when the inotify descriptor was not ready,
 * and is handled by bl_disk_inquiry_process().
 *
 * Returns the first non-zero result of bl_disk_inquiry_process(), -errno
 * when select() fails for a reason other than EINTR, or 0 if select()
 * reports that no descriptor is ready.
 */
static int bl_event_helper(void)
{
	fd_set readable;
	int status = 0;

	while (status == 0) {
		int highest, nready;

		FD_ZERO(&readable);
		FD_SET(bl_watch_fd, &readable);
		if (bl_pipe_fd > 0)
			FD_SET(bl_pipe_fd, &readable);

		highest = bl_watch_fd;
		if (bl_pipe_fd >= highest)
			highest = bl_pipe_fd;

		nready = select(highest + 1, &readable, NULL, NULL, NULL);
		if (nready == -1) {
			/* an interrupted wait is simply restarted */
			if (errno == EINTR)
				continue;
			status = -errno;
			break;
		}
		if (nready == 0)
			break;

		if (FD_ISSET(bl_watch_fd, &readable))
			bl_rpcpipe_cb();
		else if (bl_pipe_fd > 0 && FD_ISSET(bl_pipe_fd, &readable))
			status = bl_disk_inquiry_process(bl_pipe_fd);
	}

	return status;
}
/*
 * Signal handler that terminates the daemon.
 *
 * Closes and removes the pid file when one was created, logs the signal
 * number and exits with status 1.
 *
 * NOTE(review): exit() is not async-signal-safe, and BL_LOG_ERR
 * presumably is not either; confirm whether that matters for this daemon.
 */
void sig_die(int signal)
{
	const int have_pidfile = (pidfd >= 0);

	if (have_pidfile) {
		close(pidfd);
		unlink(PID_FILE);
	}

	BL_LOG_ERR("exit on signal(%d)\n", signal);
	exit(1);
}
/* Close and remove the pid file if this process created one. */
static void bl_remove_pidfile(void)
{
	if (pidfd >= 0) {
		close(pidfd);
		unlink(PID_FILE);
		pidfd = -1;
	}
}

/*
 * Daemon entry point.
 *
 * Options:
 *   -d  run one device discovery pass and exit.
 *   -f  stay in the foreground and also log to stderr; no pid file is
 *       used in this mode.
 *
 * Without -f the process refuses to start when PID_FILE already exists,
 * daemonizes, then creates and locks PID_FILE and writes its pid there.
 * It then watches the rpc_pipefs directories, opens the blocklayout pipe
 * and serves kernel requests forever; SIGINT and SIGTERM end the process
 * through sig_die(). The pid file is removed on every exit path that
 * follows its creation.
 */
int main(int argc, char **argv)
{
	int opt, dflag = 0, fg = 0, ret = 1;
	struct stat statbuf;
	char pidbuf[64];
	size_t pidlen;

	while ((opt = getopt(argc, argv, "df")) != -1) {
		switch (opt) {
		case 'd':
			dflag = 1;
			break;
		case 'f':
			fg = 1;
			break;
		default:
			/* unknown options are ignored, as before */
			break;
		}
	}

	if (fg) {
		openlog("blkmapd", LOG_PERROR, 0);
	} else {
		if (!stat(PID_FILE, &statbuf)) {
			fprintf(stderr, "Pid file %s already existed\n", PID_FILE);
			exit(1);
		}

		if (daemon(0, 0) != 0) {
			fprintf(stderr, "Daemonize failed\n");
			exit(1);
		}

		openlog("blkmapd", LOG_PID, 0);
		pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
		if (pidfd < 0) {
			BL_LOG_ERR("Create pid file %s failed\n", PID_FILE);
			exit(1);
		}

		if (lockf(pidfd, F_TLOCK, 0) < 0) {
			/* the file belongs to whoever holds the lock: do not unlink */
			BL_LOG_ERR("Lock pid file %s failed\n", PID_FILE);
			close(pidfd);
			exit(1);
		}

		snprintf(pidbuf, sizeof(pidbuf), "%d\n", (int)getpid());
		pidlen = strlen(pidbuf);
		/* a pid file without a pid is not fatal, but should be visible */
		if (ftruncate(pidfd, 0) < 0 ||
		    write(pidfd, pidbuf, pidlen) != (ssize_t)pidlen)
			BL_LOG_ERR("Write pid file %s failed: %s\n",
				   PID_FILE, strerror(errno));
	}

	signal(SIGINT, sig_die);
	signal(SIGTERM, sig_die);
	signal(SIGHUP, SIG_IGN);

	if (dflag) {
		bl_discover_devices();
		/* do not leave a stale pid file that blocks the next start */
		bl_remove_pidfile();
		exit(0);
	}

	if ((bl_watch_fd = inotify_init()) < 0) {
		BL_LOG_ERR("init inotify failed %s\n", strerror(errno));
		bl_remove_pidfile();
		exit(1);
	}

	/* open pipe file */
	bl_watch_dir(RPCPIPE_DIR, &rpc_pipedir_wfd);
	bl_watch_dir(NFSPIPE_DIR, &nfs_pipedir_wfd);

	bl_pipe_fd = open(BL_PIPE_FILE, O_RDWR);
	if (bl_pipe_fd < 0)
		BL_LOG_ERR("open pipe file %s failed: %s\n", BL_PIPE_FILE, strerror(errno));

	while (1) {
		/* discover device when needed */
		bl_discover_devices();

		ret = bl_event_helper();
		if (ret < 0) {
			/* what should we do with process error? */
			BL_LOG_ERR("inquiry process return %d\n", ret);
		}
	}

	/* not reached: the loop above only ends through sig_die() */
	bl_remove_pidfile();
	exit(ret);
}