rping: terminate CM event thread before exiting
[ Upstream commit f3ae6534ad93c4f1aca7374d9a75f61b790fa03c ]
The CM event thread processes events in a loop with no explicit
termination. When the last CM event is received, the main thread
proceeds to clean up and destroy the CM event channel. If this occurs
after the CM event thread has processed the last event, but before it
reaches rdma_get_cm_event again, then the subsequent call to
rdma_get_cm_event will fail and cause the process to exit with a failure
code even though the test was actually successful.
This causes flakiness in test scripts that use rping for basic
functional testing.
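Fix this by having the CM event thread poll() on both the CM event
channel and an eventfd. The main thread writes to the eventfd to signal
termination and joins the CM event thread before destroying the CM
event channel.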
Tested by running 4096 parallel rping processes.
Fixes: 6f640ff ("r7019: Introduce event channels.")
Signed-off-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Nicolas Morey <nmorey@suse.com>
diff --git a/librdmacm/examples/rping.c b/librdmacm/examples/rping.c
index 9401cf1..4f0b3f9 100644
--- a/librdmacm/examples/rping.c
+++ b/librdmacm/examples/rping.c
@@ -37,6 +37,9 @@
#include <string.h>
#include <stdio.h>
#include <errno.h>
+#include <unistd.h>
+#include <sys/poll.h>
+#include <sys/eventfd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
@@ -152,6 +155,7 @@
int validate; /* validate ping data */
/* CM stuff */
+ int eventfd;
pthread_t cmthread;
struct rdma_event_channel *cm_channel;
struct rdma_cm_id *cm_id; /* connection on client side,*/
@@ -667,18 +671,36 @@
{
struct rping_cb *cb = arg;
struct rdma_cm_event *event;
+ struct pollfd pfds[2];
int ret;
+ pfds[0].fd = cb->eventfd;
+ pfds[0].events = POLLIN;
+ pfds[1].fd = cb->cm_channel->fd;
+ pfds[1].events = POLLIN;
+
while (1) {
- ret = rdma_get_cm_event(cb->cm_channel, &event);
- if (ret) {
- perror("rdma_get_cm_event");
+ ret = poll(pfds, 2, -1);
+ if (ret == -1 && errno != EINTR) {
+ perror("poll failed");
exit(ret);
+ } else if (ret < 1)
+ continue;
+
+ if (pfds[0].revents & POLLIN)
+ return NULL;
+
+ if (pfds[1].revents & POLLIN) {
+ ret = rdma_get_cm_event(cb->cm_channel, &event);
+ if (ret) {
+ perror("rdma_get_cm_event");
+ exit(ret);
+ }
+ ret = rping_cma_event_handler(event->id, event);
+ rdma_ack_cm_event(event);
+ if (ret)
+ exit(ret);
}
- ret = rping_cma_event_handler(event->id, event);
- rdma_ack_cm_event(event);
- if (ret)
- exit(ret);
}
}
@@ -1280,6 +1302,7 @@
int op;
int ret = 0;
int persistent_server = 0;
+ const uint64_t efdw = 1;
cb = malloc(sizeof(*cb));
if (!cb)
@@ -1367,6 +1390,13 @@
goto out;
}
+ cb->eventfd = eventfd(0, EFD_NONBLOCK);
+ if (cb->eventfd == -1) {
+ perror("Could not create event FD");
+ ret = errno;
+ goto out;
+ }
+
cb->cm_channel = create_first_event_channel();
if (!cb->cm_channel) {
ret = errno;
@@ -1398,6 +1428,10 @@
DEBUG_LOG("destroy cm_id %p\n", cb->cm_id);
rdma_destroy_id(cb->cm_id);
out2:
+ if (write(cb->eventfd, &efdw, sizeof(efdw)) != sizeof(efdw))
+ fprintf(stderr, "Failed to signal CM thread\n");
+
+ pthread_join(cb->cmthread, NULL);
rdma_destroy_event_channel(cb->cm_channel);
out:
free(cb);