40 #include <sys/types.h>
42 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <sys/param.h>
47 #include <netinet/in.h>
48 #include <arpa/inet.h>
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <rdma/rdma_cma.h>
74 #include <qb/qbdefs.h>
75 #include <qb/qbloop.h>
76 #define LOGSYS_UTILS_ONLY 1
80 #define COMPLETION_QUEUE_ENTRIES 100
82 #define TOTAL_READ_POSTS 100
84 #define MAX_MTU_SIZE 4096
86 #define MCAST_REJOIN_MSEC 100
112 unsigned int msg_len);
196 const char *
function,
224 #define log_printf(level, format, args...) \
226 instance->totemiba_log_printf ( \
228 instance->totemiba_subsys_id, \
229 __FUNCTION__, __FILE__, __LINE__, \
230 (const char *)format, ##args); \
249 void2wrid (
void *v) {
union u u; u.
v =
v;
return u.
wr_id; }
264 static inline struct send_buf *mcast_send_buf_get (
275 send_buf = malloc (
sizeof (
struct send_buf));
276 if (send_buf == NULL) {
279 send_buf->
mr = ibv_reg_mr (instance->
mcast_pd,
282 if (send_buf->
mr == NULL) {
293 static inline void mcast_send_buf_put (
295 struct send_buf *send_buf)
301 static inline struct send_buf *token_send_buf_get (
304 struct send_buf *send_buf;
312 send_buf = malloc (
sizeof (
struct send_buf));
313 if (send_buf == NULL) {
319 if (send_buf->
mr == NULL) {
333 struct send_buf *send_buf;
338 ibv_dereg_mr (send_buf->
mr);
346 static inline void token_send_buf_put (
348 struct send_buf *send_buf)
354 static inline struct recv_buf *recv_token_recv_buf_create (
359 recv_buf = malloc (
sizeof (
struct recv_buf));
360 if (recv_buf == NULL) {
366 IBV_ACCESS_LOCAL_WRITE);
371 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
374 recv_buf->
sge.lkey = recv_buf->
mr->lkey;
375 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
382 static inline int recv_token_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
384 struct ibv_recv_wr *fail_recv;
392 static inline void recv_token_recv_buf_post_initial (
struct totemiba_instance *instance)
394 struct recv_buf *recv_buf;
398 recv_buf = recv_token_recv_buf_create (instance);
400 recv_token_recv_buf_post (instance, recv_buf);
404 static inline void recv_token_recv_buf_post_destroy (
407 struct recv_buf *recv_buf;
413 recv_buf =
list_entry (list,
struct recv_buf, list_all);
415 ibv_dereg_mr (recv_buf->
mr);
421 static inline struct recv_buf *mcast_recv_buf_create (
struct totemiba_instance *instance)
423 struct recv_buf *recv_buf;
426 recv_buf = malloc (
sizeof (
struct recv_buf));
427 if (recv_buf == NULL) {
433 IBV_ACCESS_LOCAL_WRITE);
438 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
441 recv_buf->
sge.lkey = mr->lkey;
442 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
447 static inline int mcast_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
449 struct ibv_recv_wr *fail_recv;
457 static inline void mcast_recv_buf_post_initial (
struct totemiba_instance *instance)
459 struct recv_buf *recv_buf;
463 recv_buf = mcast_recv_buf_create (instance);
465 mcast_recv_buf_post (instance, recv_buf);
469 static inline void iba_deliver_fn (
struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
472 const struct recv_buf *recv_buf;
474 recv_buf = wrid2void(wr_id);
475 addr = &recv_buf->
buffer[
sizeof (
struct ibv_grh)];
477 bytes -=
sizeof (
struct ibv_grh);
481 static int mcast_cq_send_event_fn (
int fd,
int events,
void *context)
484 struct ibv_wc wc[32];
485 struct ibv_cq *ev_cq;
491 ibv_ack_cq_events (ev_cq, 1);
492 res = ibv_req_notify_cq (ev_cq, 0);
496 for (i = 0; i < res; i++) {
497 mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
504 static int mcast_cq_recv_event_fn (
int fd,
int events,
void *context)
507 struct ibv_wc wc[64];
508 struct ibv_cq *ev_cq;
514 ibv_ack_cq_events (ev_cq, 1);
515 res = ibv_req_notify_cq (ev_cq, 0);
519 for (i = 0; i < res; i++) {
520 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
521 mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
528 static void mcast_rejoin (
void *data)
535 ibv_destroy_ah (instance->
mcast_ah);
542 "rdma_join_multicast failed, errno=%d, rejoining in %u ms",
554 static int mcast_rdma_event_fn (
int fd,
int events,
void *context)
557 struct rdma_cm_event *event;
566 switch (event->event) {
570 case RDMA_CM_EVENT_ADDR_RESOLVED:
574 case RDMA_CM_EVENT_MULTICAST_ERROR:
586 case RDMA_CM_EVENT_MULTICAST_JOIN:
587 instance->
mcast_qpn =
event->param.ud.qp_num;
589 instance->
mcast_ah = ibv_create_ah (instance->
mcast_pd, &event->param.ud.ah_attr);
598 case RDMA_CM_EVENT_ADDR_ERROR:
599 case RDMA_CM_EVENT_ROUTE_ERROR:
600 case RDMA_CM_EVENT_DEVICE_REMOVAL:
607 rdma_ack_cm_event (event);
611 static int recv_token_cq_send_event_fn (
617 struct ibv_wc wc[32];
618 struct ibv_cq *ev_cq;
624 ibv_ack_cq_events (ev_cq, 1);
625 res = ibv_req_notify_cq (ev_cq, 0);
629 for (i = 0; i < res; i++) {
630 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
631 ibv_dereg_mr (wrid2void(wc[i].wr_id));
638 static int recv_token_cq_recv_event_fn (
int fd,
int events,
void *context)
641 struct ibv_wc wc[32];
642 struct ibv_cq *ev_cq;
648 ibv_ack_cq_events (ev_cq, 1);
649 res = ibv_req_notify_cq (ev_cq, 0);
653 for (i = 0; i < res; i++) {
654 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
655 recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
678 recv_token_recv_buf_post_destroy (instance);
697 struct ibv_qp_init_attr init_qp_attr;
754 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
755 init_qp_attr.cap.max_send_wr = 50;
757 init_qp_attr.cap.max_send_sge = 1;
758 init_qp_attr.cap.max_recv_sge = 1;
759 init_qp_attr.qp_context = instance;
760 init_qp_attr.sq_sig_all = 0;
761 init_qp_attr.qp_type = IBV_QPT_UD;
771 recv_token_recv_buf_post_initial (instance);
777 POLLIN, instance, recv_token_cq_recv_event_fn);
783 POLLIN, instance, recv_token_cq_send_event_fn);
790 static int recv_token_rdma_event_fn (
int fd,
int events,
void *context)
793 struct rdma_cm_event *event;
794 struct rdma_conn_param conn_param;
803 switch (event->event) {
804 case RDMA_CM_EVENT_CONNECT_REQUEST:
805 recv_token_accept_destroy (instance);
808 recv_token_accept_setup (instance);
809 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
818 res = rdma_ack_cm_event (event);
822 static int send_token_cq_send_event_fn (
int fd,
int events,
void *context)
825 struct ibv_wc wc[32];
826 struct ibv_cq *ev_cq;
832 ibv_ack_cq_events (ev_cq, 1);
833 res = ibv_req_notify_cq (ev_cq, 0);
837 for (i = 0; i < res; i++) {
838 token_send_buf_put (instance, wrid2void(wc[i].wr_id));
845 static int send_token_cq_recv_event_fn (
int fd,
int events,
void *context)
848 struct ibv_wc wc[32];
849 struct ibv_cq *ev_cq;
855 ibv_ack_cq_events (ev_cq, 1);
856 res = ibv_req_notify_cq (ev_cq, 0);
860 for (i = 0; i < res; i++) {
861 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
868 static int send_token_rdma_event_fn (
int fd,
int events,
void *context)
871 struct rdma_cm_event *event;
872 struct rdma_conn_param conn_param;
881 switch (event->event) {
885 case RDMA_CM_EVENT_ADDR_RESOLVED:
891 case RDMA_CM_EVENT_ROUTE_RESOLVED:
892 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
893 conn_param.private_data = NULL;
894 conn_param.private_data_len = 0;
897 case RDMA_CM_EVENT_ESTABLISHED:
904 case RDMA_CM_EVENT_ADDR_ERROR:
905 case RDMA_CM_EVENT_ROUTE_ERROR:
906 case RDMA_CM_EVENT_MULTICAST_ERROR:
908 "send_token_rdma_event_fn multicast error");
910 case RDMA_CM_EVENT_DEVICE_REMOVAL:
912 case RDMA_CM_EVENT_UNREACHABLE:
914 "send_token_rdma_event_fn unreachable");
918 "send_token_rdma_event_fn unknown event %d",
923 rdma_ack_cm_event (event);
930 struct ibv_qp_init_attr init_qp_attr;
989 "couldn't request notifications of the completion queue");
1019 "couldn't request notifications of the completion queue");
1022 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
1023 init_qp_attr.cap.max_send_wr = 50;
1025 init_qp_attr.cap.max_send_sge = 1;
1026 init_qp_attr.cap.max_recv_sge = 1;
1027 init_qp_attr.qp_context = instance;
1028 init_qp_attr.sq_sig_all = 0;
1029 init_qp_attr.qp_type = IBV_QPT_UD;
1043 POLLIN, instance, send_token_cq_recv_event_fn);
1049 POLLIN, instance, send_token_cq_send_event_fn);
1055 POLLIN, instance, send_token_rdma_event_fn);
1088 token_send_buf_destroy (instance);
1098 struct ibv_port_attr port_attr;
1147 POLLIN, instance, recv_token_rdma_event_fn);
1155 struct ibv_qp_init_attr init_qp_attr;
1239 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
1240 init_qp_attr.cap.max_send_wr = 50;
1242 init_qp_attr.cap.max_send_sge = 1;
1243 init_qp_attr.cap.max_recv_sge = 1;
1244 init_qp_attr.qp_context = instance;
1245 init_qp_attr.sq_sig_all = 0;
1246 init_qp_attr.qp_type = IBV_QPT_UD;
1256 mcast_recv_buf_post_initial (instance);
1262 POLLIN, instance, mcast_cq_recv_event_fn);
1268 POLLIN, instance, mcast_cq_send_event_fn);
1274 POLLIN, instance, mcast_rdma_event_fn);
1279 static void timer_function_netif_check_timeout (
1308 (
const struct sockaddr_storage *)&instance->
bind_addr,
1316 (
struct sockaddr_storage *)&instance->
mcast_addr, &addr_len);
1318 res = recv_token_bind (instance);
1320 res = mcast_bind (instance);
1325 const char *cipher_type,
1326 const char *hash_type)
1351 qb_loop_t *qb_poll_handle,
1353 struct totem_config *totem_config,
1358 void (*deliver_fn) (
1361 unsigned int msg_len),
1363 void (*iface_change_fn) (
1367 void (*target_set_completed) (
1374 if (instance == NULL) {
1378 totemiba_instance_initialize (instance);
1399 100*QB_TIME_NS_IN_MSEC,
1401 timer_function_netif_check_timeout,
1407 *iba_context = instance;
1423 int processor_count)
1456 unsigned int msg_len)
1460 struct ibv_send_wr send_wr, *failed_send_wr;
1463 struct send_buf *send_buf;
1465 send_buf = token_send_buf_get (instance);
1466 if (send_buf == NULL) {
1470 memcpy (msg, ms, msg_len);
1472 send_wr.next = NULL;
1473 send_wr.sg_list = &sge;
1474 send_wr.num_sge = 1;
1475 send_wr.opcode = IBV_WR_SEND;
1476 send_wr.send_flags = IBV_SEND_SIGNALED;
1477 send_wr.wr_id = void2wrid(send_buf);
1478 send_wr.imm_data = 0;
1483 sge.length = msg_len;
1484 sge.lkey = send_buf->
mr->lkey;
1485 sge.addr = (uintptr_t)msg;
1488 res = ibv_post_send (instance->
send_token_cma_id->qp, &send_wr, &failed_send_wr);
1496 unsigned int msg_len)
1500 struct ibv_send_wr send_wr, *failed_send_wr;
1503 struct send_buf *send_buf;
1505 send_buf = mcast_send_buf_get (instance);
1506 if (send_buf == NULL) {
1511 memcpy (msg, ms, msg_len);
1512 send_wr.next = NULL;
1513 send_wr.sg_list = &sge;
1514 send_wr.num_sge = 1;
1515 send_wr.opcode = IBV_WR_SEND;
1516 send_wr.send_flags = IBV_SEND_SIGNALED;
1517 send_wr.wr_id = void2wrid(send_buf);
1518 send_wr.imm_data = 0;
1519 send_wr.wr.ud.ah = instance->
mcast_ah;
1520 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1521 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1523 sge.length = msg_len;
1524 sge.lkey = send_buf->
mr->lkey;
1525 sge.addr = (uintptr_t)msg;
1528 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
1537 unsigned int msg_len)
1541 struct ibv_send_wr send_wr, *failed_send_wr;
1544 struct send_buf *send_buf;
1546 send_buf = mcast_send_buf_get (instance);
1547 if (send_buf == NULL) {
1552 memcpy (msg, ms, msg_len);
1553 send_wr.next = NULL;
1554 send_wr.sg_list = &sge;
1555 send_wr.num_sge = 1;
1556 send_wr.opcode = IBV_WR_SEND;
1557 send_wr.send_flags = IBV_SEND_SIGNALED;
1558 send_wr.wr_id = void2wrid(send_buf);
1559 send_wr.imm_data = 0;
1560 send_wr.wr.ud.ah = instance->
mcast_ah;
1561 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1562 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1564 sge.length = msg_len;
1565 sge.lkey = send_buf->
mr->lkey;
1566 sge.addr = (uintptr_t)msg;
1569 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
1594 const char *ret_char;
1625 res = send_token_unbind (instance);
1627 res = send_token_bind (instance);
unsigned int clear_node_high_bit
struct ibv_cq * mcast_recv_cq
struct list_head mcast_send_buf_head
struct sockaddr mcast_addr
void(* totemiba_iface_change_fn)(void *context, const struct totem_ip_address *iface_address)
struct ibv_comp_channel * send_token_recv_completion_channel
struct ibv_cq * recv_token_send_cq
int totemiba_recv_mcast_empty(void *iba_context)
struct totem_interface * interfaces
struct ibv_comp_channel * send_token_send_completion_channel
struct ibv_cq * send_token_recv_cq
The totem_ip_address struct.
const char * totemip_print(const struct totem_ip_address *addr)
struct sockaddr send_token_bind_addr
struct sockaddr local_mcast_bind_addr
int totemiba_send_flush(void *iba_context)
int totemiba_token_target_set(void *iba_context, const struct totem_ip_address *token_target)
char buffer[MAX_MTU_SIZE]
struct sockaddr token_addr
struct rdma_event_channel * send_token_channel
struct rdma_event_channel * recv_token_channel
void(* totemiba_deliver_fn)(void *context, const void *msg, unsigned int msg_len)
struct ibv_comp_channel * mcast_recv_completion_channel
int totemiba_initialize(qb_loop_t *qb_poll_handle, void **iba_context, struct totem_config *totem_config, totemsrp_stats_t *stats, int interface_no, void *context, void(*deliver_fn)(void *context, const void *msg, unsigned int msg_len), void(*iface_change_fn)(void *context, const struct totem_ip_address *iface_address), void(*target_set_completed)(void *context))
Create an instance.
int totemiba_mcast_flush_send(void *iba_context, const void *ms, unsigned int msg_len)
unsigned char addr[TOTEMIP_ADDRLEN]
void * totemiba_buffer_alloc(void)
#define MCAST_REJOIN_MSEC
struct list_head list_all
int totemiba_iface_check(void *iba_context)
struct list_head token_send_buf_free
struct list_head recv_token_recv_buf_head
qb_loop_timer_handle timer_netif_check_timeout
struct totem_interface * totem_interface
int totemiba_crypto_set(void *iba_context, const char *cipher_type, const char *hash_type)
int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num, int mask_high_bit)
struct list_head mcast_send_buf_free
void(*) in totemiba_subsys_id)
qb_loop_timer_handle mcast_rejoin
int totemiba_token_send(void *iba_context, const void *ms, unsigned int msg_len)
struct ibv_comp_channel * recv_token_recv_completion_channel
struct sockaddr bind_addr
int totemiba_iface_get(void *iba_context, struct totem_ip_address *addr)
struct totem_ip_address mcast_addr
struct sockaddr recv_token_dest_addr
struct ibv_cq * recv_token_recv_cq
#define LOGSYS_LEVEL_ERROR
struct ibv_cq * mcast_send_cq
struct list_head list_all
struct list_head token_send_buf_head
#define LOGSYS_LEVEL_DEBUG
void totemiba_net_mtu_adjust(void *iba_context, struct totem_config *totem_config)
struct ibv_comp_channel * mcast_send_completion_channel
struct totem_ip_address boundto
struct ibv_pd * send_token_pd
void(* log_printf)(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format,...) __attribute__((format(printf
const char * totemiba_iface_print(void *iba_context)
struct ibv_cq * send_token_send_cq
struct rdma_event_channel * mcast_channel
struct rdma_cm_id * listen_recv_token_cma_id
struct rdma_event_channel * listen_recv_token_channel
struct ibv_ah * send_token_ah
int totemiba_finalize(void *iba_context)
struct list_head list_free
qb_loop_t * totemiba_poll_handle
#define COMPLETION_QUEUE_ENTRIES
#define log_printf(level, format, args...)
struct sockaddr mcast_dest_addr
struct sockaddr send_token_dest_addr
struct totem_ip_address my_id
struct ibv_recv_wr recv_wr
struct rdma_cm_id * mcast_cma_id
void(* totemiba_target_set_completed)(void *context)
int totemiba_mcast_noflush_send(void *iba_context, const void *ms, unsigned int msg_len)
struct totem_config * totem_config
struct ibv_pd * recv_token_pd
struct rdma_cm_id * recv_token_cma_id
#define list_entry(ptr, type, member)
struct rdma_cm_id * send_token_cma_id
int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, uint16_t port, struct sockaddr_storage *saddr, int *addrlen)
struct totem_logging_configuration totem_logging_configuration
void(* totemiba_log_printf)(int level, int subsys, const char *function, const char *file, int line, const char *format,...) __attribute__((format(printf
#define LOGSYS_LEVEL_NOTICE
struct ibv_comp_channel * recv_token_send_completion_channel
int totemiba_processor_count_set(void *iba_context, int processor_count)
int totemiba_recv_flush(void *iba_context)
int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, struct totem_ip_address *ip_addr)
char buffer[MAX_MTU_SIZE+sizeof(struct ibv_grh)]
struct totem_ip_address bindnet
void totemiba_buffer_release(void *ptr)