LCOV - code coverage report
Current view: top level - util/io_uring - fd_io_uring_setup.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 179 0.0 %
Date: 2026-02-13 06:06:24 Functions: 0 8 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "fd_io_uring_setup.h"
       3             : #include "../shmem/fd_shmem.h"
       4             : #include <errno.h>
       5             : #include <sys/mman.h> /* mmap */
       6             : #include <unistd.h> /* close */
       7             : 
       8           0 : #define FD_IO_URING_SHMEM_HEADROOM (4096UL)
       9             : 
/* fd_io_uring_shmem_align returns the required alignment, in bytes, of
   the shared memory region passed to fd_io_uring_shmem_setup /
   fd_io_uring_init_shmem.  Alignment is one normal page, since the
   region is handed to the kernel via IORING_SETUP_NO_MMAP. */

ulong
fd_io_uring_shmem_align( void ) {
  return FD_SHMEM_NORMAL_PAGE_SZ;
}
      14             : 
/* fd_io_uring_shmem_layout_t describes where the two kernel-visible
   regions of a shared io_uring live inside the caller-provided shmem
   region.  Offsets are relative to the start of that region. */

struct fd_io_uring_shmem_layout {
  /* offset to completion queue memory region
     This region contains registers (head/tail numbers), the submission
     queue array, and the completion queue (array of CQEs).
     (Do not assume this points to a CQE) */
  ulong cq_off;

  /* offset to SQE array (array of struct io_uring_sqe) */
  ulong sqe_off;
};

typedef struct fd_io_uring_shmem_layout fd_io_uring_shmem_layout_t;
      27             : 
      28             : static ulong
      29             : fd_io_uring_shmem_layout( fd_io_uring_shmem_layout_t * layout,
      30             :                           ulong                        sq_depth,
      31           0 :                           ulong                        cq_depth ) {
      32           0 :   memset( layout, 0, sizeof(fd_io_uring_shmem_layout_t) );
      33             : 
      34           0 :   if( FD_UNLIKELY( !fd_ulong_is_pow2( sq_depth ) ) ) return 0UL;
      35           0 :   if( FD_UNLIKELY( !fd_ulong_is_pow2( cq_depth ) ) ) return 0UL;
      36           0 :   if( FD_UNLIKELY( sq_depth>UINT_MAX             ) ) return 0UL;
      37           0 :   if( FD_UNLIKELY( cq_depth>UINT_MAX             ) ) return 0UL;
      38             : 
      39           0 :   ulong cq_sz;
      40           0 :   if( FD_UNLIKELY( __builtin_umull_overflow( cq_depth, sizeof(struct io_uring_cqe), &cq_sz ) ) ) return 0UL;
      41           0 :   ulong sqa_sz;
      42           0 :   if( FD_UNLIKELY( __builtin_umull_overflow( sq_depth, sizeof(uint), &sqa_sz ) ) ) return 0UL;
      43             : 
      44             :   /* io_uring CQ region
      45             : 
      46             :      This API matches Linux io_uring.c rings_size():
      47             :      https://elixir.bootlin.com/linux/v6.11.5/source/io_uring/io_uring.c#L2559 */
      48             : 
      49           0 :   FD_SCRATCH_ALLOC_INIT( l, NULL );
      50             : 
      51             :   /* The true footprint requirement depends on the kernel version.  The
      52             :      head part of this region is 'struct io_rings', which is not stable
      53             :      ABI.  We use a very conservative 4 KiB here. */
      54             : 
      55           0 :   layout->cq_off = (ulong)
      56           0 :       FD_SCRATCH_ALLOC_APPEND( l, FD_SHMEM_NORMAL_PAGE_SZ, FD_IO_URING_SHMEM_HEADROOM );
      57             : 
      58             :   /* Completion queue (cache line align) */
      59             : 
      60           0 :   FD_SCRATCH_ALLOC_APPEND( l, 128UL, cq_depth*sizeof(struct io_uring_cqe) );
      61             : 
      62             :   /* Submission queue index array (cache line align) */
      63             : 
      64           0 :   FD_SCRATCH_ALLOC_APPEND( l, 128UL, sq_depth*sizeof(uint) );
      65             : 
      66             :   /* io_uring SQEs region */
      67             : 
      68           0 :   layout->sqe_off = (ulong)FD_SCRATCH_ALLOC_APPEND(
      69           0 :       l, FD_SHMEM_NORMAL_PAGE_SZ, sq_depth*sizeof(struct io_uring_sqe) );
      70             : 
      71           0 :   return FD_SCRATCH_ALLOC_FINI( l, FD_SHMEM_NORMAL_PAGE_SZ );
      72           0 : }
      73             : 
      74             : ulong
      75             : fd_io_uring_shmem_footprint( ulong   sq_depth,
      76           0 :                              ulong   cq_depth ) {
      77           0 :   fd_io_uring_shmem_layout_t layout[1];
      78           0 :   return fd_io_uring_shmem_layout( layout, sq_depth, cq_depth );
      79           0 : }
      80             : 
      81             : fd_io_uring_params_t *
      82             : fd_io_uring_shmem_setup( fd_io_uring_params_t * params,
      83             :                          void *                 shmem,
      84             :                          ulong                  sq_depth,
      85           0 :                          ulong                  cq_depth ) {
      86             : 
      87           0 :   fd_io_uring_shmem_layout_t layout[1];
      88           0 :   ulong shmem_footprint = fd_io_uring_shmem_layout( layout, sq_depth, cq_depth );
      89           0 :   if( FD_UNLIKELY( !shmem_footprint ) ) {
      90           0 :     FD_LOG_WARNING(( "invalid sq_depth (%lu) or cq_depth (%lu)", sq_depth, cq_depth ));
      91           0 :     return NULL;
      92           0 :   }
      93             : 
      94           0 :   params->flags |= IORING_SETUP_NO_MMAP;
      95           0 :   params->sq_entries = (uint)sq_depth;
      96           0 :   params->cq_entries = (uint)cq_depth;
      97             : 
      98             :   /* cq_off points to the region containing the kernel private io_rings
      99             :      struct, the completion queue (array of CQEs), and the submission
     100             :      queue array (array of uints). */
     101             : 
     102           0 :   params->cq_off = (fd_io_cqring_offsets_t) {
     103           0 :     .user_addr = (unsigned long long)( (uchar *)shmem ),
     104           0 :   };
     105             : 
     106             :   /* sq_off points to the table of submission queue entries. */
     107             : 
     108           0 :   params->sq_off = (fd_io_sqring_offsets_t) {
     109           0 :     .user_addr = (unsigned long long)( (uchar *)shmem + layout->sqe_off ),
     110           0 :   };
     111             : 
     112           0 :   return params;
     113           0 : }
     114             : 
/* fd_io_uring_init_rings wires up the userspace sq/cq views from the
   kernel-reported offsets in params.  sqe_mem points to the SQE array
   mapping, cq_mem to the rings region (which holds SQ registers, the
   SQ index array, and the CQEs).  Assumes params was filled in by a
   successful io_uring_setup.  It also initializes the SQ indirection
   array to the identity mapping. */

static void
fd_io_uring_init_rings(
    fd_io_uring_sq_t *     sq,
    fd_io_uring_cq_t *     cq,
    fd_io_uring_params_t * params,
    void *                 sqe_mem,
    void *                 cq_mem
) {
  ulong sqe_laddr = (ulong)sqe_mem;
  ulong cq_laddr  = (ulong)cq_mem;

  /* Depths must be powers of two: ring indices are masked, not
     wrapped, so a non-pow2 depth would corrupt the rings. */
  FD_CRIT( fd_ulong_is_pow2( params->sq_entries ), "invalid params->sq_entries" );
  FD_CRIT( fd_ulong_is_pow2( params->cq_entries ), "invalid params->cq_entries" );

  *sq = (fd_io_uring_sq_t) {
    /* Confusingly, in Linux io_uring, submission queue registers are
       located in the completion queue memory region */
    .khead    = (void *)( cq_laddr + params->sq_off.head    ),
    .ktail    = (void *)( cq_laddr + params->sq_off.tail    ),
    .kflags   = (void *)( cq_laddr + params->sq_off.flags   ),
    .kdropped = (void *)( cq_laddr + params->sq_off.dropped ),

    .array = (void *)( cq_laddr + params->sq_off.array ),
    .sqes  = (void *)( sqe_laddr                       ),

    .sqe_head = 0,
    .sqe_tail = 0,
    .depth    = params->sq_entries
  };

  *cq = (fd_io_uring_cq_t) {
    .depth = params->cq_entries,

    .khead     = (void *)( cq_laddr + params->cq_off.head    ),
    .ktail     = (void *)( cq_laddr + params->cq_off.tail    ),
    .koverflow = (void *)( cq_laddr + params->cq_off.overflow ),

    .cqes = (void *)( cq_laddr + params->cq_off.cqes )
  };

  /* io_uring uses a rather useless indirection table to map queue slots
     to entries.  Fill it with the identity mapping once; slot i always
     refers to SQE i. */

  for( uint i=0; i<params->sq_entries; i++ ) {
    sq->array[ i ] = i;
  }
}
     162             : 
     163             : fd_io_uring_t *
     164             : fd_io_uring_init_shmem(
     165             :     fd_io_uring_t *        ring,
     166             :     fd_io_uring_params_t * params,
     167             :     void *                 shmem,
     168             :     ulong                  sq_depth,
     169             :     ulong                  cq_depth
     170           0 : ) {
     171           0 :   memset( ring, 0, sizeof(fd_io_uring_t) );
     172           0 :   ring->ioring_fd = -1;
     173             : 
     174           0 :   params->flags      |= IORING_SETUP_CQSIZE;
     175           0 :   params->sq_entries  = (uint)sq_depth;
     176           0 :   params->cq_entries  = (uint)cq_depth;
     177             : 
     178           0 :   fd_io_uring_shmem_setup( params, shmem, sq_depth, cq_depth );
     179             : 
     180           0 :   memset( shmem, 0, FD_IO_URING_SHMEM_HEADROOM );
     181             : 
     182           0 :   int ring_fd = fd_io_uring_setup( (uint)sq_depth, params );
     183           0 :   if( FD_UNLIKELY( ring_fd<0 ) ) return NULL;
     184           0 :   ring->ioring_fd = ring_fd;
     185             : 
     186           0 :   fd_io_uring_shmem_layout_t layout[1];
     187           0 :   fd_io_uring_shmem_layout( layout, sq_depth, cq_depth );
     188             : 
     189           0 :   fd_io_uring_init_rings(
     190           0 :       ring->sq,
     191           0 :       ring->cq,
     192           0 :       params,
     193           0 :       (void *)( (ulong)shmem + layout->sqe_off ),
     194           0 :       (void *)( (ulong)shmem + layout->cq_off  )
     195           0 :   );
     196           0 :   return ring;
     197           0 : }
     198             : 
     199             : fd_io_uring_t *
     200             : fd_io_uring_init_mmap(
     201             :     fd_io_uring_t *        ring,
     202             :     fd_io_uring_params_t * params
     203           0 : ) {
     204           0 :   memset( ring, 0, sizeof(fd_io_uring_t) );
     205           0 :   ring->ioring_fd = -1;
     206             : 
     207           0 :   uint sq_depth = params->sq_entries;
     208           0 :   uint cq_depth = params->cq_entries;
     209             : 
     210           0 :   int ring_fd = fd_io_uring_setup( params->sq_entries, params );
     211           0 :   if( FD_UNLIKELY( ring_fd<0 ) ) return NULL;
     212           0 :   ring->ioring_fd = ring_fd;
     213             : 
     214           0 :   if( FD_UNLIKELY( params->sq_entries != sq_depth ||
     215           0 :                    params->cq_entries != cq_depth ) ) {
     216           0 :     FD_LOG_WARNING(( "io_uring setup failed: requested (sq_depth=%u, cq_depth=%u) but kernel returned (sq_depth=%u, cq_depth=%u)",
     217           0 :                      params->sq_entries, params->cq_entries,
     218           0 :                      sq_depth,           cq_depth ));
     219           0 :     close( ring_fd );
     220           0 :     ring->ioring_fd = -1;
     221           0 :     return NULL;
     222           0 :   }
     223             : 
     224           0 :   ring->kern_sq_sz  = params->sq_off.array + params->sq_entries * sizeof(uint);
     225           0 :   ring->kern_sqe_sz = /*                  */ params->sq_entries * sizeof(struct io_uring_sqe);
     226           0 :   ring->kern_cq_sz  = params->cq_off.cqes  + params->cq_entries * sizeof(struct io_uring_cqe);
     227             : 
     228           0 :   ring->kern_sq_mem = mmap( NULL, ring->kern_sq_sz, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING );
     229           0 :   if( FD_UNLIKELY( ring->kern_sq_mem==MAP_FAILED ) ) {
     230           0 :     FD_LOG_WARNING(( "mmap SQ ring failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     231           0 :     close( ring_fd );
     232           0 :     ring->ioring_fd = -1;
     233           0 :     return NULL;
     234           0 :   }
     235             : 
     236           0 :   ring->kern_sqe_mem = mmap( NULL, ring->kern_sqe_sz, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_SQES );
     237           0 :   if( FD_UNLIKELY( ring->kern_sqe_mem==MAP_FAILED ) ) {
     238           0 :     munmap( ring->kern_sq_mem, ring->kern_sq_sz );
     239           0 :     close( ring_fd );
     240           0 :     ring->ioring_fd = -1;
     241           0 :     FD_LOG_WARNING(( "mmap SQEs failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     242           0 :     return NULL;
     243           0 :   }
     244             : 
     245           0 :   ring->kern_cq_mem = mmap( NULL, ring->kern_cq_sz, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_CQ_RING );
     246           0 :   if( FD_UNLIKELY( ring->kern_cq_mem==MAP_FAILED ) ) {
     247           0 :     munmap( ring->kern_sqe_mem, ring->kern_sqe_sz );
     248           0 :     munmap( ring->kern_sq_mem,  ring->kern_sq_sz  );
     249           0 :     close( ring_fd );
     250           0 :     ring->ioring_fd = -1;
     251           0 :     FD_LOG_WARNING(( "mmap CQ ring failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     252           0 :     return NULL;
     253           0 :   }
     254             : 
     255           0 :   fd_io_uring_init_rings(
     256           0 :       ring->sq,
     257           0 :       ring->cq,
     258           0 :       params,
     259           0 :       ring->kern_sqe_mem,
     260           0 :       ring->kern_cq_mem
     261           0 :   );
     262             : 
     263           0 :   return ring;
     264           0 : }
     265             : 
     266             : void *
     267           0 : fd_io_uring_fini( fd_io_uring_t * ring ) {
     268             : 
     269           0 :   if( ring->kern_cq_mem ) {
     270           0 :     if( FD_UNLIKELY( munmap( ring->kern_cq_mem, ring->kern_cq_sz ) ) ) {
     271           0 :       FD_LOG_WARNING(( "munmap CQ ring failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     272           0 :     }
     273           0 :     ring->kern_cq_mem = NULL;
     274           0 :     ring->kern_cq_sz  = 0UL;
     275           0 :   }
     276             : 
     277           0 :   if( ring->kern_sqe_mem ) {
     278           0 :     if( FD_UNLIKELY( munmap( ring->kern_sqe_mem, ring->kern_sqe_sz ) ) ) {
     279           0 :       FD_LOG_WARNING(( "munmap SQEs failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     280           0 :     }
     281           0 :     ring->kern_sqe_mem = NULL;
     282           0 :     ring->kern_sqe_sz  = 0UL;
     283           0 :   }
     284             : 
     285           0 :   if( ring->kern_sq_mem ) {
     286           0 :     if( FD_UNLIKELY( munmap( ring->kern_sq_mem, ring->kern_sq_sz ) ) ) {
     287           0 :       FD_LOG_WARNING(( "munmap SQ ring failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     288           0 :     }
     289           0 :     ring->kern_sq_mem = NULL;
     290           0 :     ring->kern_sq_sz  = 0UL;
     291           0 :   }
     292             : 
     293           0 :   if( ring->ioring_fd>=0 ) {
     294           0 :     if( FD_UNLIKELY( close( ring->ioring_fd ) ) ) {
     295           0 :       FD_LOG_WARNING(( "close(ring_fd) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     296           0 :     }
     297           0 :     ring->ioring_fd = -1;
     298           0 :   }
     299             : 
     300           0 :   memset( ring->sq, 0, sizeof(fd_io_uring_sq_t) );
     301           0 :   memset( ring->cq, 0, sizeof(fd_io_uring_cq_t) );
     302             : 
     303           0 :   return ring;
     304           0 : }

Generated by: LCOV version 1.14