Issue #17059 has been updated by ko1 (Koichi Sasada).


I would like to understand the general idea of how to use `epoll` as the backend for `IO.select`.

```C
#include <stdlib.h>
#include <stdio.h>

#define _GNU_SOURCE
#include <unistd.h>

#include <sys/resource.h>
#include <poll.h>
#define N 2000 // 10k

/*
 * Emulates one IO.select call with poll(2).
 *
 * fds holds N descriptor pairs laid out as fds[2*i], fds[2*i+1] (filled in
 * by pipe() in main); only the first descriptor of each pair is polled, for
 * readability (POLLIN). The call uses a zero timeout, so it never blocks.
 * When at least one descriptor is ready, every polled entry is dumped to
 * stderr. On poll failure a diagnostic is printed and the process exits
 * with status 1.
 */
static void
task_poll(int fds[])
{
    struct pollfd pfd[N];

    for (int i=0; i<N; i++) {
        struct pollfd *p = &pfd[i];
        p->fd = fds[i*2];
        p->events = POLLIN;
    }

    int r = poll(&pfd[0], N, 0);
    if (r==0) {
        // timeout: nothing is ready
    }
    else if (r>0) {
        // pfd has exactly N entries; iterating up to N*2 would read past
        // the end of the array.
        for (int i=0; i<N; i++) {
            fprintf(stderr, "%d %d (%d)\n", i, pfd[i].fd, (int)pfd[i].revents);
        }
    }
    else {
        // NOTE(review): RLIMIT_NOFILE is the resource selector passed to
        // getrlimit(), not the current limit value, so the first number
        // printed here is that constant rather than the fd limit.
        fprintf(stderr, "poll (RLIMIT_NOFILE:%d and N:%d)\n", (int)RLIMIT_NOFILE, N*2);
        exit(1);
    }
}

#include <sys/epoll.h>

/*
 * Emulates one IO.select call with epoll.
 *
 * A fresh epoll instance is created on every call, the first descriptor of
 * each pair in fds (fds[2*i]) is registered for EPOLLIN, a single
 * non-blocking epoll_wait is issued, ready descriptors are printed to
 * stderr, and the epoll instance is closed again. Any failing system call
 * reports via perror and exits with status 1.
 */
static void
task_epoll(int fds[])
{
    struct epoll_event ready[N];

    int epfd = epoll_create(N);
    if (epfd < 0) {
        perror("epoll");
        exit(1);
    }

    // Register the first descriptor of every pair.
    for (int k=0; k<N; k++) {
        int fd = fds[k*2];
        struct epoll_event reg;
        reg.events = EPOLLIN;
        reg.data.fd = fd;
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &reg) < 0) {
            perror("epoll_ctl");
            exit(1);
        }
    }

    // Zero timeout: return immediately with whatever is ready.
    int nready = epoll_wait(epfd, ready, N, 0);
    if (nready < 0) {
        perror("epoll_wait");
        exit(1);
    }

    // nready == 0 means timeout; the loop body then never runs.
    for (int k=0; k<nready; k++) {
        fprintf(stderr, "%d fd:%d\n", k, ready[k].data.fd);
    }

    // Closing the epoll descriptor drops all registrations.
    close(epfd);
}


/*
 * Benchmark driver: opens N pipes, then runs the selected task 10,000
 * times. Uncomment exactly one of the task_xxx calls in the loop to pick
 * the backend being measured.
 */
int main(void)
{
    int fds[N * 2];

    // Pipe k occupies fds[2*k] and fds[2*k + 1].
    for (int k = 0; k < N; k++) {
        if (pipe(fds + k * 2) >= 0) {
            continue;
        }
        perror("pipe");
        fprintf(stderr, "i:%d\n", k);
        exit(1);
    }

    for (int round = 0; round < 1000 * 10; round++) {
        // task_xxx emulates IO.select

        // task_poll(fds);  // real    0m0.537s
        // task_epoll(fds); // real    0m11.191s
    }

    return 0;
}
```

The `epoll` version is about 20x slower on my machine.
Am I misunderstanding something?

(`efd` could be reused across calls, but I'm not sure how to clear all of the registered fds.)


----------------------------------------
Feature #17059: epoll as the backend of IO.select on Linux
https://bugs.ruby-lang.org/issues/17059#change-87090

* Author: dsh0416 (Delton Ding)
* Status: Open
* Priority: Normal
----------------------------------------
Ruby's current `IO.select` method calls the POSIX `select` API directly. With the new non-blocking scheduler, this may become the bottleneck of I/O scheduling. To keep backward compatibility with the current `IO.select` method, one proposal is to create a "duck" `select` that uses `epoll_wait` as the backend.

One tricky part is that the `fd_set` described in POSIX is write-only, which means it is impossible to iterate over it to generate the `epoll_event` argument for `epoll_wait`. But, as in the large-size select case, we could define our own `rb_fdset_t` struct and implement the following APIs.

```
void rb_fd_init(rb_fdset_t *);
void rb_fd_term(rb_fdset_t *);
void rb_fd_zero(rb_fdset_t *);
void rb_fd_set(int, rb_fdset_t *);
void rb_fd_clr(int, rb_fdset_t *);
int rb_fd_isset(int, const rb_fdset_t *);
void rb_fd_copy(rb_fdset_t *, const fd_set *, int);
void rb_fd_dup(rb_fdset_t *dst, const rb_fdset_t *src);
int rb_fd_select(int, rb_fdset_t *, rb_fdset_t *, rb_fdset_t *, struct timeval *);
```

TODO:
1. Implement the fd_set with dynamically allocated fds.
2. Implement the select API on top of epoll.
3. Edit io.c to use the customized fd_set struct.

I'm trying to work on a branch for this. Any suggestions for this?

---Files--------------------------------
epoll.h (3.62 KB)
epoll.h (6.44 KB)


-- 
https://bugs.ruby-lang.org/

Unsubscribe: <mailto:ruby-core-request / ruby-lang.org?subject=unsubscribe>
<http://lists.ruby-lang.org/cgi-bin/mailman/options/ruby-core>