Issue #2025 has been updated by Yusuke Endoh.

Category set to core
Assigned to set to Yusuke Endoh

Hi,

> I tried to fix some testsuite failures on GNU/kFreeBSD,

Thank you for your investigation reports!

I show a patch before I answer to each report.
The following patch removes "after_exec()" and changes sigprocmask()
to pthread_sigmask().
"make test" and "make test-rubyspec" was passed on my Linux.

I'm afraid that this patch breaks Ruby on other platforms, such as
os x, Solaris, etc.  Could you (or anyone) test the patch?


diff --git a/process.c b/process.c
index e566e9b..4f68d69 100644
--- a/process.c
+++ b/process.c
@@ -2620,9 +2620,6 @@ rb_f_fork(VALUE obj)
 
     switch (pid = rb_fork(0, 0, 0, Qnil)) {
       case 0:
-#ifdef linux
-	after_exec();
-#endif
 	rb_thread_atfork();
 	if (rb_block_given_p()) {
 	    int status;
diff --git a/signal.c b/signal.c
index 3fe1633..92e5d35 100644
--- a/signal.c
+++ b/signal.c
@@ -888,11 +888,7 @@ static VALUE
 trap_ensure(struct trap_arg *arg)
 {
     /* enable interrupt */
-#ifdef HAVE_SIGPROCMASK
-    sigprocmask(SIG_SETMASK, &arg->mask, NULL);
-#else
-    sigsetmask(arg->mask);
-#endif
+    pthread_sigmask(SIG_SETMASK, &arg->mask, NULL);
     trap_last_mask = arg->mask;
     return 0;
 }
@@ -902,11 +898,7 @@ void
 rb_trap_restore_mask(void)
 {
 #if USE_TRAP_MASK
-# ifdef HAVE_SIGPROCMASK
-    sigprocmask(SIG_SETMASK, &trap_last_mask, NULL);
-# else
-    sigsetmask(trap_last_mask);
-# endif
+    pthread_sigmask(SIG_SETMASK, &trap_last_mask, NULL);
 #endif
 }
 
@@ -966,12 +958,8 @@ sig_trap(int argc, VALUE *argv)
     }
 #if USE_TRAP_MASK
     /* disable interrupt */
-# ifdef HAVE_SIGPROCMASK
     sigfillset(&arg.mask);
-    sigprocmask(SIG_BLOCK, &arg.mask, &arg.mask);
-# else
-    arg.mask = sigblock(~0);
-# endif
+    pthread_sigmask(SIG_BLOCK, &arg.mask, &arg.mask);
 
     return rb_ensure(trap, (VALUE)&arg, trap_ensure, (VALUE)&arg);
 #else
@@ -1026,12 +1014,8 @@ init_sigchld(int sig)
 
 #if USE_TRAP_MASK
     /* disable interrupt */
-# ifdef HAVE_SIGPROCMASK
     sigfillset(&mask);
-    sigprocmask(SIG_BLOCK, &mask, &mask);
-# else
-    mask = sigblock(~0);
-# endif
+    pthread_sigmask(SIG_BLOCK, &mask, &mask);
 #endif
 
     oldfunc = ruby_signal(sig, SIG_DFL);
@@ -1042,13 +1026,8 @@ init_sigchld(int sig)
     }
 
 #if USE_TRAP_MASK
-#ifdef HAVE_SIGPROCMASK
     sigdelset(&mask, sig);
-    sigprocmask(SIG_SETMASK, &mask, NULL);
-#else
-    mask &= ~sigmask(sig);
-    sigsetmask(mask);
-#endif
+    pthread_sigmask(SIG_SETMASK, &mask, NULL);
     trap_last_mask = mask;
 #endif
 }




> Sometimes the child process would have 2 timer threads, sometimes
> it would have the expected 1.

I confirmed that issue on Linux.

The following patch logs when timer thread wakes up.

diff --git a/thread_pthread.c b/thread_pthread.c
index e6295db..51ed234 100644
--- a/thread_pthread.c
+++ b/thread_pthread.c
@@ -762,6 +762,7 @@ thread_timer(void *dummy)
 #define WAIT_FOR_10MS() native_cond_timedwait(&timer_thread_cond, &timer_thread_lock, get_ts(&ts, PER_NANO/100))
     while (system_working > 0) {
 	int err = WAIT_FOR_10MS();
+	printf("pid: %d, tid: %p, timer thread tick\n", getpid(), pthread_self());
 	if (err == ETIMEDOUT);
 	else if (err == 0 || err == EINTR) {
 	    if (rb_signal_buff_size() == 0) break;

With this patch applied,

  $ ./miniruby -e '
    trap(:USR1) { p :signaled }
    if pid = fork
      Process.kill(:USR1, pid)
      Process.wait(pid)
    else
      sleep 0.1
      p "#$$ wait start"
      sleep 3
      p "#$$ wait end"
    end
  '
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  :signaled
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  *snip*
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  "1017 wait start"
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  *snip*
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  pid: 1017, tid: 0xb7b77b90, timer thread tick
  pid: 1015, tid: 0xb7b77b90, timer thread tick
  "1017 wait end"
  pid: 1017, tid: 0xb7fddb90, timer thread tick
  pid: 1015, tid: 0xb7b77b90, timer thread tick


The sub-process whose pid is 1017 has two timer threads whose tids
are 0xb7fddb90 and 0xb7b77b90.

I agree the cause is "after_exec()" in rb_f_fork().  It should be
removed.


> Ruby should not use PTHREAD_CREATE_DETACHED and after that use pthread_join.

As far as I know, pthread_join is not used against thread created
with PTHREAD_CREATE_DETACHED.  Ruby uses pthread_join against only
timer thread which is not created with PTHREAD_CREATE_DETACHED, I
think.
Did you actually see any detached thread are pthread_join'ed?


> Ruby should use pthread_sigmask() instead of sigprocmask()
> when available and so on.

Agreed.


Thanks once again!

-- 
Yusuke Endoh <mame / tsg.ne.jp>
----------------------------------------
http://redmine.ruby-lang.org/issues/show/2025

----------------------------------------
http://redmine.ruby-lang.org