Bug #2724: fork from other than the main thread causes wrong pthread condition on NetBSD
http://redmine.ruby-lang.org/issues/show/2724

起票者: Yui NARUSE
ステータス: Third Party's Issue, 優先度: Normal
カテゴリ: core
ruby -v: ruby 1.9.2dev (2010-02-07 trunk 26615) [i386-netbsdelf5.0.1]

NetBSD 5.0.[01] において、main thread 以外の pthread から fork すると、
pthread とカーネルスレッド (lwp) との関連が壊れるという現象が確認されています。

後述のパッチは libpthread の内部構造体 (struct __pthread_st) の定義をコピーし、
pt_lid を直接書き換えるという強引なものなので、この問題は Third Party's Issue とし、
Ruby 側では修正を入れない事としますが、情報の共有と記録のために
ここにチケットを切っておきます。

なお、この workaround の作成には @_enamiさんの助けがありました。

Index: thread_pthread.c
===================================================================
--- thread_pthread.c    (revision 26615)
+++ thread_pthread.c    (working copy)
@@ -17,6 +17,93 @@
 #include <sys/resource.h>
 #endif

+#if defined(__NetBSD_Version__) && __NetBSD_Version__ >= 500000000
+/* Hack for NetBSD 5.0.x's broken pthread->pt_lid (wrong after fork from a non-main thread; see Bug #2724) */
+/* Definitions below are copied from /src/lib/libpthread/pthread_int.h; pt_lid is written through this copy, so keep the layout unchanged */
+#define BROKEN_PTHREAD_T_PT_LID /* gates the pt_lid fixup in native_reset_timer_thread() */
+#include <lwp.h>
+#include <pthread_queue.h>
+#include <sys/tree.h>
+
+#define PTHREAD_KEYS_MAX       256 /* number of slots in pt_specific[] */
+#define        PTHREAD__UNPARK_MAX     32 /* number of slots in pt_waiters[] */
+
+/*
+ * The size of this structure needs to be no larger than struct
+ * __pthread_cleanup_store, defined in pthread.h.
+ */
+struct pt_clean_t {
+       PTQ_ENTRY(pt_clean_t)   ptc_next;
+       void    (*ptc_cleanup)(void *);
+       void    *ptc_arg;
+};
+
+struct pthread_lock_ops { /* table of function pointers operating on a __cpu_simple_lock_t */
+       void    (*plo_init)(__cpu_simple_lock_t *);
+       int     (*plo_try)(__cpu_simple_lock_t *);
+       void    (*plo_unlock)(__cpu_simple_lock_t *);
+       void    (*plo_lock)(__cpu_simple_lock_t *);
+};
+
+struct __pthread_st { /* local copy of libpthread's private per-thread structure */
+       pthread_t       pt_self;        /* Must be first. */
+       unsigned int    pt_magic;       /* Magic number */
+       int             pt_state;       /* running, blocked, etc. */
+       pthread_mutex_t pt_lock;        /* lock on state */
+       int             pt_flags;       /* PT_FLAG_* (not copied here; presumably in pthread_int.h) */
+       int             pt_cancel;      /* Deferred cancellation */
+       int             pt_errno;       /* Thread-specific errno. */
+       stack_t         pt_stack;       /* Our stack */
+       void            *pt_exitval;    /* Read by pthread_join() */
+       char            *pt_name;       /* Thread's name, set by the app. */
+       int             pt_willpark;    /* About to park */
+       lwpid_t         pt_unpark;      /* Unpark this when parking */
+       struct pthread_lock_ops pt_lockops;/* Cached to avoid PIC overhead */
+       pthread_mutex_t *pt_droplock;   /* Drop this lock if cancelled */
+       pthread_cond_t  pt_joiners;     /* Threads waiting to join. */
+
+       /* Threads to defer waking, usually until pthread_mutex_unlock(). */
+       lwpid_t         pt_waiters[PTHREAD__UNPARK_MAX];
+       size_t          pt_nwaiters;
+
+       /* Stack of cancellation cleanup handlers and their arguments */
+       PTQ_HEAD(, pt_clean_t)  pt_cleanup_stack;
+
+       /* LWP ID and entry on the list of all threads. */
+       lwpid_t         pt_lid; /* the field this hack overwrites with _lwp_self() */
+       RB_ENTRY(__pthread_st) pt_alltree;
+       PTQ_ENTRY(__pthread_st) pt_allq;
+       PTQ_ENTRY(__pthread_st) pt_deadq;
+
+       /*
+        * General synchronization data.  We try to align, as threads
+        * on other CPUs will access this data frequently.
+        */
+       int             pt_dummy1 __aligned(128);
+       struct lwpctl   *pt_lwpctl;     /* Kernel/user comms area */
+       volatile int    pt_blocking;    /* Blocking in userspace */
+       volatile int    pt_rwlocked;    /* Handed rwlock successfully */
+       volatile int    pt_signalled;   /* Received pthread_cond_signal() */
+       volatile int    pt_mutexwait;   /* Waiting to acquire mutex */
+       void * volatile pt_mutexnext;   /* Next thread in chain */
+       void * volatile pt_sleepobj;    /* Object slept on */
+       PTQ_ENTRY(__pthread_st) pt_sleep;
+       void            (*pt_early)(void *);
+       int             pt_dummy2 __aligned(128);
+
+       /* Thread-specific data.  Large so it sits close to the end. */
+       int             pt_havespecific;
+       void            *pt_specific[PTHREAD_KEYS_MAX];
+
+       /*
+        * Context for thread creation.  At the end as it's cached
+        * and then only ever passed to _lwp_create().
+        */
+       ucontext_t      pt_uc;
+};
+#endif /* __NetBSD_Version__ >= 500000000 */
+
+
 static void native_mutex_lock(pthread_mutex_t *lock);
 static void native_mutex_unlock(pthread_mutex_t *lock);
 static int native_mutex_trylock(pthread_mutex_t *lock);
@@ -833,6 +920,9 @@
 native_reset_timer_thread(void)
 {
     timer_thread_id = 0;
+#ifdef BROKEN_PTHREAD_T_PT_LID
+    ((struct __pthread_st *)pthread_self())->pt_lid = _lwp_self();
+#endif
 }

 #ifdef HAVE_SIGALTSTACK


----------------------------------------
http://redmine.ruby-lang.org