Issue #8214 has been updated by Satoshi TAGOMORI.


ッ∫#祕A -> B 蕭礇湛B -> A 薛祚鴻検鴻竇 祟礇若ケ罐 秕
Yui NARUSE wrote:
> とりあえず以下のような感じで作ってみました。
> やっぱりバックトレースも欲しいかな
> 
> ```
> % ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
> `miniruby' is up to date.
> ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
> -e:1:in `join': No live threads left. Deadlock? (fatal)
> 4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
> * #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
>    rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
> * #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
>    rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
> * #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
>    rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
>     depended by: tb_thread_id:0x00000802c1b200
>     depended by: tb_thread_id:0x00000802c17300
> * #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
>    rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
>     depended by: tb_thread_id:0x00000802c1af00
>         from -e:1:in `<main>'
> ```
> 
> ```diff
> diff --git a/thread.c b/thread.c
> index d17b663..359dada 100644
> --- a/thread.c
> +++ b/thread.c
> @@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
>  }
>  
>  static const char *
> -thread_status_name(rb_thread_t *th)
> +thread_status_name(rb_thread_t *th, int detail)
>  {
>      switch (th->status) {
>        case THREAD_RUNNABLE:
> @@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
>  	    return "aborting";
>  	else
>  	    return "run";
> -      case THREAD_STOPPED:
>        case THREAD_STOPPED_FOREVER:
> +	if (detail) return "sleep_forever";
> +      case THREAD_STOPPED:
>  	return "sleep";
>        case THREAD_KILLED:
>  	return "dead";
> @@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
>  	}
>  	return Qfalse;
>      }
> -    return rb_str_new2(thread_status_name(th));
> +    return rb_str_new2(thread_status_name(th, FALSE));
>  }
>  
>  
> @@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
>      VALUE str;
>  
>      GetThreadPtr(thread, th);
> -    status = thread_status_name(th);
> +    status = thread_status_name(th, FALSE);
>      str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
>      if (!NIL_P(th->name)) {
>  	rb_str_catf(str, "@%"PRIsVALUE, th->name);
> @@ -4728,26 +4729,36 @@ ruby_native_thread_p(void)
>  }
>  
>  static void
> -debug_deadlock_check(rb_vm_t *vm)
> +debug_deadlock_check(rb_vm_t *vm, VALUE msg)
>  {
> -#ifdef DEBUG_DEADLOCK_CHECK
>      rb_thread_t *th = 0;
>  
> -    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
> +    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
> +	    vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
>      list_for_each(&vm->living_threads, th, vmlt_node) {
> -	printf("th:%p %d %d", th, th->status, th->interrupt_flag);
>  	if (th->locking_mutex) {
>  	    rb_mutex_t *mutex;
> +	    struct rb_thread_struct volatile *mth;
> +	    int waiting;
>  	    GetMutexPtr(th->locking_mutex, mutex);
>  
>  	    native_mutex_lock(&mutex->lock);
> -	    printf(" %p %d\n", mutex->th, mutex->cond_waiting);
> +	    mth = mutex->th;
> +	    waiting = mutex->cond_waiting;
>  	    native_mutex_unlock(&mutex->lock);
> +	    rb_str_catf(msg, "  rb_thread_t:%p %s %u waiting:%p cond_waiting:%d", th, thread_status_name(th, TRUE), th->interrupt_flag, mth, waiting);
> +	}
> +	else {
> +	    rb_str_catf(msg, "  rb_thread_t:%p %s %u\n", th, thread_status_name(th, TRUE), th->interrupt_flag);
> +	}
> +	{
> +	    rb_thread_list_t *list = th->join_list;
> +	    while (list) {
> +		rb_str_catf(msg, "    depended by: %p\n", list->th);
> +		list = list->next;
> +	    }
>  	}
> -	else
> -	    puts("");
>      }
> -#endif
>  }
>  
>  static void
> @@ -4782,7 +4793,7 @@ rb_check_deadlock(rb_vm_t *vm)
>  	VALUE argv[2];
>  	argv[0] = rb_eFatal;
>  	argv[1] = rb_str_new2("No live threads left. Deadlock?");
> -	debug_deadlock_check(vm);
> +	debug_deadlock_check(vm, argv[1]);
>  	vm->sleeper--;
>  	rb_threadptr_raise(vm->main_thread, 2, argv);
>      }
> ```



----------------------------------------
Feature #8214: デッドロックチェックに全スレッドのバックトレースダンプの追加
https://bugs.ruby-lang.org/issues/8214#change-59007

* Author: Motohiro KOSAKI
* Status: Open
* Priority: Normal
* Assignee: 
----------------------------------------
さいきん、くくぱど方面のデッドロックバグの調査を手伝っていて気づいたのですが、
現在のRubyのdeadlock detectorは開発者に優しくないです。

例外があがるとスタックが巻戻ってしまって証拠が消えてしまうので、例外をあげるまえに
全スレッドのバックトレースをダンプするオプションがあるべきではないでしょうか。
デッドロックが起きたときは99%、直さないといけない状況になるはずで、証拠を残す機能は
有意義なはずです。

TBD: ruby level だけで十分か、Cレベルも含めるか




-- 
https://bugs.ruby-lang.org/