Re: deadlock in 10.12, maybe?
Re: deadlock in 10.12, maybe?
- Subject: Re: deadlock in 10.12, maybe?
- From: Jorgen Lundman <email@hidden>
- Date: Wed, 05 Oct 2016 12:02:52 +0900
Vivek Verma wrote:
> Can you file a radar for this (with preferably the kernel core dump attached) ? I would like to see some vnode state which isn't in just the stack trace.
>
Certainly, I will re-run the zfs-tester until it triggers again; generally
every second run will hang in some way (usually in the umount).
I have now turned on panic dump (and NMI), and I notice it takes some 2
hours to copy the dump over. I wonder if I can set things up so that when I
NMI the VM, I can attach lldb and poke around, and then, if I decide to send
a dump across, issue some command in lldb to make the panic dump start?
Is that at all possible?
Meanwhile, the first deadlock I got after turning panic dump on appears to
be a PAGEIN memory issue.
The most interesting stacks (my guess) are:
vm_page_wait((int) interruptible = <>, )
vm_object_upl_request((vm_object_t) object = <>, ,
(vm_object_offset_t) offset = 464613376, (upl_size_t) size = 67108864,
(upl_t *) upl_ptr = <>, , (upl_page_info_array_t) user_page_list = <>, ,
(unsigned int *) page_list_count = <>, , (upl_control_flags_t) cntrl_flags
= <>, )
memory_object_upl_request [inlined]((memory_object_control_t) control
= <>, , (memory_object_offset_t) offset = <>, , (upl_size_t) size = <>, ,
(upl_t *) upl_ptr = 0xffffff916881ba70, (upl_page_info_array_t)
user_page_list = <>, , (unsigned int *) page_list_count = <no location,
value may have been optimized out>, , (int) cntrl_flags = 134219084)
ubc_create_upl((vnode *) vp = <>, , (off_t) f_offset = <>, , (int)
bufsize = <>, , (upl_t *) uplp = 0xffffff916881ba70, (upl_page_info_t **)
plp = 0xffffff916881ba68, (int) uplflags = <>, )
com.apple.filesystems.hfs.kext + 0x271e3
VNOP_PAGEIN [inlined]((upl_offset_t) pl_offset = <>, , (size_t) size =
18446743527138321536, (int) flags = -1, (vfs_context_t) ctx = <>, )
vnode_pagein((vnode *) vp = <>, , (upl_t) upl = <>, , (upl_offset_t)
upl_offset = 821459000, (vm_object_offset_t) f_offset = <>, , (upl_size_t)
size = <>, , (int) flags = -1, (int *) errorp = <>, )
vnode_pager_cluster_read((vnode_pager_t) vnode_object =
0xffffff8030f6e988, (vm_object_offset_t) base_offset = 464613376,
(vm_object_offset_t) offset = <>, , (uint32_t) io_streaming = <>, ,
(vm_size_t) cnt = 0x0000000000001000)
vnode_pager_data_request((memory_object_t) mem_obj =
0xffffff8030f6e988, (memory_object_offset_t) offset = 464613376,
(memory_object_cluster_size_t) length = <>, , (vm_prot_t) desired_access =
<>, , (memory_object_fault_info_t) fault_info = <>, )
memory_object_data_request [inlined]((memory_object_t) memory_object =
0xffffff8030f6e988, (memory_object_offset_t) offset = <>, ,
(memory_object_cluster_size_t) length = 4096, (vm_prot_t) desired_access =
1, (memory_object_fault_info_t) fault_info = 0xffffff80265106e9)
vm_fault_page((vm_object_t) first_object = <>, , (vm_object_offset_t)
first_offset = <>, , (vm_prot_t) fault_type = <>, , (boolean_t)
must_be_resident = 0, (boolean_t) caller_lookup = 0, (vm_prot_t *)
protection = <>, , (vm_page_t *) result_page = <no location, value may have
been optimized out>, , (vm_page_t *) top_page = <no location, value may
have been optimized out>, , (int *) type_of_fault = 0x00007fff00000001,
(kern_return_t *) error_code = <no location, value may have been optimized
out>, , (boolean_t) no_zero_fill = <no location, value may have been
optimized out>, , (boolean_t) data_supply = 0, (vm_object_fault_info_t)
fault_info = 0xffffff80265106e9)
vm_fault_internal((vm_map_t) map = <>, , (vm_map_offset_t) vaddr = <>,
, (vm_prot_t) caller_prot = <>, , (boolean_t) change_wiring = 0, (int)
interruptible = 2, (pmap_t) caller_pmap = 0x0000000000000000,
(vm_map_offset_t) caller_pmap_addr = 0, (ppnum_t *) physpage_p = <no
location, value may have been optimized out>, )
vm_fault [inlined]((vm_map_t) map = <>, , (vm_map_offset_t) vaddr =
<>, , (vm_prot_t) fault_type = <>, , (boolean_t) change_wiring = 0, (int)
interruptible = 2, (pmap_t) caller_pmap = <>, , (vm_map_offset_t)
caller_pmap_addr = 0)
user_trap((x86_saved_state_t *) saved_state = 0xffffff8035a61e20)
kernel`hndl_alltraps + 0xe5
thread 2:
vm_page_wait((int) interruptible = <>, )
kernel_memory_allocate((vm_map_t) map = <>, , (vm_offset_t *) addrp =
<>, , (vm_size_t) size = <>, , (vm_offset_t) mask = 0x0000000000000000,
(int) flags = <>, , (vm_tag_t) tag = <>, )
kmem_alloc_flags [inlined]((vm_map_t) map = 0xffffff8028e486e8,
(vm_offset_t *) addrp = <>, , (vm_size_t) size = 0x0000000000006008, (int)
flags = 2048)
kalloc_canblock((vm_size_t *) psize = 0xffffff90ef9db670, (boolean_t)
canblock = <>, , (vm_allocation_site_t *) site = <>, )
OSMalloc((uint32_t) size = 24584, (OSMallocTag) tag = 0xffffff90ef9db670)
com.apple.AppleFSCompression.AppleFSCompressionTypeZlib + 0x3787
com.apple.AppleFSCompression.AppleFSCompressionTypeZlib + 0x32fe
com.apple.AppleFSCompression.AppleFSCompressionTypeZlib + 0x2752
decmpfs_fetch_uncompressed_data((vnode_t) vp = <no location, value may
have been optimized out>, , (decmpfs_cnode *) cp = <no location, value may
have been optimized out>, , (decmpfs_header *) hdr = <no location, value
may have been optimized out>, , (off_t) offset = <no location, value may
have been optimized out>, , (user_ssize_t) size = <>, , (int) nvec = 1,
(decmpfs_vector *) vec = <no location, value may have been optimized out>,
, (uint64_t *) bytes_read = <no location, value may have been optimized
out>, )
decmpfs_pagein_compressed((vnop_pagein_args *) ap = <>, , (int *)
is_compressed = 0xffffff90ef9dba58, (decmpfs_cnode *) cp = <>, )
com.apple.filesystems.hfs.kext + 0x27378
VNOP_PAGEIN [inlined]((upl_offset_t) pl_offset = <>, , (size_t) size =
0, (int) flags = 8192, (vfs_context_t) ctx = <>, )
vnode_pagein((vnode *) vp = <>, , (upl_t) upl = <>, , (upl_offset_t)
upl_offset = 81920, (vm_object_offset_t) f_offset = <>, , (upl_size_t) size
= <>, , (int) flags = 8192, (int *) errorp = <>, )
vnode_pager_cluster_read((vnode_pager_t) vnode_object =
0xffffff8030f6e960, (vm_object_offset_t) base_offset = 81920,
(vm_object_offset_t) offset = <>, , (uint32_t) io_streaming = <>, ,
(vm_size_t) cnt = 0x0000000000004000)
vnode_pager_data_request((memory_object_t) mem_obj =
0xffffff8030f6e960, (memory_object_offset_t) offset = 81920,
(memory_object_cluster_size_t) length = <>, , (vm_prot_t) desired_access =
<>, , (memory_object_fault_info_t) fault_info = <>, )
memory_object_data_request [inlined]((memory_object_t) memory_object =
0xffffff8030f6e960, (memory_object_offset_t) offset = <>, ,
(memory_object_cluster_size_t) length = 4096, (vm_prot_t) desired_access =
1, (memory_object_fault_info_t) fault_info = 0x0000000000000206)
vm_fault_page((vm_object_t) first_object = <>, , (vm_object_offset_t)
first_offset = <>, , (vm_prot_t) fault_type = <>, , (boolean_t)
must_be_resident = 0, (boolean_t) caller_lookup = 0, (vm_prot_t *)
protection = <>, , (vm_page_t *) result_page = <no location, value may have
been optimized out>, , (vm_page_t *) top_page = <no location, value may
have been optimized out>, , (int *) type_of_fault = 0x0000000000000001,
(kern_return_t *) error_code = <no location, value may have been optimized
out>, , (boolean_t) no_zero_fill = <no location, value may have been
optimized out>, , (boolean_t) data_supply = 0, (vm_object_fault_info_t)
fault_info = 0x0000000000000206)
vm_fault_internal((vm_map_t) map = <>, , (vm_map_offset_t) vaddr = <>,
, (vm_prot_t) caller_prot = <>, , (boolean_t) change_wiring = 0, (int)
interruptible = 2, (pmap_t) caller_pmap = 0x0000000000000000,
(vm_map_offset_t) caller_pmap_addr = 0, (ppnum_t *) physpage_p = <no
location, value may have been optimized out>, )
vm_fault [inlined]((vm_map_t) map = <>, , (vm_map_offset_t) vaddr =
<>, , (vm_prot_t) fault_type = <>, , (boolean_t) change_wiring = 0, (int)
interruptible = 2, (pmap_t) caller_pmap = <>, , (vm_map_offset_t)
caller_pmap_addr = 0)
(Ugh, that is pretty unreadable.)
Most other threads are stuck in "vm_page_wait()", except for those that are
simply idle as normal. There are no active ZFS threads/mounts in this dump.
Full stack list here:
http://www.lundman.net/hardcopy10.txt
I have the full panic dump of this one if wanted.
537365576 Oct 5 10:46 core-xnu-3789.1.32-172.16.248.129-5a07d7df.gz
--
Jorgen Lundman | <email@hidden>
Unix Administrator | +81 (0)90-5578-8500
Shibuya-ku, Tokyo | Japan
_______________________________________________
Do not post admin requests to the list. They will be ignored.
Filesystem-dev mailing list (email@hidden)
Help/Unsubscribe/Update your Subscription:
This email sent to email@hidden