Oops#2 Part10 <4>[ 358.780230] WARNING: drivers/gpu/drm/xe/xe_guc_ct.c:527 at guc_ct_change_state+0x279/0x350 [xe], CPU#11: xe_fault_inject/8029 <4>[ 358.780301] Modules linked in: snd_hda_codec_intelhdmi snd_hda_codec_hdmi pmt_crashlog mei_gsc_proxy mei_lb mtd_intel_dg mei_gsc xe drm_gpuvm drm_gpusvm_helper drm_buddy drm_ttm_helper ttm gpu_sched drm_suballoc_helper drm_exec drm_display_helper cec rc_core drm_kunit_helpers i2c_algo_bit kunit intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp hid_generic cmdlinepart eeepc_wmi asus_wmi coretemp spi_nor sparse_keymap mei_hdcp mei_pxp platform_profile mtd wmi_bmof binfmt_misc kvm_intel usbhid hid kvm irqbypass ghash_clmulni_intel aesni_intel video snd_intel_dspcfg rapl r8169 snd_hda_codec intel_cstate snd_hda_core snd_hwdep realtek snd_pcm snd_timer i2c_i801 mei_me idma64 i2c_mux snd spi_intel_pci soundcore spi_intel nls_iso8859_1 i2c_smbus mei intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry wmi pinctrl_alderlake acpi_pad intel_vsec acpi_tad dm_multipath msr nvme_fabrics fuse efi_pstore nfnetlink <4>[ 358.780362] autofs4 [last unloaded: snd_hda_intel] <4>[ 358.780366] CPU: 11 UID: 0 PID: 8029 Comm: xe_fault_inject Tainted: G S U W L 7.0.0-rc1-lgci-xe-xe-4591-45a3045fc0dc46a89-debug+ #1 PREEMPT(lazy) <4>[ 358.780370] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER, [W]=WARN, [L]=SOFTLOCKUP Oops#2 Part9 <4>[ 358.780371] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4>[ 358.780372] RIP: 0010:guc_ct_change_state+0x2ed/0x350 [xe] <4>[ 358.780437] Code: 1f 85 eb 51 48 c1 ea 25 44 6b ca 64 44 29 c9 51 48 c7 c1 f0 74 18 a1 52 ff 75 b0 44 8b 4d 94 4c 8b 45 88 48 8b 95 78 ff ff ff <67> 48 0f b9 3a 8b 8b 48 01 00 00 48 83 c4 60 85 c9 75 13 44 89 bb <4>[ 358.780439] RSP: 0018:ffffc9002260b748 EFLAGS: 00010002 <4>[ 358.780441] RAX: ffffffffa11fbe6b RBX: ffff88816bc88738 RCX: ffffffffa11874f0 <4>[ 358.780442] RDX: ffff888103ec2b10 RSI: ffffffffa11fbe6b RDI: ffffffffa1002ef0 <4>[ 358.780444] RBP: ffffc9002260b830 R08: ffffffffa11fbebb R09: 0000000000000007 <4>[ 358.780445] R10: 0000000000000001 R11: 0000000000000514 R12: ffff88816bc88740 <4>[ 358.780446] R13: ffff88816bc887d0 R14: 0000000000000515 R15: 0000000000000001 <4>[ 358.780447] FS: 00007a391d72a980(0000) GS:ffff8888db21b000(0000) knlGS:0000000000000000 <4>[ 358.780449] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 358.780450] CR2: 00005e0d0d19c608 CR3: 000000016a957002 CR4: 0000000000f72ef0 <4>[ 358.780452] PKRU: 55555554 <4>[ 358.780453] Call Trace: <4>[ 358.780454] <4>[ 358.780463] ? xe_guc_submit_enable+0xa8/0xf0 [xe] <4>[ 358.780533] xe_guc_ct_disable+0x17/0x80 [xe] <4>[ 358.780598] xe_guc_sanitize+0x2a/0x50 [xe] <4>[ 358.780659] xe_uc_load_hw+0x19a/0x2b0 [xe] <4>[ 358.780745] ? xe_migrate_init+0x277/0x2d0 [xe] <4>[ 358.780818] xe_gt_init+0x35d/0xab0 [xe] <4>[ 358.780885] ? _raw_spin_unlock_irqrestore+0x51/0x80 <4>[ 358.780891] ? __devm_add_action+0x70/0xa0 <4>[ 358.780897] ? xe_irq_install+0x11a/0x490 [xe] Oops#2 Part8 <4>[ 358.781030] xe_device_probe+0x3c5/0xc10 [xe] <4>[ 358.781147] ? __drm_dev_dbg+0x7d/0xb0 <4>[ 358.781155] ? __drmm_add_action_or_reset+0x1e/0x50 <4>[ 358.781163] xe_pci_probe+0x396/0x610 [xe] <4>[ 358.781283] ? trace_hardirqs_on+0x22/0x100 <4>[ 358.781293] local_pci_probe+0x47/0xb0 <4>[ 358.781299] pci_call_probe+0x6c/0x360 <4>[ 358.781308] ? _raw_spin_unlock+0x22/0x50 <4>[ 358.781314] pci_device_probe+0xae/0x110 <4>[ 358.781319] really_probe+0xf1/0x410 <4>[ 358.781325] __driver_probe_device+0x8c/0x190 <4>[ 358.781330] device_driver_attach+0x57/0xd0 <4>[ 358.781335] bind_store+0x142/0x150 <4>[ 358.781341] drv_attr_store+0x24/0x50 <4>[ 358.781344] sysfs_kf_write+0x4d/0x80 <4>[ 358.781350] kernfs_fop_write_iter+0x188/0x240 <4>[ 358.781356] vfs_write+0x283/0x540 <4>[ 358.781369] ksys_write+0x6f/0xf0 <4>[ 358.781375] __x64_sys_write+0x19/0x30 <4>[ 358.781378] x64_sys_call+0x259/0x26e0 <4>[ 358.781382] do_syscall_64+0xdd/0x1470 <4>[ 358.781386] ? __fput+0x1bf/0x2f0 <4>[ 358.781391] ? fput_close_sync+0x3d/0xa0 <4>[ 358.781394] ? trace_hardirqs_on_prepare+0xe1/0x100 <4>[ 358.781399] ? do_syscall_64+0x22e/0x1470 <4>[ 358.781402] ? do_syscall_64+0x22e/0x1470 <4>[ 358.781404] ? exc_page_fault+0xbd/0x2c0 <4>[ 358.781410] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4>[ 358.781413] RIP: 0033:0x7a391f91c5a4 <4>[ 358.781416] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d a5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 <4>[ 358.781418] RSP: 002b:00007ffc2a1c5998 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 Oops#2 Part7 <4>[ 358.781421] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007a391f91c5a4 <4>[ 358.781423] RDX: 000000000000000c RSI: 00007ffc2a1c5e60 RDI: 0000000000000007 <4>[ 358.781424] RBP: 000000000000000c R08: 0000000000000073 R09: 0000000000000000 <4>[ 358.781426] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffc2a1c5e60 <4>[ 358.781428] R13: 0000000000000007 R14: 0000000000000006 R15: 00007ffc2a1c5b10 <4>[ 358.781440] <4>[ 358.781441] irq event stamp: 1539864 <4>[ 358.781443] hardirqs last enabled at (1539863): [] _raw_spin_unlock_irqrestore+0x51/0x80 <4>[ 358.781446] hardirqs last disabled at (1539864): [] _raw_spin_lock_irq+0x6f/0x80 <4>[ 358.781449] softirqs last enabled at (1539500): [] __irq_exit_rcu+0x13f/0x160 <4>[ 358.781453] softirqs last disabled at (1539493): [] __irq_exit_rcu+0x13f/0x160 <4>[ 358.781455] ---[ end trace 0000000000000000 ]--- <7>[ 358.781458] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <3>[ 358.781593] xe 0000:03:00.0: probe with driver xe failed with error -12 <3>[ 358.782170] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT0: GuC RC setup HOST_CONTROL(0) failed (-ENODEV) <7>[ 358.782534] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <7>[ 358.783888] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <7>[ 358.875284] xe 0000:03:00.0: [drm:drm_pagemap_cache_fini [drm_gpusvm_helper]] Destroying dpagemap cache. Oops#2 Part6 <7>[ 358.879501] xe 0000:03:00.0: [drm:drm_pagemap_shrinker_fini [drm_gpusvm_helper]] Destroying dpagemap shrinker. <3>[ 361.046385] xe 0000:03:00.0: [drm] *ERROR* TLB invalidation fence timeout, seqno=39 recv=38 <1>[ 361.048989] BUG: unable to handle page fault for address: ffffc9000e38a188 <1>[ 361.049021] #PF: supervisor write access in kernel mode <1>[ 361.049038] #PF: error_code(0x0002) - not-present page <6>[ 361.049049] PGD 100000067 P4D 100000067 PUD 100aba067 PMD 0 <4>[ 361.049070] Oops: Oops: 0002 [#1] SMP NOPTI <4>[ 361.049086] CPU: 8 UID: 0 PID: 7060 Comm: kworker/8:13 Tainted: G S U W L 7.0.0-rc1-lgci-xe-xe-4591-45a3045fc0dc46a89-debug+ #1 PREEMPT(lazy) <4>[ 361.049113] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER, [W]=WARN, [L]=SOFTLOCKUP <4>[ 361.049125] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4>[ 361.049141] Workqueue: xe-destroy-wq __guc_exec_queue_destroy_async [xe] <4>[ 361.049552] RIP: 0010:xe_mmio_write32+0x58/0x2b0 [xe] <4>[ 361.049990] Code: 24 66 90 65 8b 05 1c 69 2e e3 48 0f a3 05 c0 c5 d0 e2 0f 82 1d 01 00 00 41 f7 c5 00 00 00 01 0f 84 b7 00 00 00 49 03 5c 24 08 <44> 89 3b 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 <4>[ 361.050026] RSP: 0018:ffffc9000a8cf838 EFLAGS: 00010086 <4>[ 361.050043] RAX: 0000000000000002 RBX: ffffc9000e38a188 RCX: 0000000000000000 <4>[ 361.050061] RDX: 0000000000010001 RSI: 000000000000a188 RDI: ffff888157c18060 <4>[ 361.050078] RBP: ffffc9000a8cf8b0 R08: 0000000000000000 R09: 0000000000000000 <4>[ 361.050094] R10: ffff88816e460000 R11: 0000000000000001 R12: ffff888157c18060 Oops#2 Part5 <4>[ 361.050110] R13: 000000000000a188 R14: ffff88816e460000 R15: 0000000000010001 <4>[ 361.050127] FS: 0000000000000000(0000) GS:ffff8888db09b000(0000) knlGS:0000000000000000 <4>[ 361.050148] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 361.050163] CR2: ffffc9000e38a188 CR3: 000000000344a005 CR4: 0000000000f72ef0 <4>[ 361.050179] PKRU: 55555554 <4>[ 361.050189] Call Trace: <4>[ 361.050198] <4>[ 361.050216] xe_force_wake_get+0x2a5/0x940 [xe] <4>[ 361.050614] ? _raw_spin_unlock_irqrestore+0x27/0x80 <4>[ 361.050641] ? mark_held_locks+0x46/0x90 <4>[ 361.050663] send_tlb_inval_ggtt+0xfa/0x270 [xe] <4>[ 361.051086] ? trace_hardirqs_on+0x22/0x100 <4>[ 361.051106] ? _raw_spin_unlock_irq+0x27/0x70 <4>[ 361.051123] ? xe_tlb_inval_fence_prep+0xbf/0x1d0 [xe] <4>[ 361.051613] xe_tlb_inval_ggtt+0x73/0x250 [xe] <4>[ 361.051746] ? find_held_lock+0x31/0x90 <4>[ 361.051767] ? ggtt_node_remove+0xcb/0x140 [xe] <4>[ 361.051838] ggtt_invalidate_gt_tlb.part.0+0x1f/0xb0 [xe] <4>[ 361.051907] ggtt_node_remove+0x12c/0x140 [xe] <4>[ 361.051975] xe_ggtt_node_remove+0x40/0xa0 [xe] <4>[ 361.052043] xe_ggtt_remove_bo+0x87/0x250 [xe] <4>[ 361.052110] ? _raw_write_unlock+0x22/0x50 <4>[ 361.052113] ? drm_vma_offset_remove+0x65/0x80 <4>[ 361.052118] xe_ttm_bo_destroy+0xa2/0x2d0 [xe] <4>[ 361.052184] ? lock_is_held_type+0xa3/0x130 <4>[ 361.052189] ttm_bo_release+0x70/0x330 [ttm] <4>[ 361.052195] ? xe_ggtt_might_lock+0x29/0x60 [xe] <4>[ 361.052262] ? lock_release+0xd0/0x2b0 <4>[ 361.052266] ttm_bo_fini+0x3c/0x70 [ttm] <4>[ 361.052271] xe_gem_object_free+0x1a/0x30 [xe] Oops#2 Part4 <4>[ 361.052338] drm_gem_object_free+0x1d/0x40 <4>[ 361.052341] xe_bo_put+0x12a/0x190 [xe] <4>[ 361.052408] xe_lrc_destroy+0x47/0x60 [xe] <4>[ 361.052483] xe_exec_queue_fini+0x85/0xd0 [xe] <4>[ 361.052550] __guc_exec_queue_destroy_async+0x6c/0x1a0 [xe] <4>[ 361.052621] process_one_work+0x22e/0x740 <4>[ 361.052626] worker_thread+0x1e8/0x3d0 <4>[ 361.052629] ? __pfx_worker_thread+0x10/0x10 <4>[ 361.052632] kthread+0x10d/0x150 <4>[ 361.052635] ? __pfx_kthread+0x10/0x10 <4>[ 361.052638] ret_from_fork+0x3d4/0x480 <4>[ 361.052642] ? __pfx_kthread+0x10/0x10 <4>[ 361.052645] ret_from_fork_asm+0x1a/0x30 <4>[ 361.052649] <4>[ 361.052651] Modules linked in: snd_hda_codec_intelhdmi snd_hda_codec_hdmi pmt_crashlog mei_gsc_proxy mei_lb mtd_intel_dg mei_gsc xe drm_gpuvm drm_gpusvm_helper drm_buddy drm_ttm_helper ttm gpu_sched drm_suballoc_helper drm_exec drm_display_helper cec rc_core drm_kunit_helpers i2c_algo_bit kunit intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp hid_generic cmdlinepart eeepc_wmi asus_wmi coretemp spi_nor sparse_keymap mei_hdcp mei_pxp platform_profile mtd wmi_bmof binfmt_misc kvm_intel usbhid hid kvm irqbypass ghash_clmulni_intel aesni_intel video snd_intel_dspcfg rapl r8169 snd_hda_codec intel_cstate snd_hda_core snd_hwdep realtek snd_pcm snd_timer i2c_i801 mei_me idma64 i2c_mux snd spi_intel_pci soundcore spi_intel nls_iso8859_1 i2c_smbus mei intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry wmi pinctrl_alderlake acpi_pad intel_vsec acpi_tad dm_multipath msr nvme_fabrics fuse efi_pstore nfnetlink Oops#2 Part3 <4>[ 361.052686] autofs4 [last unloaded: snd_hda_intel] <4>[ 361.052713] CR2: ffffc9000e38a188 <4>[ 361.052716] ---[ end trace 0000000000000000 ]--- <4>[ 361.196624] RIP: 0010:xe_mmio_write32+0x58/0x2b0 [xe] <4>[ 361.196723] Code: 24 66 90 65 8b 05 1c 69 2e e3 48 0f a3 05 c0 c5 d0 e2 0f 82 1d 01 00 00 41 f7 c5 00 00 00 01 0f 84 b7 00 00 00 49 03 5c 24 08 <44> 89 3b 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 <4>[ 361.196730] RSP: 0018:ffffc9000a8cf838 EFLAGS: 00010086 <4>[ 361.196734] RAX: 0000000000000002 RBX: ffffc9000e38a188 RCX: 0000000000000000 <4>[ 361.196737] RDX: 0000000000010001 RSI: 000000000000a188 RDI: ffff888157c18060 <4>[ 361.196742] RBP: ffffc9000a8cf8b0 R08: 0000000000000000 R09: 0000000000000000 <4>[ 361.196744] R10: ffff88816e460000 R11: 0000000000000001 R12: ffff888157c18060 <4>[ 361.196747] R13: 000000000000a188 R14: ffff88816e460000 R15: 0000000000010001 <4>[ 361.196750] FS: 0000000000000000(0000) GS:ffff8888db09b000(0000) knlGS:0000000000000000 <4>[ 361.196753] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 361.196756] CR2: ffffc9000e38a188 CR3: 000000000344a005 CR4: 0000000000f72ef0 <4>[ 361.196758] PKRU: 55555554 <6>[ 361.196760] note: kworker/8:13[7060] exited with irqs disabled <6>[ 361.196769] note: kworker/8:13[7060] exited with preempt_count 1 <3>[ 363.348936] xe 0000:03:00.0: [drm] *ERROR* TLB invalidation fence timeout, seqno=-2125596358 recv=0 <1>[ 363.348978] BUG: unable to handle page fault for address: 000000000a8cfbd8 <1>[ 363.348993] #PF: supervisor read access in kernel mode <1>[ 363.349005] #PF: error_code(0x0000) - not-present page <6>[ 363.349016] PGD 0 P4D 0 Oops#2 Part2 <4>[ 363.349029] Oops: Oops: 0000 [#2] SMP NOPTI <4>[ 363.349045] CPU: 6 UID: 0 PID: 12 Comm: kworker/u64:0 Tainted: G S UD W L 7.0.0-rc1-lgci-xe-xe-4591-45a3045fc0dc46a89-debug+ #1 PREEMPT(lazy) <4>[ 363.349074] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER, [D]=DIE, [W]=WARN, [L]=SOFTLOCKUP <4>[ 363.349087] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4>[ 363.349102] Workqueue: gt-ordered-wq xe_tlb_inval_fence_timeout [xe] <4>[ 363.349632] RIP: 0010:__list_del_entry_valid_or_report+0x3b/0x120 <4>[ 363.349658] Code: 6f 08 4d 85 e4 74 50 4d 85 ed 74 5e 48 b8 00 01 00 00 00 00 ad de 49 39 c4 74 62 48 b8 22 01 00 00 00 00 ad de 49 39 c5 74 71 <49> 39 7d 00 0f 85 85 00 00 00 49 39 7c 24 08 0f 85 9f 00 00 00 b8 <4>[ 363.349693] RSP: 0018:ffffc900000b7d58 EFLAGS: 00010003 <4>[ 363.349709] RAX: dead000000000122 RBX: ffffc9000a8cfab0 RCX: 0000000000000000 <4>[ 363.349726] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffc9000a8cfab0 <4>[ 363.349742] RBP: ffffc900000b7d70 R08: 0000000000000000 R09: 0000000000000000 <4>[ 363.349758] R10: 0000000000000000 R11: 0000000000000000 R12: ffffc9000a8cfb40 <4>[ 363.349773] R13: 000000000a8cfbd8 R14: ffffffff814e0094 R15: ffffc9000a8cfaf8 <4>[ 363.349789] FS: 0000000000000000(0000) GS:ffff8888daf9b000(0000) knlGS:0000000000000000 <4>[ 363.349808] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 363.349823] CR2: 000000000a8cfbd8 CR3: 000000000344a001 CR4: 0000000000f72ef0 <4>[ 363.349839] PKRU: 55555554 <4>[ 363.349848] Call Trace: <4>[ 363.349857] <4>[ 363.349869] ? lock_acquire+0x2b3/0x2f0 <4>[ 363.349891] xe_tlb_inval_fence_signal+0x3d/0x1f0 [xe] Oops#2 Part1 <4>[ 363.350369] ? call_rcu+0x34/0x50 <4>[ 363.350388] xe_tlb_inval_fence_timeout+0xb6/0x220 [xe] <4>[ 363.350838] process_one_work+0x22e/0x740 <4>[ 363.350868] worker_thread+0x1e8/0x3d0 <4>[ 363.350887] ? __pfx_worker_thread+0x10/0x10 <4>[ 363.350906] kthread+0x10d/0x150 <4>[ 363.350921] ? __pfx_kthread+0x10/0x10 <4>[ 363.350939] ret_from_fork+0x3d4/0x480 <4>[ 363.350957] ? __pfx_kthread+0x10/0x10 <4>[ 363.350975] ret_from_fork_asm+0x1a/0x30 <4>[ 363.351000] <4>[ 363.351008] Modules linked in: snd_hda_codec_intelhdmi snd_hda_codec_hdmi pmt_crashlog mei_gsc_proxy mei_lb mtd_intel_dg mei_gsc xe drm_gpuvm drm_gpusvm_helper drm_buddy drm_ttm_helper ttm gpu_sched drm_suballoc_helper drm_exec drm_display_helper cec rc_core drm_kunit_helpers i2c_algo_bit kunit intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp hid_generic cmdlinepart eeepc_wmi asus_wmi coretemp spi_nor sparse_keymap mei_hdcp mei_pxp platform_profile mtd wmi_bmof binfmt_misc kvm_intel usbhid hid kvm irqbypass ghash_clmulni_intel aesni_intel video snd_intel_dspcfg rapl r8169 snd_hda_codec intel_cstate snd_hda_core snd_hwdep realtek snd_pcm snd_timer i2c_i801 mei_me idma64 i2c_mux snd spi_intel_pci soundcore spi_intel nls_iso8859_1 i2c_smbus mei intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry wmi pinctrl_alderlake acpi_pad intel_vsec acpi_tad dm_multipath msr nvme_fabrics fuse efi_pstore nfnetlink <4>[ 363.351195] autofs4 [last unloaded: snd_hda_intel] <4>[ 363.351344] CR2: 000000000a8cfbd8 <4>[ 363.351358] ---[ end trace 0000000000000000 ]---