Oops#1 Part8 <3>[ 290.219868] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT0: GuC PC query task state failed: -ENOMEM <4>[ 290.220048] ------------[ cut here ]------------ <4>[ 290.220050] xe 0000:03:00.0: [drm] Assertion `ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED` failed! <4>[ 290.220050] platform: BATTLEMAGE subplatform: 7 <4>[ 290.220050] graphics: Xe2_HPG 20.01 step A0 <4>[ 290.220050] media: Xe2_HPM 13.01 step A1 <4>[ 290.220050] tile: 0 VRAM 12.0 GiB <4>[ 290.220050] GT: 0 type 1 <4>[ 290.220054] WARNING: drivers/gpu/drm/xe/xe_guc_ct.c:526 at guc_ct_change_state+0x279/0x350 [xe], CPU#6: xe_fault_inject/4842 <4>[ 290.220153] Modules linked in: snd_hda_codec_intelhdmi snd_hda_codec_hdmi pmt_crashlog mei_gsc_proxy mei_lb mtd_intel_dg mei_gsc xe drm_gpuvm drm_gpusvm_helper drm_ttm_helper ttm gpu_sched drm_suballoc_helper drm_exec drm_buddy drm_display_helper cec rc_core drm_kunit_helpers i2c_algo_bit kunit overlay intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp cmdlinepart coretemp hid_generic spi_nor asus_nb_wmi asus_wmi mei_pxp mei_hdcp mtd sparse_keymap platform_profile wmi_bmof kvm_intel snd_hda_intel binfmt_misc usbhid snd_intel_dspcfg kvm hid irqbypass snd_hda_codec ghash_clmulni_intel aesni_intel r8169 snd_hda_core rapl video snd_hwdep intel_cstate snd_pcm realtek snd_timer i2c_i801 intel_pmc_core mei_me i2c_mux snd spi_intel_pci nls_iso8859_1 i2c_smbus spi_intel mei soundcore idma64 pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry wmi intel_vsec acpi_tad acpi_pad pinctrl_alderlake dm_multipath msr nvme_fabrics Oops#1 Part7 <4>[ 290.220236] efi_pstore fuse nfnetlink autofs4 <4>[ 290.220243] CPU: 6 UID: 0 PID: 4842 Comm: xe_fault_inject Tainted: G S U W 6.19.0-rc5-lgci-xe-xe-4393-d398f90fc4dec5593+ #1 PREEMPT(voluntary) <4>[ 290.220247] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER, [W]=WARN <4>[ 290.220249] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4>[ 290.220250] RIP: 0010:guc_ct_change_state+0x2ed/0x350 [xe] <4>[ 290.220371] Code: 1f 85 eb 51 48 c1 ea 25 44 6b ca 64 44 29 c9 51 48 c7 c1 98 2c 18 a1 52 ff 75 b0 44 8b 4d 94 4c 8b 45 88 48 8b 95 78 ff ff ff <67> 48 0f b9 3a 8b 8b 48 01 00 00 48 83 c4 60 85 c9 75 13 44 89 bb Oops#1 Part6 <4>[ 290.220373] RSP: 0018:ffffc90003eeb6d0 EFLAGS: 00010002 <4>[ 290.220377] RAX: ffffffffa11f66c7 RBX: ffff8881107308a0 RCX: ffffffffa1182c98 <4>[ 290.220379] RDX: ffff888104f30790 RSI: ffffffffa11f66c7 RDI: ffffffffa1002f50 <4>[ 290.220380] RBP: ffffc90003eeb7b8 R08: ffffffffa11f670c R09: 0000000000000007 <4>[ 290.220382] R10: 0000000000000001 R11: 0000000000000514 R12: ffff8881107308a8 <4>[ 290.220384] R13: ffff888110730938 R14: 0000000000000515 R15: 0000000000000001 <4>[ 290.220386] FS: 00007fedce675940(0000) GS:ffff8888dafdd000(0000) knlGS:0000000000000000 <4>[ 290.220388] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 290.220390] CR2: 00005c22e31bad18 CR3: 000000015935b005 CR4: 0000000000f72ef0 <4>[ 290.220392] PKRU: 55555554 <4>[ 290.220393] Call Trace: <4>[ 290.220395] <4>[ 290.220407] ? xe_guc_submit_enable+0xa8/0xf0 [xe] <4>[ 290.220533] xe_guc_ct_disable+0x17/0x80 [xe] <4>[ 290.220654] xe_guc_sanitize+0x2a/0x50 [xe] <4>[ 290.220774] xe_uc_load_hw+0x187/0x2a0 [xe] <4>[ 290.220930] ? xe_migrate_init+0x277/0x2d0 [xe] <4>[ 290.221066] xe_gt_init+0x363/0xab0 [xe] <4>[ 290.221184] ? trace_hardirqs_on+0x63/0xd0 <4>[ 290.221190] ? _raw_spin_unlock_irqrestore+0x51/0x80 <4>[ 290.221195] ? __devm_add_action+0x70/0xa0 <4>[ 290.221202] ? xe_irq_install+0x11a/0x490 [xe] <4>[ 290.221346] xe_device_probe+0x3cc/0xc10 [xe] <4>[ 290.221467] ? __drm_dev_dbg+0x7d/0xb0 <4>[ 290.221474] ? __drmm_add_action_or_reset+0x1e/0x50 Oops#1 Part5 <4>[ 290.221484] xe_pci_probe+0x396/0x610 [xe] <4>[ 290.221634] local_pci_probe+0x47/0xb0 <4>[ 290.221642] pci_device_probe+0xf3/0x260 <4>[ 290.221651] really_probe+0xf1/0x410 <4>[ 290.221658] __driver_probe_device+0x8c/0x190 <4>[ 290.221664] device_driver_attach+0x57/0xd0 <4>[ 290.221671] bind_store+0x77/0xd0 <4>[ 290.221677] drv_attr_store+0x24/0x50 <4>[ 290.221680] sysfs_kf_write+0x4d/0x80 <4>[ 290.221687] kernfs_fop_write_iter+0x188/0x240 <4>[ 290.221694] vfs_write+0x283/0x540 <4>[ 290.221708] ksys_write+0x6f/0xf0 <4>[ 290.221714] __x64_sys_write+0x19/0x30 <4>[ 290.221718] x64_sys_call+0x79/0x26b0 <4>[ 290.221723] do_syscall_64+0x93/0x1470 <4>[ 290.221728] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221730] ? mntput_no_expire+0x73/0x170 <4>[ 290.221738] ? ksys_write+0x6f/0xf0 <4>[ 290.221746] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221748] ? path_put+0x1e/0x30 <4>[ 290.221753] ? do_faccessat+0x110/0x2e0 <4>[ 290.221763] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221767] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221769] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221773] ? do_syscall_64+0x1e4/0x1470 <4>[ 290.221775] ? exc_page_fault+0xbb/0x250 <4>[ 290.221780] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4>[ 290.221783] RIP: 0033:0x7fedd091c5a4 <4>[ 290.221787] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d a5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 <4>[ 290.221790] RSP: 002b:00007ffdd04e3028 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 <4>[ 290.221793] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fedd091c5a4 Oops#1 Part4 <4>[ 290.221795] RDX: 000000000000000c RSI: 00007ffdd04e44e0 RDI: 000000000000000b <4>[ 290.221797] RBP: 000000000000000c R08: 0000000000000073 R09: 0000000000000000 <4>[ 290.221799] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffdd04e44e0 <4>[ 290.221801] R13: 000000000000000b R14: 00005814cc9da35b R15: 00007ffdd04e4190 <4>[ 290.221814] <4>[ 290.221816] irq event stamp: 762506 <4>[ 290.221818] hardirqs last enabled at (762505): [] _raw_spin_unlock_irqrestore+0x51/0x80 <4>[ 290.221821] hardirqs last disabled at (762506): [] _raw_spin_lock_irq+0x6f/0x80 <4>[ 290.221824] softirqs last enabled at (762180): [] __irq_exit_rcu+0x13f/0x160 <4>[ 290.221829] softirqs last disabled at (762167): [] __irq_exit_rcu+0x13f/0x160 <4>[ 290.221832] ---[ end trace 0000000000000000 ]--- <7>[ 290.221836] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <3>[ 290.222002] xe 0000:03:00.0: probe with driver xe failed with error -12 <3>[ 290.222979] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT0: GuC RC enable mode=0 failed: -ENODEV <7>[ 290.223282] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <7>[ 290.225133] xe 0000:03:00.0: [drm:guc_ct_change_state [xe]] Tile0: GT0: GuC CT communication channel disabled <7>[ 290.314829] xe 0000:03:00.0: [drm:drm_pagemap_cache_fini [drm_gpusvm_helper]] Destroying dpagemap cache. <7>[ 290.316907] xe 0000:03:00.0: [drm:drm_pagemap_shrinker_fini [drm_gpusvm_helper]] Destroying dpagemap shrinker. Oops#1 Part3 <3>[ 294.630466] xe 0000:03:00.0: [drm] *ERROR* TLB invalidation fence timeout, seqno=39 recv=0 <1>[ 294.633145] BUG: unable to handle page fault for address: ffffc9000a38a188 <1>[ 294.633166] #PF: supervisor write access in kernel mode <1>[ 294.633181] #PF: error_code(0x0002) - not-present page <6>[ 294.633193] PGD 100000067 P4D 100000067 PUD 100ac2067 PMD 0 <4>[ 294.633218] Oops: Oops: 0002 [#1] SMP NOPTI <4>[ 294.633236] CPU: 10 UID: 0 PID: 2308 Comm: kworker/10:4 Tainted: G S U W 6.19.0-rc5-lgci-xe-xe-4393-d398f90fc4dec5593+ #1 PREEMPT(voluntary) <4>[ 294.633271] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER, [W]=WARN <4>[ 294.633285] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4>[ 294.633304] Workqueue: xe-destroy-wq __guc_exec_queue_destroy_async [xe] <4>[ 294.633749] RIP: 0010:xe_mmio_write32+0x58/0x280 [xe] <4>[ 294.634206] Code: 24 66 90 65 8b 05 bc 6e 2a e3 48 0f a3 05 60 a3 cd e2 0f 82 ee 00 00 00 41 f7 c5 00 00 00 01 0f 84 88 00 00 00 49 03 5c 24 08 <44> 89 3b 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 <4>[ 294.634241] RSP: 0018:ffffc90003dcb830 EFLAGS: 00010086 <4>[ 294.634260] RAX: 0000000000000002 RBX: ffffc9000a38a188 RCX: 0000000000000000 <4>[ 294.634278] RDX: 0000000000010001 RSI: 000000000000a188 RDI: ffff8881580401c8 <4>[ 294.634295] RBP: ffffc90003dcb8a8 R08: 0000000000000000 R09: 0000000000000000 <4>[ 294.634312] R10: ffff888158978000 R11: 0000000000000001 R12: ffff8881580401c8 <4>[ 294.634328] R13: 000000000000a188 R14: ffff888158978000 R15: 0000000000010001 <4>[ 294.634345] FS: 0000000000000000(0000) GS:ffff8888db1dd000(0000) knlGS:0000000000000000 Oops#1 Part2 <4>[ 294.634365] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 294.634379] CR2: ffffc9000a38a188 CR3: 0000000003448006 CR4: 0000000000f72ef0 <4>[ 294.634396] PKRU: 55555554 <4>[ 294.634405] Call Trace: <4>[ 294.634414] <4>[ 294.634431] xe_force_wake_get+0x417/0x950 [xe] <4>[ 294.634827] ? _raw_spin_unlock_irqrestore+0x27/0x80 <4>[ 294.634855] send_tlb_inval_ggtt+0xfa/0x270 [xe] <4>[ 294.635274] ? trace_hardirqs_on+0x63/0xd0 <4>[ 294.635293] ? _raw_spin_unlock_irq+0x27/0x70 <4>[ 294.635309] ? xe_tlb_inval_fence_prep+0xbf/0x1a0 [xe] <4>[ 294.635705] xe_tlb_inval_ggtt+0x73/0x250 [xe] <4>[ 294.635787] ? find_held_lock+0x31/0x90 <4>[ 294.635791] ? ggtt_node_remove+0xc4/0x140 [xe] <4>[ 294.635860] ggtt_invalidate_gt_tlb.part.0+0x1f/0xb0 [xe] <4>[ 294.635928] ggtt_node_remove+0x122/0x140 [xe] <4>[ 294.635995] xe_ggtt_node_remove+0x40/0xa0 [xe] <4>[ 294.636062] xe_ggtt_remove_bo+0x87/0x250 [xe] <4>[ 294.636129] ? _raw_write_unlock+0x22/0x50 <4>[ 294.636132] ? drm_vma_offset_remove+0x65/0x80 <4>[ 294.636136] xe_ttm_bo_destroy+0xa2/0x2d0 [xe] <4>[ 294.636202] ? lock_is_held_type+0xa3/0x130 <4>[ 294.636206] ttm_bo_release+0x70/0x330 [ttm] <4>[ 294.636213] ? xe_ggtt_might_lock+0x29/0x60 [xe] <4>[ 294.636282] ? lock_release+0xce/0x280 <4>[ 294.636286] ttm_bo_fini+0x3c/0x70 [ttm] <4>[ 294.636291] xe_gem_object_free+0x1a/0x30 [xe] <4>[ 294.636358] drm_gem_object_free+0x1d/0x40 <4>[ 294.636362] xe_bo_put+0x12a/0x190 [xe] <4>[ 294.636430] xe_lrc_destroy+0x47/0x60 [xe] <4>[ 294.636504] xe_exec_queue_fini+0x85/0xd0 [xe] Oops#1 Part1 <4>[ 294.636571] __guc_exec_queue_destroy_async+0x6c/0x170 [xe] <4>[ 294.636642] process_one_work+0x22e/0x6b0 <4>[ 294.636647] worker_thread+0x1e8/0x3d0 <4>[ 294.636649] ? __pfx_worker_thread+0x10/0x10 <4>[ 294.636652] kthread+0x11f/0x250 <4>[ 294.636655] ? __pfx_kthread+0x10/0x10 <4>[ 294.636658] ret_from_fork+0x344/0x3a0 <4>[ 294.636661] ? __pfx_kthread+0x10/0x10 <4>[ 294.636664] ret_from_fork_asm+0x1a/0x30 <4>[ 294.636670] <4>[ 294.636671] Modules linked in: snd_hda_codec_intelhdmi snd_hda_codec_hdmi pmt_crashlog mei_gsc_proxy mei_lb mtd_intel_dg mei_gsc xe drm_gpuvm drm_gpusvm_helper drm_ttm_helper ttm gpu_sched drm_suballoc_helper drm_exec drm_buddy drm_display_helper cec rc_core drm_kunit_helpers i2c_algo_bit kunit overlay intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp cmdlinepart coretemp hid_generic spi_nor asus_nb_wmi asus_wmi mei_pxp mei_hdcp mtd sparse_keymap platform_profile wmi_bmof kvm_intel snd_hda_intel binfmt_misc usbhid snd_intel_dspcfg kvm hid irqbypass snd_hda_codec ghash_clmulni_intel aesni_intel r8169 snd_hda_core rapl video snd_hwdep intel_cstate snd_pcm realtek snd_timer i2c_i801 intel_pmc_core mei_me i2c_mux snd spi_intel_pci nls_iso8859_1 i2c_smbus spi_intel mei soundcore idma64 pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry wmi intel_vsec acpi_tad acpi_pad pinctrl_alderlake dm_multipath msr nvme_fabrics <4>[ 294.636709] efi_pstore fuse nfnetlink autofs4 <4>[ 294.636736] CR2: ffffc9000a38a188 <4>[ 294.636739] ---[ end trace 0000000000000000 ]---