untrusted comment: verify with openbsd-70-base.pub
RWR3KL+gSr4QZzqmC8EqtqwQ9tL/A1wUClUK2zUvYksjbzy9TfzOM/EhopqFxlIO+RbOod07faFLShQqv6Epn9YmzUsqnuj5QQA=

OpenBSD 7.0 errata 012, February 2, 2022:

Userspace controlled code on GPU can access kernel memory on Intel
gen 8 and later GPUs.

Apply by doing:
    signify -Vep /etc/signify/openbsd-70-base.pub -x 012_gpuflush.patch.sig \
        -m - | (cd /usr/src && patch -p0)

And then rebuild and install a new kernel:
    KK=`sysctl -n kern.osversion | cut -d# -f1`
    cd /usr/src/sys/arch/`machine`/compile/$KK
    make obj
    make config
    make
    make install

Index: sys/dev/pci/drm/i915/i915_reg.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_reg.h,v
retrieving revision 1.24
diff -u -p -r1.24 i915_reg.h
--- sys/dev/pci/drm/i915/i915_reg.h	12 Aug 2021 13:06:13 -0000	1.24
+++ sys/dev/pci/drm/i915/i915_reg.h	30 Jan 2022 00:43:46 -0000
@@ -2639,6 +2639,12 @@ static inline bool i915_mmio_reg_valid(i
 #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1 << 28)
 #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1 << 24)
 
+#define GEN8_RTCR	_MMIO(0x4260)
+#define GEN8_M1TCR	_MMIO(0x4264)
+#define GEN8_M2TCR	_MMIO(0x4268)
+#define GEN8_BTCR	_MMIO(0x426c)
+#define GEN8_VTCR	_MMIO(0x4270)
+
 #if 0
 #define PRB0_TAIL	_MMIO(0x2030)
 #define PRB0_HEAD	_MMIO(0x2034)
@@ -2727,6 +2733,11 @@ static inline bool i915_mmio_reg_valid(i
 #define GEN12_FAULT_TLB_DATA1		_MMIO(0xcebc)
 #define   FAULT_VA_HIGH_BITS		(0xf << 0)
 #define   FAULT_GTT_SEL			(1 << 4)
+
+#define GEN12_GFX_TLB_INV_CR	_MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR	_MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR	_MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR	_MMIO(0xcee4)
 
 #define GEN12_AUX_ERR_DBG		_MMIO(0x43f4)
 
Index: sys/dev/pci/drm/i915/i915_vma.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_vma.c,v
retrieving revision 1.6
diff -u -p -r1.6 i915_vma.c
--- sys/dev/pci/drm/i915/i915_vma.c	7 Jul 2021 02:38:33 -0000	1.6
+++ sys/dev/pci/drm/i915/i915_vma.c	30 Jan 2022 00:43:46 -0000
@@ -455,6 +455,9 @@ int i915_vma_bind(struct i915_vma *vma,
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
+	if (vma->obj)
+		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
Index: sys/dev/pci/drm/i915/intel_uncore.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_uncore.c,v
retrieving revision 1.9
diff -u -p -r1.9 intel_uncore.c
--- sys/dev/pci/drm/i915/intel_uncore.c	7 Jul 2021 02:38:34 -0000	1.9
+++ sys/dev/pci/drm/i915/intel_uncore.c	30 Jan 2022 00:43:46 -0000
@@ -727,7 +727,8 @@ void intel_uncore_forcewake_get__locked(
 }
 
 static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
-					 enum forcewake_domains fw_domains)
+					 enum forcewake_domains fw_domains,
+					 bool delayed)
 {
 	struct intel_uncore_forcewake_domain *domain;
 	unsigned int tmp;
@@ -742,7 +743,11 @@ static void __intel_uncore_forcewake_put
 			continue;
 		}
 
-		uncore->funcs.force_wake_put(uncore, domain->mask);
+		if (delayed &&
+		    !(domain->uncore->fw_domains_timer & domain->mask))
+			fw_domain_arm_timer(domain);
+		else
+			uncore->funcs.force_wake_put(uncore, domain->mask);
 	}
 }
 
@@ -763,7 +768,20 @@ void intel_uncore_forcewake_put(struct i
 		return;
 
 	spin_lock_irqsave(&uncore->lock, irqflags);
-	__intel_uncore_forcewake_put(uncore, fw_domains);
+	__intel_uncore_forcewake_put(uncore, fw_domains, false);
+	spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+					enum forcewake_domains fw_domains)
+{
+	unsigned long irqflags;
+
+	if (!uncore->funcs.force_wake_put)
+		return;
+
+	spin_lock_irqsave(&uncore->lock, irqflags);
+	__intel_uncore_forcewake_put(uncore, fw_domains, true);
 	spin_unlock_irqrestore(&uncore->lock, irqflags);
 }
 
@@ -805,7 +823,7 @@ void intel_uncore_forcewake_put__locked(
 	if (!uncore->funcs.force_wake_put)
 		return;
 
-	__intel_uncore_forcewake_put(uncore, fw_domains);
+	__intel_uncore_forcewake_put(uncore, fw_domains, false);
 }
 
 void assert_forcewakes_inactive(struct intel_uncore *uncore)
Index: sys/dev/pci/drm/i915/intel_uncore.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_uncore.h,v
retrieving revision 1.3
diff -u -p -r1.3 intel_uncore.h
--- sys/dev/pci/drm/i915/intel_uncore.h	7 Jul 2021 02:38:34 -0000	1.3
+++ sys/dev/pci/drm/i915/intel_uncore.h	30 Jan 2022 00:43:46 -0000
@@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct i
 				enum forcewake_domains domains);
 void intel_uncore_forcewake_put(struct intel_uncore *uncore,
 				enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+					enum forcewake_domains domains);
 void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
 				  enum forcewake_domains fw_domains);
 
Index: sys/dev/pci/drm/i915/gem/i915_gem_object_types.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h,v
retrieving revision 1.2
diff -u -p -r1.2 i915_gem_object_types.h
--- sys/dev/pci/drm/i915/gem/i915_gem_object_types.h	7 Jul 2021 02:38:34 -0000	1.2
+++ sys/dev/pci/drm/i915/gem/i915_gem_object_types.h	30 Jan 2022 00:43:46 -0000
@@ -159,6 +159,7 @@ struct drm_i915_gem_object {
 #define I915_BO_ALLOC_VOLATILE   BIT(1)
 #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE)
 #define I915_BO_READONLY         BIT(2)
+#define I915_BO_WAS_BOUND_BIT    3
 
 	/*
 	 * Is the object to be mapped as read-only to the GPU
Index: sys/dev/pci/drm/i915/gem/i915_gem_pages.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_pages.c,v
retrieving revision 1.3
diff -u -p -r1.3 i915_gem_pages.c
--- sys/dev/pci/drm/i915/gem/i915_gem_pages.c	7 Jul 2021 02:38:34 -0000	1.3
+++ sys/dev/pci/drm/i915/gem/i915_gem_pages.c	30 Jan 2022 00:43:47 -0000
@@ -10,6 +10,8 @@
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
+#include "gt/intel_gt.h"
+
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -185,6 +187,14 @@ __i915_gem_object_unset_pages(struct drm
 
 	__i915_gem_object_reset_page_iter(obj);
 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+			intel_gt_invalidate_tlbs(&i915->gt);
+	}
 
 	return pages;
 }
Index: sys/dev/pci/drm/i915/gt/intel_gt.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt.c,v
retrieving revision 1.3
diff -u -p -r1.3 intel_gt.c
--- sys/dev/pci/drm/i915/gt/intel_gt.c	7 Jul 2021 02:38:35 -0000	1.3
+++ sys/dev/pci/drm/i915/gt/intel_gt.c	30 Jan 2022 00:43:47 -0000
@@ -26,6 +26,8 @@ void intel_gt_init_early(struct intel_gt
 
 	mtx_init(&gt->irq_lock, IPL_TTY);
 
+	rw_init(&gt->tlb_invalidate_lock, "itlbinv");
+
 	INIT_LIST_HEAD(&gt->closed_vma);
 	mtx_init(&gt->closed_lock, IPL_TTY);
 
@@ -664,4 +666,103 @@ void intel_gt_info_print(const struct in
 	drm_printf(p, "available engines: %x\n", info->engine_mask);
 
 	intel_sseu_dump(&info->sseu, p);
+}
+
+struct reg_and_bit {
+	i915_reg_t reg;
+	u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+		const i915_reg_t *regs, const unsigned int num)
+{
+	const unsigned int class = engine->class;
+	struct reg_and_bit rb = { };
+
+	if (drm_WARN_ON_ONCE(&engine->i915->drm,
+			     class >= num || !regs[class].reg))
+		return rb;
+
+	rb.reg = regs[class];
+	if (gen8 && class == VIDEO_DECODE_CLASS)
+		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+	else
+		rb.bit = engine->instance;
+
+	rb.bit = BIT(rb.bit);
+
+	return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+	static const i915_reg_t gen8_regs[] = {
+		[RENDER_CLASS]			= GEN8_RTCR,
+		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
+		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
+	};
+	static const i915_reg_t gen12_regs[] = {
+		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
+	};
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	const i915_reg_t *regs;
+	unsigned int num = 0;
+
+	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+		return;
+
+	if (INTEL_GEN(i915) == 12) {
+		regs = gen12_regs;
+		num = ARRAY_SIZE(gen12_regs);
+	} else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) {
+		regs = gen8_regs;
+		num = ARRAY_SIZE(gen8_regs);
+	} else if (INTEL_GEN(i915) < 8) {
+		return;
+	}
+
+	if (drm_WARN_ONCE(&i915->drm, !num,
+			  "Platform does not implement TLB invalidation!"))
+		return;
+
+	GEM_TRACE("\n");
+
+	assert_rpm_wakelock_held(&i915->runtime_pm);
+
+	mutex_lock(&gt->tlb_invalidate_lock);
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	for_each_engine(engine, gt, id) {
+		/*
+		 * HW architecture suggest typical invalidation time at 40us,
+		 * with pessimistic cases up to 100us and a recommendation to
+		 * cap at 1ms. We go a bit higher just in case.
+		 */
+		const unsigned int timeout_us = 100;
+		const unsigned int timeout_ms = 4;
+		struct reg_and_bit rb;
+
+		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+		if (!i915_mmio_reg_offset(rb.reg))
+			continue;
+
+		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+		if (__intel_wait_for_register_fw(uncore,
+						 rb.reg, rb.bit, 0,
+						 timeout_us, timeout_ms,
+						 NULL))
+			DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
+					      engine->name, timeout_ms);
+	}
+
+	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+	mutex_unlock(&gt->tlb_invalidate_lock);
 }
Index: sys/dev/pci/drm/i915/gt/intel_gt.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt.h,v
retrieving revision 1.2
diff -u -p -r1.2 intel_gt.h
--- sys/dev/pci/drm/i915/gt/intel_gt.h	7 Jul 2021 02:38:35 -0000	1.2
+++ sys/dev/pci/drm/i915/gt/intel_gt.h	30 Jan 2022 00:43:47 -0000
@@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(co
 void intel_gt_info_print(const struct intel_gt_info *info,
 			 struct drm_printer *p);
 
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
 #endif /* __INTEL_GT_H__ */
Index: sys/dev/pci/drm/i915/gt/intel_gt_types.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt_types.h,v
retrieving revision 1.2
diff -u -p -r1.2 intel_gt_types.h
--- sys/dev/pci/drm/i915/gt/intel_gt_types.h	7 Jul 2021 02:38:35 -0000	1.2
+++ sys/dev/pci/drm/i915/gt/intel_gt_types.h	30 Jan 2022 00:43:47 -0000
@@ -36,6 +36,8 @@ struct intel_gt {
 
 	struct intel_uc uc;
 
+	struct rwlock tlb_invalidate_lock;
+
 	struct intel_gt_timelines {
 		spinlock_t lock; /* protects active_list */
 		struct list_head active_list;
