summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2020-01-18 13:00:23 +1000
committerDave Airlie <airlied@redhat.com>2020-01-18 13:00:24 +1000
commitdf95968ff78931576ac7a3d3b30312894aaaf22e (patch)
tree8bd80d7f3314854c52e04d446c3394af4d415a1e /drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
parent71e7274066c646bb3d9da39d2f4db0a6404c0a2d (diff)
parent7b19914383fc008a6b51871f18da72cf9aa43cae (diff)
Merge tag 'amd-drm-next-5.6-2020-01-17' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.6-2020-01-17: amdgpu: - Fix 32 bit harder - Powerplay cleanups - VCN fixes for Arcturus - RAS fixes - eDP/DP fixes - SR-IOV fixes - Re-enable S/G display for PCO/RV2 - Free stolen memory after init on gmc10 - DF hashing optimizations for Arcturus - Properly handle runtime pm in sysfs and debugfs - Unify more GC programming between amdgpu and amdkfd - Golden settings updates for gfx10 - GDDR6 training fixes - Freesync fixes - DSC fixes - TMDS fixes - Renoir USB-C fixes - DC dml updates from hw team - Pollock support - Mutex init regression fix amdkfd: - Unify more GC programming between amdgpu and amdkfd - Use KIQ to setup HIQ rather than using MMIO scheduler: - Documentation fixes - Improve job distribution with load sharing drm: - DP MST fix Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200117213625.4722-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c114
1 files changed, 105 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index da9765ff45d6..bbede09983e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
#include "navi10_enum.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v2_3.h"
@@ -234,6 +237,19 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
(!amdgpu_sriov_vf(adev)));
}
+/*
+ * Look up the PASID currently mapped to @vmid.
+ *
+ * Reads the ATHUB register located @vmid entries past
+ * mmATC_VMID0_PASID_MAPPING, stores its PASID field in @p_pasid and
+ * returns true when the VALID bit of that register is set.
+ * @p_pasid is written regardless of the VALID bit.
+ */
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+					struct amdgpu_device *adev,
+					uint8_t vmid, uint16_t *p_pasid)
+{
+	uint32_t reg, mapping;
+
+	/* One mapping register per VMID, laid out consecutively. */
+	reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid;
+	mapping = RREG32(reg);
+
+	*p_pasid = mapping & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+	return (mapping & ATC_VMID0_PASID_MAPPING__VALID_MASK) != 0;
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +396,63 @@ error_alloc:
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: flush type, forwarded to the KIQ invalidate packet or to
+ *              gmc_v10_0_flush_gpu_tlb() on the fallback path
+ * @all_hub: when true flush every VM hub, otherwise only AMDGPU_GFXHUB_0
+ *
+ * Flush the TLB for the requested pasid.
+ *
+ * When not in emulation mode and the KIQ ring is ready, the flush is
+ * submitted through the KIQ and waited for with a polling fence.
+ * Otherwise VMIDs 1..15 are scanned for one whose valid ATC mapping
+ * matches @pasid, and the first match is flushed directly.
+ *
+ * Returns 0 on success (including when no VMID maps to @pasid on the
+ * fallback path), -ETIME if waiting for the KIQ fence fails.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					uint16_t pasid, uint32_t flush_type,
+					bool all_hub)
+{
+	int vmid, i;
+	signed long r;
+	uint32_t seq;
+	uint16_t queried_pasid;
+	bool ret;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+		spin_lock(&adev->gfx.kiq.ring_lock);
+		/*
+		 * Reserve room for the polling fence emitted below in
+		 * addition to the invalidate packet itself.
+		 * NOTE(review): 8 dwords is the assumed KIQ fence size;
+		 * confirm against the ring's emit_fence implementation.
+		 */
+		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+		kiq->pmf->kiq_invalidate_tlbs(ring,
+					pasid, flush_type, all_hub);
+		amdgpu_fence_emit_polling(ring, &seq);
+		amdgpu_ring_commit(ring);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
+		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		if (r < 1) {
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+			return -ETIME;
+		}
+
+		return 0;
+	}
+
+	/* Fallback: find the VMID bound to this pasid and flush it directly. */
+	for (vmid = 1; vmid < 16; vmid++) {
+
+		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			/* Honour the caller's flush_type, as the KIQ path does. */
+			if (all_hub) {
+				for (i = 0; i < adev->num_vmhubs; i++)
+					gmc_v10_0_flush_gpu_tlb(adev, vmid,
+							i, flush_type);
+			} else {
+				gmc_v10_0_flush_gpu_tlb(adev, vmid,
+						AMDGPU_GFXHUB_0, flush_type);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
@@ -531,6 +604,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
.map_mtype = gmc_v10_0_map_mtype,
@@ -566,6 +640,13 @@ static int gmc_v10_0_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
+ /*
+ * Can't free the stolen VGA memory when it might be used for memory
+ * training again.
+ */
+ if (!adev->fw_vram_usage.mem_train_support)
+ amdgpu_bo_late_init(adev);
+
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
if (r)
return r;
@@ -720,6 +801,10 @@ static int gmc_v10_0_sw_init(void *handle)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
@@ -732,15 +817,6 @@ static int gmc_v10_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * Reserve 8M stolen memory for navi10 like vega10
- * TODO: will check if it's really needed on asic.
- */
- if (amdgpu_emu_mode == 1)
- adev->gmc.stolen_size = 0;
- else
- adev->gmc.stolen_size = 9 * 1024 *1024;
-
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
@@ -753,6 +829,19 @@ static int gmc_v10_0_sw_init(void *handle)
adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);
+ /*
+ * In a dual-GPU scenario, stolen_size is set to zero on the
+ * secondary GPU, since there is no pre-OS console using that memory.
+ * The bottom region of VRAM is then allocated as GTT; unfortunately a
+ * small region of bottom VRAM is encroached upon by UMC firmware during
+ * GDDR6 BIST training, which causes a page fault.
+ * The page fault can be fixed by forcing stolen_size to 3MB, so that the
+ * bottom region of VRAM is allocated as stolen memory and GTT corruption
+ * is avoided.
+ */
+ adev->gmc.stolen_size = max(adev->gmc.stolen_size,
+ AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -792,6 +881,13 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
static int gmc_v10_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ void *stolen_vga_buf;
+
+ /*
+ * Free the stolen memory if it wasn't already freed in late_init
+ * because of memory training.
+ */
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
amdgpu_vm_manager_fini(adev);
gmc_v10_0_gart_fini(adev);