From a5c8e0524dbbe1107d81a1604da3d191b66ead6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 25 Jan 2019 13:40:01 +0100 Subject: drm/amdgpu: cleanup amdgpu_pte_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kptr is not used any more. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0bc6f553dc08..a404ac17e5ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -107,14 +107,6 @@ struct amdgpu_pte_update_params { * DMA addresses to use for mapping, used during VM update by CPU */ dma_addr_t *pages_addr; - - /** - * @kptr: - * - * Kernel pointer of PD/PT BO that needs to be updated, - * used during VM update by CPU - */ - void *kptr; }; /** -- cgit v1.2.3 From fe57085a36de5813ab63a8d178ccfb5f257f028e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 22 Jan 2019 15:44:54 -0500 Subject: drm/amdgpu: clean up memory/GDS/GWS/OA alignment code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - move all adjustments into one place - specify GDS/GWS/OA alignment in basic units of the heaps - it looks like GDS alignment was 1 instead of 4 Signed-off-by: Marek Olšák Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f4f00217546e..d21dd2f369da 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, memset(&bp, 0, sizeof(bp)); *obj = NULL; - /* At least align on page size */ - if (alignment < PAGE_SIZE) { - alignment = PAGE_SIZE; - } bp.size = size; bp.byte_align = alignment; @@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - /* GDS allocations must be DW aligned */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS) - size = ALIGN(size, 4); } if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 728e15e5d68a..fd9c4beeaaa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, size_t acc_size; int r; - page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; - if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + /* Note that GDS/GWS/OA allocates 1 page per byte/resource. */ + if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { + /* GWS and OA don't need any alignment. */ + page_align = bp->byte_align; size <<= PAGE_SHIFT; - else + } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) { + /* Both size and alignment must be a multiple of 4. */ + page_align = ALIGN(bp->byte_align, 4); + size = ALIGN(size, 4) << PAGE_SHIFT; + } else { + /* Memory should be aligned at least to a page size. 
*/ + page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); + } if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b852abb9db0f..73e71e61dc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1756,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + 4, AMDGPU_GEM_DOMAIN_GDS, &adev->gds.gds_gfx_bo, NULL, NULL); if (r) return r; @@ -1769,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + 1, AMDGPU_GEM_DOMAIN_GWS, &adev->gds.gws_gfx_bo, NULL, NULL); if (r) return r; @@ -1782,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + 1, AMDGPU_GEM_DOMAIN_OA, &adev->gds.oa_gfx_bo, NULL, NULL); if (r) return r; -- cgit v1.2.3 From c1219b941c1ff0abc615e471ad81507ddb397253 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Feb 2019 10:41:59 +0000 Subject: drm/amd/amdgpu: fix spelling mistake "matech" -> "match" There is a spelling mistake in a dev_err message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0d90672d0e58..407dd16cc35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -125,7 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) if (!hive) { ret = -EINVAL; dev_err(adev->dev, - "XGMI: node 0x%llx, can not matech hive 0x%llx in the hive list.\n", + "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id); goto exit; } -- cgit v1.2.3 From 67dd1a36334ffce82bebeb2d633e152aa436d370 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 31 Jan 2019 15:44:22 -0500 Subject: drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New chunk for dependency on start of job's execution instead on the end. This is used for GPU deadlock prevention when userspace uses mid-IB fences to wait for mid-IB work on other rings. v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES v3: Bump KMS version v4: put old fence AFTER acquiring the scheduled fence. 
Signed-off-by: Andrey Grodzovsky Suggested-by: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 13 ++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1c49b8266d69..52a5e4fdc95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: break; default: @@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + struct dma_fence *old = fence; + + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + if (IS_ERR(fence)) { r = PTR_ERR(fence); amdgpu_ctx_put(ctx); @@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || + chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c806f984bcc5..1158a6f4eec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,9 +71,10 @@ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. 
* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 27 +#define KMS_DRIVER_MINOR 28 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -- cgit v1.2.3 From 41cca166cc57e75e94d888595a428d23a3bf4e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 21 Jan 2019 17:22:55 -0500 Subject: drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not increasing the DRM version because GDS isn't totally without bugs yet. v2: update emit_ib_size Signed-off-by: Marek Olšák Acked-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 19 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 21 +++++++++++++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 +++++++++++++++++++++++++++++++-- 5 files changed, 79 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1158a6f4eec6..2f0ea380c031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -72,9 +72,10 @@ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. 
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES + * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 28 +#define KMS_DRIVER_MINOR 29 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index ecbcefe49a98..f89f5734d985 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -37,6 +37,8 @@ struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; + uint32_t gds_compute_max_wave_id; + /* At present, GDS, GWS and OA resources for gfx (graphics) * is always pre-allocated and available for graphics operation. * Such resource is shared between all gfx clients. diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7984292f9282..a59e0fdf5a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2264,6 +2264,22 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). 
+ */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, @@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a26747681ed6..b8e50a34bdb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6084,6 +6084,22 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. 
+ * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, @@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_fence = gfx_v8_0_ring_emit_fence_kiq, .test_ring = gfx_v8_0_ring_test_ring, .insert_nop = amdgpu_ring_insert_nop, @@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { 
adev->gds.mem.gfx_partition_size = 4096; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 262ee3cf6f1c..5533f6e4f4a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4010,6 +4010,22 @@ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, @@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* 
gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ - .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, .test_ring = gfx_v9_0_ring_test_ring, .insert_nop = amdgpu_ring_insert_nop, @@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA20: + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + case CHIP_VEGA12: + adev->gds.gds_compute_max_wave_id = 0x27f; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 0x8) + adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ + else + adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ + break; + default: + /* this really depends on the chip */ + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + } + adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; -- cgit v1.2.3 From 90d647222a8f004bf1430ecea3099ebcc54bfc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 30 Jan 2019 14:12:51 +0100 Subject: drm/amdgpu: fix waiting for BO moves with CPU based PD/PT updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we open up the possibility to use uninitialized memory. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a404ac17e5ae..93b936f7de4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1781,13 +1781,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (pages_addr) params.src = ~0; - /* Wait for PT BOs to be free. 
PTs share the same resv. object + /* Wait for PT BOs to be idle. PTs share the same resv. object * as the root PD BO */ r = amdgpu_vm_wait_pd(adev, vm, owner); if (unlikely(r)) return r; + /* Wait for any BO move to be completed */ + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; return amdgpu_vm_update_ptes(&params, start, last + 1, -- cgit v1.2.3 From 1b52f2d5679db90166ea71382211f3c7319aab51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 30 Jan 2019 14:09:29 +0100 Subject: drm/amdgpu: cleanup VM dw estimation a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 93b936f7de4b..1e3a36c90d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1806,13 +1806,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* * reserve space for two commands every (1 << BLOCK_SIZE) * entries or 2k dwords (whatever is smaller) - * - * The second command is for the shadow pagetables. */ + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); + + /* The second command is for the shadow pagetables. */ if (vm->root.base.bo->shadow) - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; - else - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); + ncmds *= 2; /* padding, etc. 
*/ ndw = 64; @@ -1831,10 +1830,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ndw += ncmds * 10; /* extra commands for begin/end fragments */ + ncmds = 2 * adev->vm_manager.fragment_size; if (vm->root.base.bo->shadow) - ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; - else - ndw += 2 * 10 * adev->vm_manager.fragment_size; + ncmds *= 2; + + ndw += 10 * ncmds; params.func = amdgpu_vm_do_set_ptes; } -- cgit v1.2.3 From c53134577c185533ca7e0b958bafc77539d41fd9 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 1 Feb 2019 17:57:48 -0500 Subject: drm/amdgpu: Fix pci platform speed and width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new Vega series GPU cards have in-built bridges. To get the pcie speed and width supported by the platform walk the hierarchy and get the slowest link. Signed-off-by: Harish Kasiviswanathan Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d7dddb936f84..fcab1fe9bb68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3618,6 +3618,38 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ return r; } +static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *pdev = adev->pdev; + enum pci_bus_speed cur_speed; + enum pcie_link_width cur_width; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while (pdev) { + cur_speed = pcie_get_speed_cap(pdev); + cur_width = pcie_get_width_cap(pdev); + + if (cur_speed != PCI_SPEED_UNKNOWN) { + if (*speed == PCI_SPEED_UNKNOWN) + *speed = cur_speed; + else if (cur_speed < *speed) + *speed = cur_speed; + } + + if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { + if (*width == PCIE_LNK_WIDTH_UNKNOWN) + *width = cur_width; + else if (cur_width < *width) + *width = cur_width; + } + pdev = pci_upstream_bridge(pdev); + } +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3630,8 +3662,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { struct pci_dev *pdev; - enum pci_bus_speed speed_cap; - enum pcie_link_width link_width; + enum pci_bus_speed speed_cap, platform_speed_cap; + enum pcie_link_width platform_link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3648,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) return; } + if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) + return; + + amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, + &platform_link_width); + if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ pdev = adev->pdev; @@ -3673,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; } /* platform caps */ - pdev = adev->ddev->pdev->bus->self; - speed_cap = pcie_get_speed_cap(pdev); - if (speed_cap == PCI_SPEED_UNKNOWN) { + if (platform_speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= 
(CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); } else { - if (speed_cap == PCIE_SPEED_16_0GT) + if (platform_speed_cap == PCIE_SPEED_16_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); - else if (speed_cap == PCIE_SPEED_8_0GT) + else if (platform_speed_cap == PCIE_SPEED_8_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); - else if (speed_cap == PCIE_SPEED_5_0GT) + else if (platform_speed_cap == PCIE_SPEED_5_0GT) adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); else @@ -3697,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } if (adev->pm.pcie_mlw_mask == 0) { - pdev = adev->ddev->pdev->bus->self; - link_width = pcie_get_width_cap(pdev); - if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; } else { - switch (link_width) { + switch (platform_link_width) { case PCIE_LNK_X32: adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | -- cgit v1.2.3 From 7fbd31cceac0596c46394ea8745fe09b98d6ed79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Feb 2019 11:41:59 +0100 Subject: drm/amdgpu: fix NULL ptr dref in the VM code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exclusive fence is of course perfectly optional here. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1e3a36c90d38..75481cf3348f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1789,9 +1789,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, return r; /* Wait for any BO move to be completed */ - r = dma_fence_wait(exclusive, true); - if (unlikely(r)) - return r; + if (exclusive) { + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + } params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; -- cgit v1.2.3